microWakeWord-Trainer-Nvidi…/cli/setup_python_venv

#!/bin/bash
# PROGDIR: directory containing this script, with symlinks resolved by realpath.
# ROOTDIR: the parent directory of PROGDIR.
PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

# Option names accepted by this script.
# NOTE(review): presumably read by the argument parser in shell.functions,
# which appears to provide DATA_DIR, PYTHON, GPU, HELP, VERBOSE, UNKNOWN_ARGS
# and print_elapsed_time used further down - confirm against shell.functions.
KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"

# Report any options that were not recognised and fall through to the usage
# text below.
if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
    echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
    HELP=true
fi

# Print the usage text on stderr and stop with a non-zero status.
# The text lists every option named in KNOWN_ARGS (data-dir, python, gpu,
# no-gpu) plus --verbose.
if [ "${HELP}" == "true" ] ; then
    cat <<EOF >&2
Usage: setup_python_venv --data-dir=<path> [ --python=<interpreter> ]
                         [ --gpu | --no-gpu ] [ --verbose ]

Options:
--data-dir=<path>:
           The data directory that will hold the virtualenv (.venv) and the
           downloaded tools.  The directory must already exist.

--python=<interpreter>:
           The python interpreter used to create the virtualenv.  If not
           given, python3.12, python3.11 and python3.10 are tried in that
           order.

--gpu:     Install the GPU-capable versions of packages if available.  This
           is the default if the script detects that a GPU is available.

--no-gpu:  Install the non-GPU-capable versions of packages even if
           GPU-capable packages are available.  This is the default if the script
           detects that a GPU is NOT available.

--verbose: Print the detailed "pip install" output.

EOF
    exit 1
fi

# Canonicalise the data directory.  The value is only replaced when realpath
# succeeds, so that the error message below can still name what the user
# actually passed.
if [ -n "${DATA_DIR}" ] ; then
    resolved_data_dir="$(realpath "${DATA_DIR}" 2>/dev/null)" && DATA_DIR="${resolved_data_dir}"
    unset resolved_data_dir
fi

if [ ! -d "${DATA_DIR}" ] ; then
    echo "Data directory '${DATA_DIR}' doesn't exist." >&2
    exit 1
fi

# Later steps use paths relative to the data directory (.venv/bin,
# .mww-data-dir), and "set -e" is not active yet, so a failed cd has to be
# caught here explicitly.
cd "${DATA_DIR}" || {
    echo "Unable to change to data directory '${DATA_DIR}'." >&2
    exit 1
}

# When GPU has no value yet, decide it here: the presence of the
# /dev/nvidiactl character device selects GPU=true, otherwise GPU=false.
if [ -z "${GPU}" ] ; then
    if [ -c /dev/nvidiactl ] ; then
        GPU=true
        echo "   Nvidia GPU detected"
    else
        GPU=false
    fi
fi

# GPU holds "true" or "false" and is executed as a command.  For a non-GPU
# setup CUDA_VISIBLE_DEVICES is exported as -1.
if ! "${GPU}" ; then
    export CUDA_VISIBLE_DEVICES=-1
fi

# Location of the virtualenv managed by this script.
VENV="${DATA_DIR}/.venv"

# Leave any virtualenv inherited from the calling shell.  "deactivate" is a
# shell function created by sourcing an activate script, so it normally does
# not exist in this process even though VIRTUAL_ENV was inherited through the
# environment.  In that case undo the activation by hand: drop the old
# virtualenv's bin directory from PATH and clear VIRTUAL_ENV, so the
# "creating/updating" decision and the interpreter lookup below are not
# influenced by the old environment.
if [ -n "${VIRTUAL_ENV:-}" ] ; then
    if declare -F deactivate >/dev/null ; then
        deactivate
    else
        active_venv_bin="${VIRTUAL_ENV}/bin"
        PATH=":${PATH}:"
        PATH="${PATH//":${active_venv_bin}:"/:}"
        PATH="${PATH#:}"
        PATH="${PATH%:}"
        unset VIRTUAL_ENV active_venv_bin
    fi
fi

# Build the list of candidate interpreters: either the single interpreter the
# caller asked for, or the supported versions from newest to oldest.
if [ -n "${PYTHON}" ] ; then
    PYTHONS=( "${PYTHON}" )
    REQUESTED_PYTHON="${PYTHON}"
    unset PYTHON
else
    # 3.11 is included as a common middle-ground (especially outside
    # Ubuntu 24.04).
    PYTHONS=( python3.12 python3.11 python3.10 )
    REQUESTED_PYTHON=""
fi

# The first candidate that can run "--version" wins.
for p in "${PYTHONS[@]}" ; do
    if "${p}" --version &>/dev/null ; then
        PYTHON="${p}"
        break
    fi
done

# Nothing usable: tell the user which lookup failed.  An explicitly requested
# interpreter gets its own message instead of the generic version list.
if [ -z "${PYTHON}" ] ; then
    if [ -n "${REQUESTED_PYTHON}" ] ; then
        echo "The requested python interpreter '${REQUESTED_PYTHON}' wasn't found or couldn't be run." >&2
    else
        echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
    fi
    exit 1
fi

# An existing virtualenv is only reused when the data directory carries the
# .mww-data-dir marker (written at the end of a completed run of this
# script); without the marker the virtualenv is removed.
if [ -d "${VENV}" ] && [ ! -f "${DATA_DIR}/.mww-data-dir" ] ; then
    rm -rf "${VENV}"
elif [ -d "${VENV}" ] ; then
    if ! source "${VENV}/bin/activate" ; then
        echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
        exit 1
    fi
fi

echo "===== Setting up Python environment ${VENV} ====="

# VIRTUAL_ENV is only set at this point if an existing virtualenv was
# activated above.
if [ -z "${VIRTUAL_ENV:-}" ] ; then
    echo "   ===== Creating new virtualenv at '${VENV}' ====="
else
    echo "   ===== Updating virtualenv at '${VENV}' ====="
fi

# "set -e" is not active yet, so both steps are checked explicitly.  Without
# these checks a failed venv creation would let every following
# "pip install" run against whatever python environment happens to be on
# PATH instead of the virtualenv.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
    echo "Failed to create or update virtualenv '${VENV}' using '${PYTHON}'." >&2
    exit 1
}
source "${VENV}/bin/activate" || {
    echo "Unable to activate virtualenv '${VENV}'." >&2
    exit 1
}

# From here on: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -euo pipefail

# Symlink CLI scripts into the virtualenv's bin directory.
# The executable files directly inside PROGDIR are read NUL-delimited so
# that paths containing whitespace or glob characters stay intact.
declare -a progfiles=()
mapfile -d '' -t progfiles < <(
    find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0
)
progfiles+=( "${PROGDIR}/shell.functions" )

# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
    progfiles+=( "${ROOTDIR}/train_wake_word" )
fi

# -r makes each link relative to its own location, -f replaces links left
# over from an earlier run.
for f in "${progfiles[@]}" ; do
    ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done

#
# Installation order matters. Pip does not process the entries of a
# requirements.txt in order, yet tensorflow, torch, onnxruntime and
# micro-wake-word all depend on CUDA packages at various versions. They are
# therefore installed one after another in a fixed sequence; otherwise they
# may not be able to use the GPU.
#
# Settings exported to every pip invocation below.
export PIP_PROGRESS_BAR=off PIP_NO_COLOR=1 PIP_QUIET=0

#######################################
# Run "pip install" with the given arguments.
# In verbose mode pip's output is shown unchanged.  Otherwise all printable
# characters are stripped from pip's stdout and every remaining newline is
# turned into a ".", which gives one progress dot per output line.
# Globals:   VERBOSE (read) - "true" shows pip's output; unset or empty is
#            treated as "false"
# Arguments: anything accepted by "pip install"
# Outputs:   pip's output or progress dots on stdout, followed by a newline
#            on success
# Returns:   0 on success, 1 if pip or one of the filter stages failed
#######################################
pip_install() {
    local -a stage_status=()
    local status

    if "${VERBOSE:-false}" ; then
        pip install "$@" || return 1
    else
        # A "return" placed inside a pipeline stage only leaves that stage's
        # subshell, so the exit status of every stage is collected from
        # PIPESTATUS instead.  This reports a pip failure even when
        # "pipefail" or "errexit" are not in effect (for example when this
        # function is called from an "if" condition).  Both arms of the list
        # read the PIPESTATUS of the pipeline itself.
        pip install "$@" \
            | stdbuf -i0 -o0 tr -d '[:print:]' \
            | stdbuf -i0 -o0 tr '\n' '.' \
            && stage_status=( "${PIPESTATUS[@]}" ) \
            || stage_status=( "${PIPESTATUS[@]}" )
        for status in "${stage_status[@]}" ; do
            [ "${status}" -eq 0 ] || return 1
        done
    fi
    echo
}

# Remember when package installation started; the value is handed to
# print_elapsed_time at the end of the script.
START_TS="${EPOCHSECONDS}"

printf '%s\n' "   ===== Installing common requirements ====="
# The requirements file is kept in the repository root.
pip_install -r "${ROOTDIR}/requirements.txt"

# Tensorflow: the "[and-cuda]" extra is requested only for GPU setups.
if ${GPU} ; then
    tfgpu='[and-cuda]'
else
    tfgpu=""
fi
echo "   ===== Installing Tensorflow${tfgpu} ====="
pip_install ai_edge_litert "tensorflow${tfgpu}==2.20.0" "tensorboard==2.20.0" \
    "tensorboard-data-server==0.7.2"

# Torch: GPU setups install from the cu129 wheel index.  The extra pip
# arguments are kept in an array so that they are passed as separate words.
declare -a torch_index_args=()
torch_label=""
if ${GPU} ; then
    torch_index_args=( --index-url "https://download.pytorch.org/whl/cu129" )
    torch_label="[cuda]"
fi
echo "   ===== Installing torch and torchaudio ${torch_label} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index_args[@]}"

echo "   ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
# Clone when the checkout is missing.  A checkout for which
# "git status --porcelain" reports anything is discarded and cloned again.
if [ ! -d "${MWW}" ] || [ -n "$(git -C "${MWW}" status --porcelain)" ] ; then
    rm -rf "${MWW}" || :
    echo "   Cloning micro-wake-word to ${DATA_DIR}/tools"
    # --quiet suppresses progress output but keeps git's error messages, so
    # a failed clone is no longer silent.
    git clone --quiet https://github.com/TaterTotterson/micro-wake-word "${MWW}" >/dev/null || {
        echo "   Failed to clone micro-wake-word into '${MWW}'." >&2
        exit 1
    }
fi
echo "   Installing microwakeword"
pip_install -e "${MWW}"

echo "   ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
# Clone when the checkout is missing.  A checkout for which
# "git status --porcelain" reports anything is discarded and cloned again.
if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
    rm -rf "${PSG}" || :
    echo "   Cloning piper-sample-generator to ${DATA_DIR}/tools"
    # --quiet suppresses progress output but keeps git's error messages, so
    # a failed clone is no longer silent.
    git clone --quiet https://github.com/rhasspy/piper-sample-generator "${PSG}" >/dev/null || {
        echo "   Failed to clone piper-sample-generator into '${PSG}'." >&2
        exit 1
    }
fi
echo "   Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files and directories from the checkout after the
# editable install.  The checkout is addressed through PSG rather than a
# path relative to the current directory.
git -C "${PSG}" clean -fd &>/dev/null

MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
# Both download directories are created up front; curl does not create the
# parent directory of its output file.
# NOTE(review): files stored here live inside the piper-sample-generator
# checkout; whether they are ignored by that repository (and so invisible to
# the "git status --porcelain" check on the next run) is not visible from
# this script - confirm.
mkdir -p "${MODELS_DIR}" "${VOICES_DIR}"

# --- English generator model (multi-speaker, used with --language=en) ---
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"

# Fetch the model and its .json companion if they are missing.  Each file is
# downloaded to "<name>.part" and renamed only after curl succeeded, so an
# interrupted download can never be mistaken for a complete file on the next
# run.
for suffix in "" ".json" ; do
    target="${MODEL_FILE}${suffix}"
    if [ ! -f "${target}" ] ; then
        echo "      Downloading ${MODEL_NAME}${suffix} for piper-sample-generator"
        curl -sfL "${MODEL_URL}${suffix}" -o "${target}.part" || {
            rm -f "${target}.part"
            echo "      Failed to download ${MODEL_URL}${suffix}" >&2
            exit 1
        }
        mv "${target}.part" "${target}"
    fi
done

# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
declare -a NL_VOICES=(
    "nl/nl_NL/pim/medium/nl_NL-pim-medium"
    "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
    "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
)
echo "   ===== Checking Dutch Piper voices ====="
# Every voice consists of an .onnx model and an .onnx.json config.  Missing
# files are downloaded to "<name>.part" and renamed only after curl
# succeeded, so an interrupted download can never be mistaken for a complete
# file on the next run.
for voice_path in "${NL_VOICES[@]}" ; do
    voice_name="$(basename "${voice_path}")"
    for suffix in ".onnx" ".onnx.json" ; do
        voice_file="${VOICES_DIR}/${voice_name}${suffix}"
        if [ ! -f "${voice_file}" ] ; then
            echo "      Downloading ${voice_name}${suffix}"
            curl -sfL "${HF_VOICES}/${voice_path}${suffix}?download=true" -o "${voice_file}.part" || {
                rm -f "${voice_file}.part"
                echo "      Failed to download ${HF_VOICES}/${voice_path}${suffix}" >&2
                exit 1
            }
            mv "${voice_file}.part" "${voice_file}"
        fi
    done
done

# onnxruntime: GPU setups install the "onnxruntime-gpu" distribution with
# its "cuda" extra, everything else the plain "onnxruntime" package.
if ${GPU} ; then
    onnxgpu='-gpu[cuda]'
else
    onnxgpu=""
fi
echo "   ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"

# keras is pinned to 3.12.0 because 3.13 has "issues".
echo "   ===== Installing keras ====="
pip_install "keras==3.12.0"

# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe and
# does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
    CUDA_DATA_DIR="${DATA_DIR}/cuda"
    LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
    mkdir -p "${LIBDEVICE_DIR}"

    # Look for Triton's libdevice inside the virtualenv with a shell glob on
    # the absolute virtualenv path.  Python's recursive "**" glob does not
    # descend into hidden directories such as ".venv", and a path relative
    # to the current directory would produce a dangling symlink because
    # symlink targets are resolved relative to the link's own directory.
    TRITON_LIBDEVICE=""
    for candidate in "${VENV}"/lib*/python*/site-packages/triton/backends/nvidia/lib/libdevice.10.bc ; do
        # An unmatched glob stays literal and simply fails the -f test.
        if [ -f "${candidate}" ] ; then
            TRITON_LIBDEVICE="${candidate}"
            break
        fi
    done

    if [ -n "${TRITON_LIBDEVICE}" ] ; then
        ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
        echo "   Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
    else
        echo "   ℹ️  Triton libdevice.10.bc not found (ok)"
    fi
fi

# Run the companion test_python script against the data directory.
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

# Leave the marker that lets a later run reuse the virtualenv instead of
# deleting it, then take the end timestamp.
touch .mww-data-dir
END_TS="${EPOCHSECONDS}"

printf '%s\n' "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."

print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"