#!/bin/bash
#
# setup_python_venv: create (or update) the Python virtualenv at
# "${DATA_DIR}/.venv" and install the training stack into it: common
# requirements, TensorFlow, torch/torchaudio, micro-wake-word,
# piper-sample-generator (plus its models/voices), onnxruntime and keras.
#
# Usage: see the --help text below.
#
# Environment overrides: MWW_TF_SPEC, MWW_TENSORBOARD_SPEC, MWW_KERAS_SPEC
# (described in the --help text).
#
# NOTE(review): "shell.functions" is not visible from this file. It presumably
# parses the command line (driven by KNOWN_ARGS) into DATA_DIR, PYTHON, GPU,
# VERBOSE, HELP and UNKNOWN_ARGS, and provides print_elapsed_time -- confirm.
#

PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

# Read by shell.functions (presumably the list of accepted long options).
KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"

#######################################
# Print the compute capability of the first GPU reported by nvidia-smi,
# with all whitespace removed. Prints nothing if nvidia-smi is unavailable.
# Outputs: compute capability (e.g. "12.0") on stdout, possibly empty.
#######################################
detect_gpu_compute_capability() {
  if command -v nvidia-smi >/dev/null 2>&1 ; then
    nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null \
      | head -n 1 \
      | tr -d '[:space:]'
  fi
}

#######################################
# Run "pip install" with the given arguments.
# In verbose mode pip's output is shown as-is; otherwise every line of pip's
# stdout is reduced to a single "." so the user still sees progress.
# Globals:   VERBOSE (read; treated as "false" when unset or empty)
# Arguments: passed through to "pip install"
# Returns:   0 on success, 1 if the install (or the filter pipeline) fails
#######################################
pip_install() {
  if "${VERBOSE:-false}" ; then
    pip install "$@" || return 1
  else
    # Relies on "set -o pipefail" (enabled before the first call) so that a
    # failing pip is not masked by the succeeding tr stages.
    pip install "$@" \
      | stdbuf -i0 -o0 tr -d '[:print:]' \
      | stdbuf -i0 -o0 tr '\n' '.' \
      || return 1
  fi
  echo
}

#######################################
# Download a file unless it already exists.
# The data is written to "<dest>.part" and renamed only after curl succeeds,
# so an interrupted transfer is never mistaken for a complete file on the
# next run.
# Arguments: $1 - URL, $2 - destination path, $3 - progress message
# Returns:   0 if the file exists or was downloaded, 1 on download failure
#######################################
download_if_missing() {
  local url=$1
  local dest=$2
  local message=$3
  local partial="${dest}.part"

  if [ -f "${dest}" ] ; then
    return 0
  fi
  echo "${message}"
  mkdir -p "$(dirname "${dest}")"
  if ! curl -sfL "${url}" -o "${partial}" ; then
    rm -f "${partial}"
    echo "Failed to download '${url}'." >&2
    return 1
  fi
  mv -f "${partial}" "${dest}"
}

#######################################
# Print the absolute path of Triton's libdevice.10.bc if it is installed in
# the active interpreter's site-packages; print nothing otherwise.
# The path is taken from sysconfig rather than a "**" glob from the current
# directory because glob's "**" skips hidden directories such as ".venv", and
# an absolute path is required for the symlink created by the caller.
# Outputs: absolute file path on stdout (no trailing newline), or nothing.
#######################################
find_triton_libdevice() {
  python - <<'PY'
import os
import sysconfig

relative = os.path.join("triton", "backends", "nvidia", "lib", "libdevice.10.bc")
paths = sysconfig.get_paths()
checked = []
for key in ("purelib", "platlib"):
    root = paths.get(key)
    if not root or root in checked:
        continue
    checked.append(root)
    candidate = os.path.join(root, relative)
    if os.path.isfile(candidate):
        print(os.path.abspath(candidate), end="")
        break
PY
}

if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

if [ "${HELP}" == "true" ] ; then
  cat <<'EOF' >&2
Usage: setup_python_venv [ --gpu | --no-gpu ] [ --verbose ]

Options:
  --gpu:      Install the GPU-capable versions of packages if available.
              This is the default if the script detects that a GPU is available.
  --no-gpu:   Install the non-GPU-capable versions of packages even if
              GPU-capable packages are available.
              This is the default if the script detects that a GPU is NOT available.
  --verbose:  Print the detailed "pip install" output.

Environment overrides:
  MWW_TF_SPEC:           Full TensorFlow package spec
                         (e.g. "tf-nightly[and-cuda]" or "tensorflow[and-cuda]==2.20.0").
  MWW_TENSORBOARD_SPEC:  Comma-separated TensorBoard package specs.
                         Example: "tensorboard==2.20.0,tensorboard-data-server==0.7.2"
  MWW_KERAS_SPEC:        Keras package spec to install explicitly.
EOF
  exit 1
fi

[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
[ -d "${DATA_DIR}" ] || {
  echo "Data directory '${DATA_DIR}' doesn't exist." >&2
  exit 1
}
# Everything below uses paths relative to the data directory, so a failed cd
# must stop the script (strict mode is not enabled yet at this point).
cd "${DATA_DIR}" || {
  echo "Unable to change to data directory '${DATA_DIR}'." >&2
  exit 1
}

# Auto-detect the GPU only when the command line did not decide already.
if [ -z "${GPU}" ] ; then
  GPU=false
  if [ -c /dev/nvidiactl ] ; then
    GPU=true
    echo " Nvidia GPU detected"
  fi
fi
"${GPU}" || export CUDA_VISIBLE_DEVICES=-1

GPU_COMPUTE_CAPABILITY=""
IS_BLACKWELL=false
if ${GPU} ; then
  GPU_COMPUTE_CAPABILITY="$(detect_gpu_compute_capability || true)"
  case "${GPU_COMPUTE_CAPABILITY}" in
    12.*) IS_BLACKWELL=true ;;
  esac
  if ${IS_BLACKWELL} ; then
    echo " Blackwell GPU detected (compute capability ${GPU_COMPUTE_CAPABILITY})"
  fi
fi

VENV="${DATA_DIR}/.venv"

# Leave any other virtualenv that happens to be active.
if [ -n "${VIRTUAL_ENV:-}" ] && [ "${VIRTUAL_ENV}" != "${VENV}" ] ; then
  if command -v deactivate >/dev/null 2>&1 ; then
    deactivate || :
  else
    # Recorder process can inherit VIRTUAL_ENV without the shell function.
    unset VIRTUAL_ENV
  fi
fi

if [ -n "${PYTHON}" ] ; then
  PYTHONS=( "${PYTHON}" )
  unset PYTHON
else
  # Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
  PYTHONS=( python3.12 python3.11 python3.10 )
fi

# Pick the first candidate interpreter that actually runs.
for p in "${PYTHONS[@]}" ; do
  "${p}" --version &>/dev/null && { PYTHON="${p}" ; break ; }
done

[ -n "${PYTHON}" ] || {
  echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
  exit 1
}

if [ -d "${VENV}" ] ; then
  if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
    # The marker file is written at the end of a successful run, so the
    # existing virtualenv is reused and updated.
    source "${VENV}/bin/activate" || {
      echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
      exit 1
    }
  else
    # No marker: a previous run did not finish, start from scratch.
    rm -rf "${VENV}"
  fi
fi

echo "===== Setting up Python environment ${VENV} ====="

if [ -z "$VIRTUAL_ENV" ] ; then
  echo " ===== Creating new virtualenv at '${VENV}' ====="
else
  echo " ===== Updating virtualenv at '${VENV}' ====="
fi

# Strict mode is only enabled further down, so check these steps explicitly.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
  echo "Unable to create or update virtualenv '${VENV}' with '${PYTHON}'." >&2
  exit 1
}
source "${VENV}/bin/activate" || {
  echo "Unable to activate virtualenv '${VENV}'." >&2
  exit 1
}

set -euo pipefail

# Symlink CLI scripts into .venv/bin
# (NUL-delimited so that paths containing whitespace stay intact.)
declare -a progfiles=()
mapfile -d '' -t progfiles < <(
  find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0
)
progfiles+=( "${PROGDIR}/shell.functions" )

# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
  progfiles+=( "${ROOTDIR}/train_wake_word" )
fi

for f in "${progfiles[@]}" ; do
  ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done

#
# Pip doesn't process packages from requirements.txt in order but order is
# important because tensorflow, torch, onnxruntime and micro-wake-word all
# depend on CUDA packages at various versions. They need to be installed in
# this specific order or they may not be able to use the GPU.
#

export PIP_PROGRESS_BAR=off
export PIP_NO_COLOR=1
export PIP_QUIET=0

START_TS=$EPOCHSECONDS

echo " ===== Installing common requirements ====="
# requirements.txt lives in repo root now
pip_install -r "${ROOTDIR}/requirements.txt"

tfgpu=""
if ${GPU} ; then
  tfgpu='[and-cuda]'
fi

declare -a default_tensorboard_specs=()
if ${GPU} && ${IS_BLACKWELL} ; then
  # Blackwell path: prefer nightly TF while upstream stable wheels catch up.
  DEFAULT_TF_SPEC="tf-nightly${tfgpu}"
  # Let tf-nightly resolve a compatible TensorBoard dependency by default.
  default_tensorboard_specs=()
else
  DEFAULT_TF_SPEC="tensorflow${tfgpu}==2.20.0"
  default_tensorboard_specs=( "tensorboard==2.20.0" "tensorboard-data-server==0.7.2" )
fi

TF_SPEC="${MWW_TF_SPEC:-${DEFAULT_TF_SPEC}}"

declare -a tf_install_specs=( ai_edge_litert "${TF_SPEC}" )
if [ -n "${MWW_TENSORBOARD_SPEC:-}" ] ; then
  IFS=',' read -r -a user_tb_specs <<< "${MWW_TENSORBOARD_SPEC}"
  for tb_spec in "${user_tb_specs[@]}" ; do
    # Trim leading, then trailing, whitespace around each comma-separated spec.
    tb_spec="${tb_spec#"${tb_spec%%[![:space:]]*}"}"
    tb_spec="${tb_spec%"${tb_spec##*[![:space:]]}"}"
    if [ -n "${tb_spec}" ] ; then
      tf_install_specs+=( "${tb_spec}" )
    fi
  done
else
  tf_install_specs+=( "${default_tensorboard_specs[@]}" )
fi

echo " ===== Installing TensorFlow stack (${TF_SPEC}) ====="
pip_install "${tf_install_specs[@]}"

# With a GPU, torch comes from the CUDA 12.9 wheel index.
declare -a torch_index_args=()
torch_label=""
if ${GPU} ; then
  torch_index_args=( --index-url https://download.pytorch.org/whl/cu129 )
  torch_label="[cuda]"
fi
echo " ===== Installing torch and torchaudio ${torch_label} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index_args[@]}"

echo " ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
# Re-clone when the checkout is missing or has local modifications.
if [ ! -d "${MWW}" ] || [ -n "$(git -C "${MWW}" status --porcelain)" ] ; then
  rm -rf "${MWW}" || :
  echo " Cloning micro-wake-word to ${DATA_DIR}/tools"
  git clone https://github.com/TaterTotterson/micro-wake-word "${MWW}" &>/dev/null || {
    echo "Failed to clone micro-wake-word into '${MWW}'." >&2
    exit 1
  }
fi
echo " Installing microwakeword"
pip_install -e "${MWW}"

echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
PSG_GITHUB_ROOT="https://github.com/TaterTotterson/piper-sample-generator"

# Re-clone when the checkout is missing, has local modifications, or its
# origin does not point at the expected repository.
RECLONE_PSG=false
if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
  RECLONE_PSG=true
else
  PSG_ORIGIN_URL="$(git -C "${PSG}" remote get-url origin 2>/dev/null || true)"
  if [[ "${PSG_ORIGIN_URL}" != *"TaterTotterson/piper-sample-generator"* ]] ; then
    RECLONE_PSG=true
  fi
fi

if ${RECLONE_PSG} ; then
  rm -rf "${PSG}" || :
  echo " Cloning piper-sample-generator to ${DATA_DIR}/tools"
  git clone "${PSG_GITHUB_ROOT}" "${PSG}" &>/dev/null || {
    echo "Failed to clone piper-sample-generator into '${PSG}'." >&2
    exit 1
  }
fi
echo " Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files and directories from the checkout after installing.
git -C "${PSG}" clean -fd &>/dev/null

MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
mkdir -p "${MODELS_DIR}" "${VOICES_DIR}"

# --- English generator model (multi-speaker, used with --language=en) ---
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="${PSG_GITHUB_ROOT}/releases/download/v2.0.0/${MODEL_NAME}"

download_if_missing "${MODEL_URL}" "${MODEL_FILE}" \
  " Downloading ${MODEL_NAME} for piper-sample-generator"
download_if_missing "${MODEL_URL}.json" "${MODEL_FILE}.json" \
  " Downloading ${MODEL_NAME}.json for piper-sample-generator"

# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
declare -a NL_VOICES=(
  "nl/nl_NL/pim/medium/nl_NL-pim-medium"
  "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
  "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
)

echo " ===== Checking Dutch Piper voices ====="
for voice_path in "${NL_VOICES[@]}" ; do
  voice_name="$(basename "${voice_path}")"
  download_if_missing \
    "${HF_VOICES}/${voice_path}.onnx?download=true" \
    "${VOICES_DIR}/${voice_name}.onnx" \
    " Downloading ${voice_name}.onnx"
  download_if_missing \
    "${HF_VOICES}/${voice_path}.onnx.json?download=true" \
    "${VOICES_DIR}/${voice_name}.onnx.json" \
    " Downloading ${voice_name}.onnx.json"
done

onnxgpu=""
if ${GPU} ; then
  onnxgpu='-gpu[cuda]'
fi
echo " ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"

echo " ===== Installing keras ====="
# Default: keep the known-good pin with stable TF 2.20.
# For tf-nightly/custom TF specs, skip this pin unless explicitly requested.
if [ -n "${MWW_KERAS_SPEC:-}" ] ; then
  pip_install "${MWW_KERAS_SPEC}"
elif [ -n "${MWW_TF_SPEC:-}" ] || [[ "${TF_SPEC}" == tf-nightly* ]] ; then
  echo " Skipping explicit keras pin for ${TF_SPEC} (set MWW_KERAS_SPEC to force one)."
else
  pip_install "keras==3.12.0"
fi

# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe and
# does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"

  # This step is optional, so a failing lookup is treated as "not found".
  TRITON_LIBDEVICE="$(find_triton_libdevice || true)"

  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    # TRITON_LIBDEVICE is absolute, so the link resolves from any directory.
    ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    echo " ℹ️ Triton libdevice.10.bc not found (ok)"
  fi
fi

"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

# Marker checked at the start of the next run: with it present the existing
# virtualenv is updated in place instead of being deleted.
touch .mww-data-dir

END_TS=$EPOCHSECONDS
echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"