Files
microWakeWord-Trainer-Nvidi…/cli/setup_python_venv
MasterPhooey 058d73beaf piper fix
2026-03-14 00:18:29 -05:00

319 lines
10 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Locate this script (symlinks resolved by realpath) and the directory one
# level above it.
PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

# Option names accepted by this script. Presumably consumed by
# shell.functions, which is expected to fill in UNKNOWN_ARGS, HELP and the
# per-option variables -- confirm against that file.
KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"

# Any unrecognised argument is reported and forces the usage text.
if (( ${#UNKNOWN_ARGS[@]} > 0 )) ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi
# Print the usage text on stderr and exit with status 1 when HELP is "true".
# The text lists every option named in KNOWN_ARGS, including --data-dir and
# --python.
if [ "${HELP}" == "true" ] ; then
  cat <<EOF >&2
Usage: setup_python_venv [ --data-dir=<dir> ] [ --python=<interpreter> ] [ --gpu | --no-gpu ] [ --verbose ]
Options:
--data-dir=<dir>: Existing directory in which the virtualenv (.venv) and
the tools checkouts are set up.
--python=<interpreter>: Python interpreter to use instead of probing for
python3.12, python3.11 and python3.10.
--gpu: Install the GPU-capable versions of packages if available. This
is the default if the script detects that a GPU is available.
--no-gpu: Install the non-GPU-capable versions of packages even if
GPU-capable packages are available. This is the default if the script
detects that a GPU is NOT available.
--verbose: Print the detailed "pip install" output.
Environment overrides:
MWW_TF_SPEC: Full TensorFlow package spec (e.g. "tf-nightly[and-cuda]"
or "tensorflow[and-cuda]==2.20.0").
MWW_TENSORBOARD_SPEC: Comma-separated TensorBoard package specs.
Example: "tensorboard==2.20.0,tensorboard-data-server==0.7.2"
MWW_KERAS_SPEC: Keras package spec to install explicitly.
EOF
  exit 1
fi
# Canonicalise the data directory and make it the working directory; later
# steps use paths relative to it (.venv/bin, .mww-data-dir).
if [ -n "${DATA_DIR}" ] ; then
  # Keep the name the user supplied when realpath cannot resolve it, so the
  # error message below still identifies the offending path.
  if RESOLVED_DATA_DIR="$(realpath "${DATA_DIR}" 2>/dev/null)" ; then
    DATA_DIR="${RESOLVED_DATA_DIR}"
  fi
  unset RESOLVED_DATA_DIR
fi
if [ ! -d "${DATA_DIR}" ] ; then
  echo "Data directory '${DATA_DIR}' doesn't exist." >&2
  exit 1
fi
# "set -e" is not active yet, so a failed cd has to be caught explicitly;
# otherwise the relative paths used later would land in the wrong directory.
cd "${DATA_DIR}" || {
  echo "Unable to change to data directory '${DATA_DIR}'." >&2
  exit 1
}
# Decide whether to target the GPU. A GPU value that is already set is left
# alone; otherwise the presence of the Nvidia control device decides.
if [[ -z "${GPU}" ]] ; then
  if [[ -c /dev/nvidiactl ]] ; then
    GPU=true
    echo " Nvidia GPU detected"
  else
    GPU=false
  fi
fi
# CPU-only runs export CUDA_VISIBLE_DEVICES=-1 for every later command.
if ! "${GPU}" ; then
  export CUDA_VISIBLE_DEVICES=-1
fi
#######################################
# Print the compute capability reported by nvidia-smi for the first GPU.
# Arguments: none
# Outputs:   the first line of nvidia-smi's compute_cap query with all
#            whitespace removed; nothing when nvidia-smi is unavailable
# Returns:   0 when nvidia-smi is unavailable, otherwise the status of the
#            query pipeline
#######################################
detect_gpu_compute_capability() {
  # Without nvidia-smi there is nothing to report.
  command -v nvidia-smi >/dev/null 2>&1 || return 0

  nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null \
    | head -n 1 \
    | tr -d '[:space:]'
}
# Defaults used when no GPU is targeted or the capability is unknown.
GPU_COMPUTE_CAPABILITY=""
IS_BLACKWELL=false
if ${GPU} ; then
  GPU_COMPUTE_CAPABILITY="$(detect_gpu_compute_capability || true)"
  # A compute capability of 12.x is treated as Blackwell.
  if [[ "${GPU_COMPUTE_CAPABILITY}" == 12.* ]] ; then
    IS_BLACKWELL=true
    echo " Blackwell GPU detected (compute capability ${GPU_COMPUTE_CAPABILITY})"
  fi
fi
# The virtualenv always lives directly inside the data directory.
VENV="${DATA_DIR}/.venv"

# Leave any other virtualenv that was inherited from the caller.
if [[ -n "${VIRTUAL_ENV:-}" && "${VIRTUAL_ENV}" != "${VENV}" ]] ; then
  if ! command -v deactivate >/dev/null 2>&1 ; then
    # Recorder process can inherit VIRTUAL_ENV without the shell function.
    unset VIRTUAL_ENV
  else
    deactivate || :
  fi
fi
# Build the candidate list: only the requested interpreter when PYTHON is
# set, otherwise the supported versions from newest to oldest.
if [[ -z "${PYTHON}" ]] ; then
  # Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
  PYTHONS=( python3.12 python3.11 python3.10 )
else
  PYTHONS=( "${PYTHON}" )
  unset PYTHON
fi

# The first candidate that can report its version is used.
for p in "${PYTHONS[@]}" ; do
  if "${p}" --version &>/dev/null ; then
    PYTHON="${p}"
    break
  fi
done

if [[ -z "${PYTHON}" ]] ; then
  echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
  exit 1
fi
# Reuse an existing virtualenv only when the data directory carries the
# .mww-data-dir marker; a virtualenv without the marker is deleted and
# rebuilt from scratch.
if [ -d "${VENV}" ] ; then
  if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
    source "${VENV}/bin/activate" || {
      echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
      exit 1
    }
  else
    rm -rf "${VENV}"
  fi
fi
echo "===== Setting up Python environment ${VENV} ====="
# VIRTUAL_ENV is only set at this point when an existing virtualenv was
# activated above.
if [ -z "${VIRTUAL_ENV:-}" ] ; then
  echo " ===== Creating new virtualenv at '${VENV}' ====="
else
  echo " ===== Updating virtualenv at '${VENV}' ====="
fi
# "set -e" is not active yet, so both steps are checked explicitly: without
# a working virtualenv the pip installs that follow would run against
# whatever environment happens to be on PATH.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
  echo "Unable to create or update virtualenv '${VENV}' using '${PYTHON}'." >&2
  exit 1
}
source "${VENV}/bin/activate" || {
  echo "Unable to activate virtualenv '${VENV}'." >&2
  exit 1
}
# From here on any unchecked failure, unset variable or failing pipeline
# stage aborts the script.
set -euo pipefail
# Symlink CLI scripts into .venv/bin
declare -a progfiles=()
# Read NUL-delimited names so that paths containing whitespace or glob
# characters are kept intact.
mapfile -d '' -t progfiles < <(
  find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0
)
progfiles+=( "${PROGDIR}/shell.functions" )
# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
  progfiles+=( "${ROOTDIR}/train_wake_word" )
fi
for f in "${progfiles[@]}" ; do
  # -r makes the link relative; the destination uses VENV so it does not
  # depend on the current working directory.
  ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done
#
# The package groups below are installed one after another rather than from
# a single requirements file: pip does not honour the order of entries in
# requirements.txt, yet tensorflow, torch, onnxruntime and micro-wake-word
# each depend on CUDA packages at various versions. Installing them in any
# other order may leave them unable to use the GPU.
#
# pip settings exported for every "pip install" that follows.
export PIP_PROGRESS_BAR=off PIP_NO_COLOR=1 PIP_QUIET=0
#######################################
# Run "pip install" with the given arguments.
# With VERBOSE "true" pip's output is shown unchanged; otherwise all
# printable characters are stripped and each output line becomes a single
# "." as a progress indicator.
# Globals:   VERBOSE (read; treated as "false" when unset or empty)
# Arguments: passed to "pip install" unchanged
# Outputs:   pip's output or the progress dots, followed by a newline
# Returns:   the exit status of pip itself
#######################################
pip_install() {
  local rc=0
  if "${VERBOSE:-false}" ; then
    pip install "$@" || rc=$?
  else
    # The pipeline ends in tr, so its own status says nothing about pip
    # unless the caller enabled pipefail. Run it in a subshell (where
    # errexit can be switched off safely) and hand back pip's status.
    (
      set +e
      pip install "$@" \
        | stdbuf -i0 -o0 tr -d '[:print:]' \
        | stdbuf -i0 -o0 tr '\n' '.'
      exit "${PIPESTATUS[0]}"
    ) || rc=$?
  fi
  echo
  return "${rc}"
}
# Start of the installation phase; used for the elapsed-time report.
START_TS=$EPOCHSECONDS
echo " ===== Installing common requirements ====="
# requirements.txt lives in repo root now
pip_install -r "${ROOTDIR}/requirements.txt"

# The "[and-cuda]" extra is requested for GPU installs only.
if ${GPU} ; then
  tfgpu='[and-cuda]'
else
  tfgpu=""
fi

# Default TensorFlow spec and the TensorBoard pins that accompany it.
declare -a default_tensorboard_specs=()
if ${GPU} && ${IS_BLACKWELL} ; then
  # Blackwell path: prefer nightly TF while upstream stable wheels catch up.
  # Let tf-nightly resolve a compatible TensorBoard dependency by default,
  # so the list of TensorBoard pins stays empty.
  DEFAULT_TF_SPEC="tf-nightly${tfgpu}"
else
  DEFAULT_TF_SPEC="tensorflow${tfgpu}==2.20.0"
  default_tensorboard_specs=( "tensorboard==2.20.0" "tensorboard-data-server==0.7.2" )
fi

# MWW_TF_SPEC, when set, replaces the default spec entirely.
TF_SPEC="${MWW_TF_SPEC:-${DEFAULT_TF_SPEC}}"
declare -a tf_install_specs=( ai_edge_litert "${TF_SPEC}" )

if [[ -z "${MWW_TENSORBOARD_SPEC:-}" ]] ; then
  tf_install_specs+=( "${default_tensorboard_specs[@]}" )
else
  # Split the comma-separated override, trim surrounding whitespace from
  # each entry and drop entries that end up empty.
  IFS=',' read -r -a user_tb_specs <<< "${MWW_TENSORBOARD_SPEC}"
  for tb_spec in "${user_tb_specs[@]}" ; do
    tb_spec="${tb_spec#"${tb_spec%%[![:space:]]*}"}"
    tb_spec="${tb_spec%"${tb_spec##*[![:space:]]}"}"
    if [[ -n "${tb_spec}" ]] ; then
      tf_install_specs+=( "${tb_spec}" )
    fi
  done
fi

echo " ===== Installing TensorFlow stack (${TF_SPEC}) ====="
pip_install "${tf_install_specs[@]}"
# GPU installs pull torch from the PyTorch cu129 wheel index. The extra pip
# arguments are kept in an array so they are passed as separate words.
declare -a torch_index_args=()
torch_label=""
if ${GPU} ; then
  torch_index_args=( --index-url https://download.pytorch.org/whl/cu129 )
  torch_label="[cuda]"
fi
echo " ===== Installing torch and torchaudio ${torch_label} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index_args[@]}"
echo " ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
# Clone afresh when the checkout is missing or "git status" reports local
# modifications or untracked files.
if [ ! -d "${MWW}" ] || [ -n "$(git -C "${MWW}" status --porcelain)" ] ; then
  rm -rf "${MWW}" || :
  echo " Cloning micro-wake-word to ${DATA_DIR}/tools"
  # git's output stays hidden on success but is shown when the clone fails,
  # so the script does not abort without any explanation.
  if ! MWW_CLONE_LOG="$(git clone https://github.com/TaterTotterson/micro-wake-word "${MWW}" 2>&1)" ; then
    echo "Unable to clone micro-wake-word into '${MWW}':" >&2
    echo "${MWW_CLONE_LOG}" >&2
    exit 1
  fi
fi
echo " Installing microwakeword"
pip_install -e "${MWW}"
echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
PSG_GITHUB_ROOT="https://github.com/TaterTotterson/piper-sample-generator"
# Clone afresh when the checkout is missing, has local modifications or
# untracked files, or its "origin" remote is not the expected repository.
RECLONE_PSG=false
if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
  RECLONE_PSG=true
else
  PSG_ORIGIN_URL="$(git -C "${PSG}" remote get-url origin 2>/dev/null || true)"
  if [[ "${PSG_ORIGIN_URL}" != *"TaterTotterson/piper-sample-generator"* ]] ; then
    RECLONE_PSG=true
  fi
fi
if ${RECLONE_PSG} ; then
  rm -rf "${PSG}" || :
  echo " Cloning piper-sample-generator to ${DATA_DIR}/tools"
  # git's output stays hidden on success but is shown when the clone fails,
  # so the script does not abort without any explanation.
  if ! PSG_CLONE_LOG="$(git clone "${PSG_GITHUB_ROOT}" "${PSG}" 2>&1)" ; then
    echo "Unable to clone piper-sample-generator into '${PSG}':" >&2
    echo "${PSG_CLONE_LOG}" >&2
    exit 1
  fi
fi
echo " Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files and directories from the checkout (ignored files
# are kept by "git clean -fd").
git -C "${PSG}" clean -fd &>/dev/null
MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
# curl does not create missing parent directories, so make sure both
# download destinations exist.
mkdir -p "${MODELS_DIR}" "${VOICES_DIR}"

#######################################
# Download a file unless the destination already exists.
# The data is written to "<dest>.part" first and renamed only after curl
# succeeded, so an interrupted download is never mistaken for a complete
# file on the next run.
# Arguments: $1 - URL to fetch
#            $2 - destination path
#            $3 - text shown after " Downloading "
# Outputs:   a progress line on stdout; an error message on stderr
# Returns:   0 when the file exists or was downloaded, 1 on failure
#######################################
download_if_missing() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"
  if [ -f "${dest}" ] ; then
    return 0
  fi
  echo " Downloading ${label}"
  if ! curl -sfL "${url}" -o "${partial}" ; then
    rm -f "${partial}"
    echo "Unable to download '${url}'." >&2
    return 1
  fi
  mv -f "${partial}" "${dest}"
}

# --- English generator model (multi-speaker, used with --language=en) ---
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="${PSG_GITHUB_ROOT}/releases/download/v2.0.0/${MODEL_NAME}"
download_if_missing "${MODEL_URL}" "${MODEL_FILE}" \
  "${MODEL_NAME} for piper-sample-generator"
download_if_missing "${MODEL_URL}.json" "${MODEL_FILE}.json" \
  "${MODEL_NAME}.json for piper-sample-generator"

# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
declare -a NL_VOICES=(
  "nl/nl_NL/pim/medium/nl_NL-pim-medium"
  "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
  "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
)
echo " ===== Checking Dutch Piper voices ====="
for voice_path in "${NL_VOICES[@]}" ; do
  voice_name="$(basename "${voice_path}")"
  onnx_file="${VOICES_DIR}/${voice_name}.onnx"
  json_file="${VOICES_DIR}/${voice_name}.onnx.json"
  download_if_missing "${HF_VOICES}/${voice_path}.onnx?download=true" \
    "${onnx_file}" "${voice_name}.onnx"
  download_if_missing "${HF_VOICES}/${voice_path}.onnx.json?download=true" \
    "${json_file}" "${voice_name}.onnx.json"
done
# GPU installs request "onnxruntime-gpu[cuda]"; CPU installs plain
# "onnxruntime".
if ${GPU} ; then
  onnxgpu='-gpu[cuda]'
else
  onnxgpu=""
fi
echo " ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"
echo " ===== Installing keras ====="
# Work out which keras spec, if any, to install:
#   - MWW_KERAS_SPEC always wins when set;
#   - the known-good pin is kept for the default stable TF 2.20 stack;
#   - for tf-nightly or a custom MWW_TF_SPEC nothing is pinned.
keras_spec=""
if [[ -n "${MWW_KERAS_SPEC:-}" ]] ; then
  keras_spec="${MWW_KERAS_SPEC}"
elif [[ -z "${MWW_TF_SPEC:-}" && "${TF_SPEC}" != tf-nightly* ]] ; then
  keras_spec="keras==3.12.0"
fi

if [[ -n "${keras_spec}" ]] ; then
  pip_install "${keras_spec}"
else
  echo " Skipping explicit keras pin for ${TF_SPEC} (set MWW_KERAS_SPEC to force one)."
fi
# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe and
# does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"
  # Ask the active interpreter for its site-packages directories instead of
  # globbing from the working directory: a recursive "**" glob does not
  # descend into hidden directories such as .venv, and a relative result
  # would produce a dangling symlink. The path printed here is absolute.
  TRITON_LIBDEVICE="$(
    python - <<'PY'
import os
import sysconfig

suffix = os.path.join("triton", "backends", "nvidia", "lib", "libdevice.10.bc")
found = ""
for key in ("purelib", "platlib"):
    candidate = os.path.join(sysconfig.get_path(key), suffix)
    if os.path.isfile(candidate):
        found = os.path.abspath(candidate)
        break
print(found, end="")
PY
  )"
  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    echo " Triton libdevice.10.bc not found (ok)"
  fi
fi
# Run the sibling test_python script against the data directory (presumably
# a sanity check of the finished environment -- confirm in that script).
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"
# Drop the marker file in the current directory (the data directory); a later
# run only reuses an existing virtualenv when this marker is present.
touch .mww-data-dir
END_TS=$EPOCHSECONDS
printf '%s\n' "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"