mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
308 lines
10 KiB
Bash
Executable File
308 lines
10 KiB
Bash
Executable File
#!/bin/bash
#
# setup_python_venv: create or update the Python virtualenv in
# <data-dir>/.venv and install the wake-word training stack into it.
#
# Command-line handling is delegated to shell.functions (sourced below).
# NOTE(review): this script assumes shell.functions consumes KNOWN_ARGS and
# populates DATA_DIR, PYTHON, GPU, HELP, VERBOSE and UNKNOWN_ARGS, and that it
# defines print_elapsed_time -- confirm against that file.

PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"

# Any unrecognised argument is reported and then treated like a help request.
if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

if [ "${HELP}" == "true" ] ; then
  cat <<EOF >&2
Usage: setup_python_venv [ --gpu | --no-gpu ] [ --verbose ]

Options:
--gpu: Install the GPU-capable versions of packages if available. This
is the default if the script detects that a GPU is available.

--no-gpu: Install the non-GPU-capable versions of packages even if
GPU-capable packages are available. This is the default if the script
detects that a GPU is NOT available.

--verbose: Print the detailed "pip install" output.

Environment overrides:
MWW_TF_SPEC: Full TensorFlow package spec (e.g. "tf-nightly[and-cuda]"
or "tensorflow[and-cuda]==2.20.0").
MWW_TENSORBOARD_SPEC: Comma-separated TensorBoard package specs.
Example: "tensorboard==2.20.0,tensorboard-data-server==0.7.2"
MWW_KERAS_SPEC: Keras package spec to install explicitly.

EOF
  exit 1
fi

# Canonicalise the data directory; an empty or missing one is fatal.
[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
[ -d "${DATA_DIR}" ] || {
  echo "Data directory '${DATA_DIR}' doesn't exist." >&2
  exit 1
}

# Later steps use paths relative to the data directory, and "set -e" is not
# active yet, so a failed cd has to be caught explicitly.
cd "${DATA_DIR}" || {
  echo "Unable to change to data directory '${DATA_DIR}'." >&2
  exit 1
}

# If neither --gpu nor --no-gpu was given, use the presence of the Nvidia
# control device to decide.
if [ -z "${GPU}" ] ; then
  GPU=false
  if [ -c /dev/nvidiactl ] ; then
    GPU=true
    echo " Nvidia GPU detected"
  fi
fi

# GPU holds the command name "true" or "false"; hide all CUDA devices when
# running without a GPU.
"${GPU}" || export CUDA_VISIBLE_DEVICES=-1

#######################################
# Print the compute capability of the first GPU listed by nvidia-smi.
# Arguments: none
# Outputs:   the first line of nvidia-smi's compute_cap query with all
#            whitespace removed (no trailing newline); nothing when
#            nvidia-smi is not available or prints nothing
# Returns:   0 when nvidia-smi is not found; otherwise the status of the
#            query pipeline
#######################################
detect_gpu_compute_capability() {
  if command -v nvidia-smi >/dev/null 2>&1 ; then
    # Only the first GPU is considered; nvidia-smi errors are discarded.
    nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null \
      | head -n 1 \
      | tr -d '[:space:]'
  fi
}

# Compute capability 12.x is treated as Blackwell, which selects a different
# default TensorFlow package later in the script.
GPU_COMPUTE_CAPABILITY=""
IS_BLACKWELL=false
if ${GPU} ; then
  GPU_COMPUTE_CAPABILITY="$(detect_gpu_compute_capability || true)"
  case "${GPU_COMPUTE_CAPABILITY}" in
    12.*) IS_BLACKWELL=true ;;
  esac
  ${IS_BLACKWELL} && echo " Blackwell GPU detected (compute capability ${GPU_COMPUTE_CAPABILITY})"
fi

VENV="${DATA_DIR}/.venv"

# If a different virtualenv is already active, leave it before touching ours.
if [ -n "${VIRTUAL_ENV:-}" ] && [ "${VIRTUAL_ENV}" != "${VENV}" ] ; then
  if command -v deactivate >/dev/null 2>&1 ; then
    deactivate || :
  else
    # Recorder process can inherit VIRTUAL_ENV without the shell function.
    unset VIRTUAL_ENV
  fi
fi

# Candidate interpreters: the one requested by the caller, or a fixed
# preference list. PYTHON ends up set to the first one that runs.
if [ -n "${PYTHON}" ] ; then
  PYTHONS=( "${PYTHON}" )
  unset PYTHON
else
  # Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
  PYTHONS=( python3.12 python3.11 python3.10 )
fi

for p in "${PYTHONS[@]}" ; do
  "${p}" --version &>/dev/null && { PYTHON="${p}" ; break ; }
done

[ -n "${PYTHON}" ] || {
  echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
  exit 1
}

# An existing .venv is reused only when the .mww-data-dir marker (written at
# the end of a successful run) is present; otherwise it is discarded.
if [ -d "${VENV}" ] ; then
  if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
    source "${VENV}/bin/activate" || {
      echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
      exit 1
    }
  else
    rm -rf "${VENV:?}"
  fi
fi

echo "===== Setting up Python environment ${VENV} ====="

if [ -z "${VIRTUAL_ENV:-}" ] ; then
  echo " ===== Creating new virtualenv at '${VENV}' ====="
else
  echo " ===== Updating virtualenv at '${VENV}' ====="
fi

# "set -e" is not active yet, so check these explicitly: if the venv cannot
# be created or activated, the pip installs below must not run against
# whatever Python happens to be on PATH.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
  echo "Unable to create or update virtualenv '${VENV}' using '${PYTHON}'." >&2
  exit 1
}
source "${VENV}/bin/activate" || {
  echo "Unable to activate virtualenv '${VENV}'." >&2
  exit 1
}

set -euo pipefail

# Symlink CLI scripts into .venv/bin
# (NUL-delimited so file names containing whitespace stay intact.)
declare -a progfiles=()
mapfile -d '' -t progfiles < <(find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0)
progfiles+=( "${PROGDIR}/shell.functions" )

# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
  progfiles+=( "${ROOTDIR}/train_wake_word" )
fi

for f in "${progfiles[@]}" ; do
  ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done

#
# Pip doesn't process packages from requirements.txt in order but order is
# important because tensorflow, torch, onnxruntime and micro-wake-word all
# depend on CUDA packages at various versions. They need to be installed in
# this specific order or they may not be able to use the GPU.
#
export PIP_PROGRESS_BAR=off
export PIP_NO_COLOR=1
export PIP_QUIET=0

#######################################
# Run "pip install" with the given arguments.
# Globals:   VERBOSE (read) - run as a command, so it is expected to hold
#            "true" or "false"; unset or empty is treated as "false".
# Arguments: passed through unchanged to "pip install"
# Outputs:   verbose: pip's own output; otherwise one '.' per line of pip
#            output. A newline is always printed at the end.
# Returns:   0 on success, otherwise the non-zero status of pip (or of the
#            filter pipeline), independent of the caller's pipefail setting
#######################################
pip_install() {
  local rc=0
  if "${VERBOSE:-false}" ; then
    pip install "$@" || rc=$?
  else
    # The pipeline runs in a subshell with pipefail so that a pip failure
    # isn't hidden behind the exit status of the trailing "tr".
    # "tr -d '[:print:]'" strips the text of each line and the second "tr"
    # turns every remaining newline into a progress dot.
    (
      set -o pipefail
      pip install "$@" \
        | stdbuf -i0 -o0 tr -d '[:print:]' \
        | stdbuf -i0 -o0 tr '\n' '.'
    ) || rc=$?
  fi
  echo
  return "${rc}"
}

START_TS=$EPOCHSECONDS

#######################################
# Download a file unless it is already present.
# Arguments: $1 - URL to fetch
#            $2 - destination file
#            $3 - text printed after " Downloading " when a fetch is needed
# Returns:   0 if the file exists or was downloaded, 1 if curl failed (any
#            partial destination file is removed so a later run retries)
#######################################
download_if_missing() {
  local url="$1" dest="$2" label="$3"
  if [ -f "${dest}" ] ; then
    return 0
  fi
  echo " Downloading ${label}"
  curl -sfL "${url}" -o "${dest}" || {
    rm -f "${dest}"
    echo "Failed to download ${url}" >&2
    return 1
  }
}

echo " ===== Installing common requirements ====="
# requirements.txt lives in repo root now
pip_install -r "${ROOTDIR}/requirements.txt"

# GPU / IS_BLACKWELL hold the command names "true" or "false".
if ${GPU} ; then
  tfgpu='[and-cuda]'
else
  tfgpu=""
fi
declare -a default_tensorboard_specs=()

if ${GPU} && ${IS_BLACKWELL} ; then
  # Blackwell path: prefer nightly TF while upstream stable wheels catch up.
  DEFAULT_TF_SPEC="tf-nightly${tfgpu}"
  # Let tf-nightly resolve a compatible TensorBoard dependency by default.
  default_tensorboard_specs=()
else
  DEFAULT_TF_SPEC="tensorflow${tfgpu}==2.20.0"
  default_tensorboard_specs=( "tensorboard==2.20.0" "tensorboard-data-server==0.7.2" )
fi

TF_SPEC="${MWW_TF_SPEC:-${DEFAULT_TF_SPEC}}"
declare -a tf_install_specs=( ai_edge_litert "${TF_SPEC}" )

if [ -n "${MWW_TENSORBOARD_SPEC:-}" ] ; then
  # Split the comma-separated override, trim surrounding whitespace from each
  # entry and drop entries that end up empty.
  IFS=',' read -r -a user_tb_specs <<< "${MWW_TENSORBOARD_SPEC}"
  for tb_spec in "${user_tb_specs[@]}" ; do
    tb_spec="${tb_spec#"${tb_spec%%[![:space:]]*}"}"
    tb_spec="${tb_spec%"${tb_spec##*[![:space:]]}"}"
    if [ -n "${tb_spec}" ] ; then
      tf_install_specs+=( "${tb_spec}" )
    fi
  done
else
  tf_install_specs+=( "${default_tensorboard_specs[@]}" )
fi

echo " ===== Installing TensorFlow stack (${TF_SPEC}) ====="
pip_install "${tf_install_specs[@]}"

# Extra pip arguments are kept in an array so they reach pip as separate
# words without relying on unquoted word-splitting.
declare -a torch_index_args=()
torch_label=""
if ${GPU} ; then
  torch_index_args=( --index-url https://download.pytorch.org/whl/cu129 )
  torch_label="[cuda]"
fi
echo " ===== Installing torch and torchaudio ${torch_label} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index_args[@]}"

echo " ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
# Re-clone when the checkout is missing or has local modifications.
if [ ! -d "${MWW}" ] || [ -n "$(git -C "${MWW}" status --porcelain)" ] ; then
  rm -rf "${MWW:?}" || :
  echo " Cloning micro-wake-word to ${DATA_DIR}/tools"
  git clone https://github.com/TaterTotterson/micro-wake-word "${MWW}" &>/dev/null || {
    echo "Failed to clone micro-wake-word into '${MWW}'." >&2
    exit 1
  }
fi
echo " Installing microwakeword"
pip_install -e "${MWW}"

echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
# Re-clone when the checkout is missing or has local modifications.
if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
  rm -rf "${PSG:?}" || :
  echo " Cloning piper-sample-generator to ${DATA_DIR}/tools"
  git clone https://github.com/rhasspy/piper-sample-generator "${PSG}" &>/dev/null || {
    echo "Failed to clone piper-sample-generator into '${PSG}'." >&2
    exit 1
  }
fi
echo " Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files left in the checkout by the editable install.
git -C "${PSG}" clean -fd &>/dev/null

MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
# Both download targets must exist before curl writes into them.
mkdir -p "${MODELS_DIR}" "${VOICES_DIR}"

# --- English generator model (multi-speaker, used with --language=en) ---
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"
download_if_missing "${MODEL_URL}" "${MODEL_FILE}" "${MODEL_NAME} for piper-sample-generator"
download_if_missing "${MODEL_URL}.json" "${MODEL_FILE}.json" "${MODEL_NAME}.json for piper-sample-generator"

# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
declare -a NL_VOICES=(
  "nl/nl_NL/pim/medium/nl_NL-pim-medium"
  "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
  "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
)
echo " ===== Checking Dutch Piper voices ====="
for voice_path in "${NL_VOICES[@]}" ; do
  voice_name="$(basename "${voice_path}")"
  onnx_file="${VOICES_DIR}/${voice_name}.onnx"
  json_file="${VOICES_DIR}/${voice_name}.onnx.json"
  download_if_missing "${HF_VOICES}/${voice_path}.onnx?download=true" "${onnx_file}" "${voice_name}.onnx"
  download_if_missing "${HF_VOICES}/${voice_path}.onnx.json?download=true" "${json_file}" "${voice_name}.onnx.json"
done

if ${GPU} ; then
  onnxgpu='-gpu[cuda]'
else
  onnxgpu=""
fi
echo " ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"

echo " ===== Installing keras ====="
# Default: keep the known-good pin with stable TF 2.20.
# For tf-nightly/custom TF specs, skip this pin unless explicitly requested.
if [ -n "${MWW_KERAS_SPEC:-}" ] ; then
  pip_install "${MWW_KERAS_SPEC}"
elif [ -n "${MWW_TF_SPEC:-}" ] || [[ "${TF_SPEC}" == tf-nightly* ]] ; then
  echo " Skipping explicit keras pin for ${TF_SPEC} (set MWW_KERAS_SPEC to force one)."
else
  pip_install "keras==3.12.0"
fi

# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe and
# does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"

  # Locate libdevice through the installed triton package of the active
  # interpreter. A recursive glob from the data directory does not work here:
  # "**" skips hidden directories such as .venv, and it yields a path relative
  # to the current directory, which would make the symlink below dangle.
  # find_spec() locates the package without importing it.
  TRITON_LIBDEVICE="$(
python - <<'PY'
import importlib.util
import os

found = ""
try:
    spec = importlib.util.find_spec("triton")
except (ImportError, ValueError):
    spec = None
if spec is not None and spec.submodule_search_locations:
    for pkg_dir in spec.submodule_search_locations:
        candidate = os.path.join(pkg_dir, "backends", "nvidia", "lib", "libdevice.10.bc")
        if os.path.isfile(candidate):
            found = os.path.abspath(candidate)
            break
print(found, end="")
PY
  )"

  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    echo " ℹ️ Triton libdevice.10.bc not found (ok)"
  fi
fi

"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

# Marker checked on the next run to decide whether the existing .venv can be
# reused; only written once everything above has succeeded.
touch "${DATA_DIR}/.mww-data-dir"
END_TS=$EPOCHSECONDS

echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."

print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"