Files
microWakeWord-Trainer-Nvidi…/cli/setup_python_venv
joopd d5dcfbf5f1 feat: add --language flag for non-English TTS voices (Dutch support)
- Add LANGUAGE default (en) to shell.functions
- setup_python_venv downloads Dutch ONNX voices (pim, ronnie, nathalie)
- wake_word_sample_generator uses multiple --model flags for single-speaker
  voices, cycling between them for variety
- train_wake_word accepts and passes --language through the pipeline
- recorder_server.py accepts language in session API
- Web UI adds language dropdown (English/Dutch)
2026-02-25 09:57:46 +01:00

243 lines
7.9 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#
# setup_python_venv: create or update the Python virtualenv inside the data
# directory and install the packages and tools used for wake word training.
#
PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

# Option names specific to this script.
# NOTE(review): presumably shell.functions parses the command line against this
# list and sets DATA_DIR, PYTHON, GPU, VERBOSE, HELP and UNKNOWN_ARGS — confirm.
KNOWN_ARGS=( data-dir python gpu no-gpu )

# Strict mode is not active yet, so a missing helper file would otherwise go
# unnoticed and the script would continue with undefined variables/functions.
[ -r "${PROGDIR}/shell.functions" ] || {
  echo "Required helper file '${PROGDIR}/shell.functions' is missing or unreadable." >&2
  exit 1
}
source "${PROGDIR}/shell.functions"

# Any unrecognised argument is reported and then treated like a help request.
if [ "${#UNKNOWN_ARGS[@]}" -gt 0 ] ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

# Usage goes to stderr and the script exits non-zero.
if [ "${HELP}" == "true" ] ; then
  cat <<EOF >&2
Usage: setup_python_venv [ --data-dir=<dir> ] [ --python=<interpreter> ]
[ --gpu | --no-gpu ] [ --verbose ]
Options:
--data-dir=<dir>: Existing directory in which the virtualenv (.venv) and
the tools are set up.
--python=<interpreter>: Python interpreter used to create the virtualenv
instead of the first of python3.12, python3.11 or python3.10 found.
--gpu: Install the GPU-capable versions of packages if available. This
is the default if the script detects that a GPU is available.
--no-gpu: Install the non-GPU-capable versions of packages even if
GPU-capable packages are available. This is the default if the script
detects that a GPU is NOT available.
--verbose: Print the detailed "pip install" output.
EOF
  exit 1
fi
# Resolve the data directory to an absolute path when one was given, then make
# sure it really is a directory before anything is created inside it.
[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
[ -d "${DATA_DIR}" ] || {
  echo "Data directory '${DATA_DIR}' doesn't exist." >&2
  exit 1
}

# Abort if the directory cannot be entered (e.g. missing permissions) rather
# than silently continuing in whatever directory the script was started from.
cd "${DATA_DIR}" || {
  echo "Unable to change to data directory '${DATA_DIR}'." >&2
  exit 1
}

# Auto-detect a GPU only when neither --gpu nor --no-gpu decided it already:
# the presence of the Nvidia control device node is used as the indicator.
if [ -z "${GPU}" ] ; then
  GPU=false
  if [ -c /dev/nvidiactl ] ; then
    GPU=true
    echo " Nvidia GPU detected"
  fi
fi

# GPU holds the command name "true" or "false"; when it is false, hide all
# CUDA devices from the tools started by this script.
"${GPU}" || export CUDA_VISIBLE_DEVICES=-1
VENV="${DATA_DIR}/.venv"

# Leave any virtualenv inherited from the calling shell.
# `deactivate` is a shell function created by sourcing an activate script; it
# is not passed on to an executed script, so normally only the exported
# VIRTUAL_ENV and PATH arrive here. In that case undo the activation by hand,
# otherwise the interpreter lookup below could pick up the old virtualenv and
# the "Creating"/"Updating" message further down would be wrong.
if [ -n "${VIRTUAL_ENV}" ] ; then
  if declare -F deactivate >/dev/null ; then
    deactivate
  else
    inherited_bin=":${VIRTUAL_ENV}/bin:"
    cleaned_path=":${PATH}:"
    cleaned_path="${cleaned_path//"${inherited_bin}"/:}"
    cleaned_path="${cleaned_path#:}"
    PATH="${cleaned_path%:}"
    unset VIRTUAL_ENV inherited_bin cleaned_path
  fi
fi

# Candidate interpreters: the one requested by the caller, or the supported
# versions in order of preference.
if [ -n "${PYTHON}" ] ; then
  PYTHONS=( "${PYTHON}" )
  unset PYTHON
else
  # Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
  PYTHONS=( python3.12 python3.11 python3.10 )
fi

# The first candidate that can report its version wins.
for p in "${PYTHONS[@]}" ; do
  "${p}" --version &>/dev/null && { PYTHON="${p}" ; break ; }
done
[ -n "${PYTHON}" ] || {
  echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
  exit 1
}

# An existing virtualenv is reused only when the marker file written at the
# end of a successful run is present; otherwise it is discarded and rebuilt.
if [ -d "${VENV}" ] ; then
  if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
    source "${VENV}/bin/activate" || {
      echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
      exit 1
    }
  else
    rm -rf "${VENV}"
  fi
fi

echo "===== Setting up Python environment ${VENV} ====="
# VIRTUAL_ENV is only set at this point if the existing virtualenv was
# activated just above.
if [ -z "$VIRTUAL_ENV" ] ; then
  echo " ===== Creating new virtualenv at '${VENV}' ====="
else
  echo " ===== Updating virtualenv at '${VENV}' ====="
fi

# Strict mode is not enabled yet, so check these two steps explicitly.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
  echo "Unable to create or update virtualenv '${VENV}'." >&2
  exit 1
}
source "${VENV}/bin/activate" || {
  echo "Unable to activate virtualenv '${VENV}'." >&2
  exit 1
}
# From here on any failing command, use of an unset variable or failing
# pipeline stage aborts the script.
set -euo pipefail

# Symlink CLI scripts into the virtualenv's bin directory so they are on PATH
# whenever the virtualenv is active.
# The file list is read NUL-delimited so that paths containing whitespace or
# glob characters are kept intact (requires bash 4.4+; the script already
# relies on bash 5 for EPOCHSECONDS).
declare -a progfiles=()
mapfile -d '' -t progfiles < <(
  find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0
)
# shell.functions is linked as well, next to the scripts.
progfiles+=( "${PROGDIR}/shell.functions" )
# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
  progfiles+=( "${ROOTDIR}/train_wake_word" )
fi
for f in "${progfiles[@]}" ; do
  # -r creates relative links, -f replaces links left by a previous run.
  ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done
#
# The packages below are installed one group at a time instead of from a
# single requirements file: pip doesn't process requirements.txt in order,
# yet tensorflow, torch, onnxruntime and micro-wake-word all depend on CUDA
# packages at various versions. Unless they are installed in this specific
# order they may not be able to use the GPU.
#
# Settings picked up by every pip invocation through the environment.
PIP_PROGRESS_BAR=off
PIP_NO_COLOR=1
PIP_QUIET=0
export PIP_PROGRESS_BAR PIP_NO_COLOR PIP_QUIET
#######################################
# Run "pip install" with the given arguments.
# In verbose mode pip's output is shown unchanged. Otherwise every line of
# pip output is reduced to a single "." as a compact progress indicator.
# A newline is printed after the pip output in both modes.
# Globals:   VERBOSE (read) - command name "true" or "false"; unset or empty
#            counts as "false"
# Arguments: passed through to "pip install"
# Outputs:   pip output or progress dots on stdout
# Returns:   0 on success, 1 if the installation failed
#######################################
pip_install() {
  if "${VERBOSE:-false}" ; then
    pip install "$@" || return 1
  else
    # Each pipeline stage runs in its own subshell, so a "return" placed next
    # to pip could never leave this function. Run the pipeline in a subshell
    # with pipefail instead, so pip's failure is reported no matter which
    # shell options the caller uses or whether the call sits in a condition.
    (
      set -o pipefail
      pip install "$@" \
        | stdbuf -i0 -o0 tr -d '[:print:]' \
        | stdbuf -i0 -o0 tr '\n' '.'
    ) || {
      # Terminate the line of dots before reporting the failure.
      echo
      return 1
    }
  fi
  echo
}
# Remember when package installation started.
START_TS=$EPOCHSECONDS

echo " ===== Installing common requirements ====="
# requirements.txt is kept in the repository root.
pip_install -r "${ROOTDIR}/requirements.txt"

# With a GPU the CUDA extra of the tensorflow package is requested.
if ${GPU} ; then
  tfgpu='[and-cuda]'
else
  tfgpu=""
fi
echo " ===== Installing Tensorflow${tfgpu} ====="
pip_install \
  ai_edge_litert \
  "tensorflow${tfgpu}==2.20.0" \
  "tensorboard==2.20.0" \
  "tensorboard-data-server==0.7.2"

# With a GPU torch is taken from the PyTorch CUDA 12.9 wheel index; without
# one no extra pip arguments are added.
torch_index_args=()
torch_banner_tag=""
if ${GPU} ; then
  torch_index_args=( --index-url https://download.pytorch.org/whl/cu129 )
  torch_banner_tag="[cuda]"
fi
echo " ===== Installing torch and torchaudio ${torch_banner_tag} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index_args[@]}"
#######################################
# Make sure a pristine git checkout of a repository exists.
# An existing checkout is kept only when it has git metadata of its own and
# "git status --porcelain" succeeds with empty output. Anything else (missing,
# left over from an interrupted clone, or locally modified) is deleted and
# cloned again.
# NOTE(review): untracked files that the repository does not ignore count as
# modifications too — e.g. assets that later steps download into a checkout.
# Confirm the upstream .gitignore covers them, otherwise every run re-clones.
# Arguments: $1 - repository URL
#            $2 - destination directory
# Outputs:   a progress line on stdout when cloning; git errors on stderr
# Returns:   0 if a clean checkout is in place, non-zero if cloning failed
#######################################
ensure_clean_clone() {
  local url="$1" dest="$2" changes
  # Test for the checkout's own .git so that a surrounding repository is not
  # mistaken for it.
  if [ -e "${dest}/.git" ] \
      && changes="$(git -C "${dest}" status --porcelain 2>/dev/null)" \
      && [ -z "${changes}" ] ; then
    return 0
  fi
  rm -rf "${dest:?}" || :
  echo " Cloning ${url##*/} to $(dirname "${dest}")"
  # --quiet drops the progress output but leaves error messages visible.
  git clone --quiet "${url}" "${dest}" >/dev/null
}

echo " ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
ensure_clean_clone https://github.com/TaterTotterson/micro-wake-word "${MWW}"
echo " Installing microwakeword"
pip_install -e "${MWW}"

echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
ensure_clean_clone https://github.com/rhasspy/piper-sample-generator "${PSG}"
echo " Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files from the checkout (presumably artefacts of the
# editable install — confirm) so that it is reported as clean on the next run.
git -C "${PSG}" clean -fd &>/dev/null
MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
# Both download targets must exist before curl writes into them.
mkdir -p "${MODELS_DIR}" "${VOICES_DIR}"

#######################################
# Download a file unless it is already present.
# The data is written to "<destination>.part" first and only renamed to the
# final name after curl succeeded, so an interrupted transfer can never be
# mistaken for a complete file by a later run.
# Arguments: $1 - source URL
#            $2 - destination file
#            $3 - text shown after "Downloading" in the progress line
# Outputs:   a progress line on stdout; error details on stderr
# Returns:   0 if the file exists afterwards, 1 if the download failed
#######################################
fetch_if_missing() {
  local url="$1" dest="$2" label="$3"
  local partial="${dest}.part"
  if [ -f "${dest}" ] ; then
    return 0
  fi
  echo " Downloading ${label}"
  # -f: fail on HTTP errors, -sS: no progress meter but show errors,
  # -L: follow redirects.
  if ! curl -fsSL "${url}" -o "${partial}" ; then
    rm -f "${partial}"
    echo "Download of '${url}' failed." >&2
    return 1
  fi
  mv -f "${partial}" "${dest}"
}

# --- English generator model (multi-speaker, used with --language=en) ---
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"
fetch_if_missing "${MODEL_URL}" "${MODEL_FILE}" \
  "${MODEL_NAME} for piper-sample-generator"
fetch_if_missing "${MODEL_URL}.json" "${MODEL_FILE}.json" \
  "${MODEL_NAME}.json for piper-sample-generator"

# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
# Paths below HF_VOICES, without the ".onnx" / ".onnx.json" suffix.
declare -a NL_VOICES=(
  "nl/nl_NL/pim/medium/nl_NL-pim-medium"
  "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
  "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
)
echo " ===== Checking Dutch Piper voices ====="
for voice_path in "${NL_VOICES[@]}" ; do
  voice_name="$(basename "${voice_path}")"
  onnx_file="${VOICES_DIR}/${voice_name}.onnx"
  json_file="${VOICES_DIR}/${voice_name}.onnx.json"
  # Every voice consists of the model itself and its JSON companion file.
  fetch_if_missing "${HF_VOICES}/${voice_path}.onnx?download=true" \
    "${onnx_file}" "${voice_name}.onnx"
  fetch_if_missing "${HF_VOICES}/${voice_path}.onnx.json?download=true" \
    "${json_file}" "${voice_name}.onnx.json"
done
# With a GPU the "-gpu" variant of onnxruntime with its "cuda" extra is
# installed; otherwise the plain package.
if ${GPU} ; then
  onnxgpu='-gpu[cuda]'
else
  onnxgpu=""
fi
echo " ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"

echo " ===== Installing keras ====="
# Pinned to 3.12.0 because keras 3.13 has "issues".
pip_install "keras==3.12.0"
# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe and
# does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"
  # Look for libdevice below the active environment (sys.prefix) and print an
  # absolute path. A recursive "**" glob started from the data directory never
  # descends into hidden directories such as ".venv", and a relative result
  # would make the symlink below resolve relative to LIBDEVICE_DIR and dangle.
  TRITON_LIBDEVICE="$(
    python - <<'PY'
import glob
import os
import sys

pattern = os.path.join(
    glob.escape(sys.prefix), "**", "site-packages", "triton",
    "backends", "nvidia", "lib", "libdevice.10.bc",
)
paths = sorted(glob.glob(pattern, recursive=True))
print(os.path.abspath(paths[0]) if paths else "", end="")
PY
  )"
  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    echo " Triton libdevice.10.bc not found (ok)"
  fi
fi

# Run the companion test script against the data directory; under strict mode
# a failure here stops the script before the marker file is written.
# NOTE(review): presumably this verifies the installed packages — confirm.
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

# Marker file: the virtualenv is only reused on the next run if it exists.
touch "${DATA_DIR}/.mww-data-dir"

END_TS=$EPOCHSECONDS
echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"