mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
215 lines
6.7 KiB
Bash
Executable File
215 lines
6.7 KiB
Bash
Executable File
#!/bin/bash
#
# setup_python_venv: create or update the Python virtualenv inside the data
# directory and install the wake-word training packages into it.
#

# Directory containing this script, and its parent directory.
PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"

# Option names accepted by this script; consumed by shell.functions.
# NOTE(review): presumably shell.functions turns each option into an
# upper-cased variable (DATA_DIR, PYTHON, GPU), fills UNKNOWN_ARGS and
# handles --help/--verbose itself -- confirm against that file.
KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"

# Unrecognised options are reported and then treated like a help request.
if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

if [ "${HELP}" == "true" ] ; then
  # The usage text lists every entry of KNOWN_ARGS, including --data-dir
  # and --python.
  cat <<EOF >&2
Usage: setup_python_venv [ --data-dir=<dir> ] [ --python=<interpreter> ]
         [ --gpu | --no-gpu ] [ --verbose ]

Options:
  --data-dir=<dir>: Directory that holds the virtualenv (.venv) and the
    tools checkouts. It must already exist.

  --python=<interpreter>: Python interpreter used to create the
    virtualenv. If not given, python3.12, python3.11 and python3.10 are
    tried in that order.

  --gpu: Install the GPU-capable versions of packages if available. This
    is the default if the script detects that a GPU is available.

  --no-gpu: Install the non-GPU-capable versions of packages even if
    GPU-capable packages are available. This is the default if the script
    detects that a GPU is NOT available.

  --verbose: Print the detailed "pip install" output.

EOF
  exit 1
fi

# Resolve the data directory to an absolute path and make it the working
# directory; the virtualenv and the tools checkouts live beneath it.
if [ -n "${DATA_DIR}" ] ; then
  DATA_DIR="$(realpath "${DATA_DIR}")"
fi
if [ ! -d "${DATA_DIR}" ] ; then
  echo "Data directory '${DATA_DIR}' doesn't exist." >&2
  exit 1
fi
cd "${DATA_DIR}" || {
  echo "Unable to change to data directory '${DATA_DIR}'." >&2
  exit 1
}

# Without an explicit GPU choice, the presence of the Nvidia control
# device node decides whether GPU packages are installed.
if [ -z "${GPU}" ] ; then
  GPU=false
  if [ -c /dev/nvidiactl ] ; then
    GPU=true
    echo " Nvidia GPU detected"
  fi
fi

# GPU holds "true" or "false" and is executed as a command.
"${GPU}" || export CUDA_VISIBLE_DEVICES=-1

VENV="${DATA_DIR}/.venv"

# Leave any virtualenv that is active in the calling environment.
# "deactivate" is a shell function that only exists when the activate
# script was sourced in this very shell; when VIRTUAL_ENV was merely
# inherited from the parent process, undo the activation by hand so that
# the later "creating vs. updating" check is not fooled by a stale value.
if [ -n "${VIRTUAL_ENV}" ] ; then
  if declare -F deactivate >/dev/null ; then
    deactivate
  else
    PATH=${PATH//"${VIRTUAL_ENV}/bin:"/}
    unset VIRTUAL_ENV
  fi
fi

# Build the list of interpreters to probe: the one requested by the caller
# if there is one, otherwise the supported versions from newest to oldest
# (3.11 is included as a common middle ground, especially outside
# Ubuntu 24.04).
if [[ -z "${PYTHON}" ]] ; then
  PYTHONS=( python3.12 python3.11 python3.10 )
else
  PYTHONS=( "${PYTHON}" )
  # Cleared so that it is only set again once a probe has succeeded.
  unset PYTHON
fi

# The first candidate that answers "--version" successfully wins.
for candidate in "${PYTHONS[@]}" ; do
  if "${candidate}" --version &>/dev/null ; then
    PYTHON="${candidate}"
    break
  fi
done

if [[ -z "${PYTHON}" ]] ; then
  echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
  exit 1
fi

# Reuse an existing virtualenv only when the data directory carries the
# .mww-data-dir marker; without the marker the virtualenv is discarded and
# rebuilt from scratch.
if [ -d "${VENV}" ] ; then
  if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
    source "${VENV}/bin/activate" || {
      echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
      exit 1
    }
  else
    rm -rf "${VENV}"
  fi
fi

echo "===== Setting up Python environment ${VENV} ====="

# VIRTUAL_ENV is non-empty here only if the activation above succeeded.
if [ -z "$VIRTUAL_ENV" ] ; then
  echo " ===== Creating new virtualenv at '${VENV}' ====="
else
  echo " ===== Updating virtualenv at '${VENV}' ====="
fi

# Strict mode is not enabled yet, so failures are checked explicitly.
# The interpreter is quoted so a path containing spaces stays one word.
"${PYTHON}" -m venv --upgrade-deps "${VENV}" || {
  echo "Unable to create or update virtualenv '${VENV}' using '${PYTHON}'." >&2
  exit 1
}
source "${VENV}/bin/activate" || {
  echo "Unable to activate virtualenv '${VENV}'." >&2
  exit 1
}

# From here on any failing command, unset variable or failing pipeline
# stage aborts the script.
set -euo pipefail

# Symlink CLI scripts into .venv/bin
# The executables next to this script are collected NUL-delimited so that
# paths containing whitespace or glob characters are kept intact.
declare -a progfiles=()
mapfile -d '' -t progfiles < <(
  find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f -print0
)
progfiles+=( "${PROGDIR}/shell.functions" )

# Also symlink the top-level entrypoint if present
if [ -x "${ROOTDIR}/train_wake_word" ] ; then
  progfiles+=( "${ROOTDIR}/train_wake_word" )
fi

# -r makes the links relative to the link location.
for f in "${progfiles[@]}" ; do
  ln -sfr "${f}" "${VENV}/bin/$(basename "${f}")"
done

#
# pip does not honour the order of the entries in requirements.txt, yet the
# order matters here: tensorflow, torch, onnxruntime and micro-wake-word
# all depend on CUDA packages at various versions. They have to be
# installed in the specific sequence used below or they may not be able to
# use the GPU.
#
# pip settings shared by every install step that follows.
export PIP_PROGRESS_BAR=off PIP_NO_COLOR=1 PIP_QUIET=0

#######################################
# Run "pip install" with the given arguments.
# Globals:   VERBOSE (read) - holds "true" or "false" and is executed as a
#            command; unset or empty is treated as "false".
# Arguments: passed through to "pip install" unchanged.
# Outputs:   in verbose mode pip's own output; otherwise one "." per line
#            of pip output, followed by a newline.
# Returns:   0 on success, 1 if pip (or the progress filter) fails.
#######################################
pip_install() {
  local rc=0
  if ${VERBOSE:-false} ; then
    pip install "$@" || return 1
  else
    # The pipeline runs in a subshell with pipefail switched on so that a
    # pip failure is reported regardless of the caller's shell options; a
    # "return" placed inside a pipeline stage would only leave that stage.
    # tr -d removes every printable character, leaving only the newlines,
    # which the second tr turns into dots.
    (
      set -o pipefail
      pip install "$@" \
        | stdbuf -i0 -o0 tr -d '[:print:]' \
        | stdbuf -i0 -o0 tr '\n' '.'
    ) || rc=1
  fi
  echo
  return "${rc}"
}

# Start of the installation phase, used for the elapsed-time report.
START_TS=$EPOCHSECONDS

echo " ===== Installing common requirements ====="
# requirements.txt is kept in the repository root
pip_install -r "${ROOTDIR}/requirements.txt"

# GPU installs request tensorflow with its "and-cuda" extra.
if ${GPU} ; then
  tfgpu='[and-cuda]'
else
  tfgpu=""
fi
echo " ===== Installing Tensorflow${tfgpu} ====="
pip_install ai_edge_litert "tensorflow${tfgpu}==2.20.0" \
  "tensorboard==2.20.0" "tensorboard-data-server==0.7.2"

# GPU installs take torch from the cu129 wheel index; otherwise pip's
# default index is used.
torch_index=()
torch_label=""
if ${GPU} ; then
  torch_index=( --index-url https://download.pytorch.org/whl/cu129 )
  torch_label="[cuda]"
fi
echo " ===== Installing torch and torchaudio ${torch_label} ====="
pip_install "torch==2.9.1" "torchaudio==2.9.1" "${torch_index[@]}"

#######################################
# Make sure a pristine git checkout exists at the given path.
# An existing directory is kept only when "git status --porcelain" prints
# nothing for it; otherwise it is removed and cloned again.
# Globals:   DATA_DIR (read, for the progress message)
# Arguments: $1 - destination directory
#            $2 - repository URL
#            $3 - name shown in the progress message
# Outputs:   progress message on stdout, clone failure on stderr
# Returns:   0 if the checkout is usable, 1 if cloning failed
#######################################
_ensure_clean_checkout() {
  local dest="$1" url="$2" label="$3"
  if [ -d "${dest}" ] && [ -z "$(git -C "${dest}" status --porcelain)" ] ; then
    return 0
  fi
  rm -rf "${dest}" || :
  echo " Cloning ${label} to ${DATA_DIR}/tools"
  # git's own output is hidden, so a failure is reported explicitly
  # instead of letting "set -e" end the script without any message.
  git clone "${url}" "${dest}" &>/dev/null || {
    echo "Unable to clone ${url} into '${dest}'." >&2
    return 1
  }
}

echo " ===== Checking microwakeword ====="
MWW="${DATA_DIR}/tools/microWakeWord"
_ensure_clean_checkout "${MWW}" \
  https://github.com/TaterTotterson/micro-wake-word micro-wake-word || exit 1
echo " Installing microwakeword"
pip_install -e "${MWW}"

echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
_ensure_clean_checkout "${PSG}" \
  https://github.com/rhasspy/piper-sample-generator piper-sample-generator || exit 1
echo " Installing piper-sample-generator"
pip_install -e "${PSG}"
# Remove untracked files and directories from the checkout.
# NOTE(review): presumably leftovers of the editable install that would
# otherwise make the checkout look modified on the next run -- confirm.
git -C "${PSG}" clean -fd &>/dev/null

# Voice model used by piper-sample-generator, plus its .json companion.
MODELS_DIR="${PSG}/models"
MODEL_NAME="en_US-libritts_r-medium.pt"
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"

mkdir -p "${MODELS_DIR}"

# Each file is fetched under a temporary name and renamed only after the
# transfer succeeded, so an interrupted download is never mistaken for a
# complete file on the next run.
for model_suffix in "" ".json" ; do
  model_target="${MODEL_FILE}${model_suffix}"
  if [ -f "${model_target}" ] ; then
    continue
  fi
  echo " Downloading ${MODEL_NAME}${model_suffix} for piper-sample-generator"
  # -f fails on HTTP errors, -sS hides the progress meter but still prints
  # the reason for a failure, -L follows redirects.
  curl -fsSL "${MODEL_URL}${model_suffix}" -o "${model_target}.part" || {
    rm -f "${model_target}.part"
    echo "Unable to download ${MODEL_URL}${model_suffix}" >&2
    exit 1
  }
  mv -f "${model_target}.part" "${model_target}"
done

# GPU installs use the onnxruntime-gpu distribution with its "cuda" extra.
if ${GPU} ; then
  onnxgpu='-gpu[cuda]'
else
  onnxgpu=""
fi
echo " ===== Installing onnxruntime${onnxgpu} ====="
pip_install "onnxruntime${onnxgpu}>=1.16.0"

echo " ===== Installing keras ====="
# Pinned to 3.12 because keras 3.13 has "issues".
pip_install "keras==3.12.0"

# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one in
# ${DATA_DIR}/cuda and link Triton's libdevice if it exists. This is safe
# and does NOT enable any extra XLA flags by itself.
# -----------------------------------------------------------------------------
if ${GPU} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"

  # Search the virtualenv itself. A recursive "**" glob started from the
  # data directory never descends into hidden directories such as .venv,
  # and it would return a path relative to the working directory, which
  # is not a valid target for a symlink created somewhere else.
  TRITON_LIBDEVICE="$(
    find "${VENV}" \
      -path '*/site-packages/triton/backends/nvidia/lib/libdevice.10.bc' \
      -print -quit 2>/dev/null
  )" || :

  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    # -r stores the target relative to the link's own directory.
    ln -sfr "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    echo " ℹ️ Triton libdevice.10.bc not found (ok)"
  fi
fi

# Run the companion test_python script against the data directory.
# NOTE(review): presumably a check of the freshly installed packages --
# confirm against that script. With "set -e" active a failure stops here,
# before the marker below is written.
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

# Marker checked at the start of the next run to decide whether the
# existing virtualenv may be reused; written with the same absolute path
# that the check uses.
touch "${DATA_DIR}/.mww-data-dir"
END_TS=$EPOCHSECONDS

echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."

print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"