#!/bin/bash
#
# Front end of the wake word training script: parses the command line, prints
# usage on request or on error, derives the wake word title, sets up the
# working directory variables and activates the Python virtual environment.
#
# Usage: see the here-doc printed below (-h/--help).

set -e

PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")

# Long option names this script accepts.
# NOTE(review): presumably consumed by shell.functions, which is expected to
# populate POSITIONAL_ARGS, UNKNOWN_ARGS, HELP, DATA_DIR, TRAINING_STEPS and
# DEFAULT_TRAINING_STEPS - confirm against that file.
KNOWN_ARGS=( training-steps samples data-dir )

source "${PROGDIR}/shell.functions"

WAKE_WORD="${POSITIONAL_ARGS[0]}"

# Any unrecognised option forces the usage text (and a non-zero exit).
if [[ ${#UNKNOWN_ARGS[@]} -gt 0 ]]; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

if [[ "${HELP}" == "true" || -z "${WAKE_WORD}" ]]; then
  cat <<EOF >&2
Usage: $0 [ --samples=<samples> ] [ --training-steps=<training_steps> ] <wake_word> [ <wake_word_title> ]
       $0 -h/--help

--samples: The number of samples to generate for the wake word.
           Used only to generate output file names.
--training-steps: Number of training steps.
                  Default: ${DEFAULT_TRAINING_STEPS}
<wake_word>: The word to train spelled phonetically. Required.
<wake_word_title>: A pretty name to save to the json metadata file.
                   Default: The wake word with individual words capitalized.
EOF
  exit 1
fi

WORK_DIR="${DATA_DIR}/work"
TRAINING_DS="${DATA_DIR}/training_datasets"

# Count the positional arguments with [@]; a bare ${#POSITIONAL_ARGS} is the
# string length of the first element, which made the title be ignored.
if [[ ${#POSITIONAL_ARGS[@]} -eq 2 ]]; then
  WAKE_WORD_TITLE="${POSITIONAL_ARGS[1]}"
fi

if [[ ! -v WAKE_WORD_TITLE ]]; then
  # No title given: split the wake word on every non-alphanumeric character
  # and capitalise the first letter of each resulting word.
  # Word splitting is intentional here; all glob characters have already been
  # replaced by spaces.
  # shellcheck disable=SC2206
  declare -a WWNA=( ${WAKE_WORD//[^a-zA-Z0-9]/ } )
  WAKE_WORD_TITLE="${WWNA[*]^}"
elif [[ -z "${WAKE_WORD_TITLE}" ]]; then
  # An explicitly empty title falls back to the raw wake word.
  WAKE_WORD_TITLE="${WAKE_WORD}"
fi

# shellcheck source=/dev/null
source "${DATA_DIR}/.venv/bin/activate"

# --- WSL2 GPU visibility fix (venv sometimes doesn't inherit WSL driver path) ---
# Keep a copy so we can restore/preserve on fallback if desired.
ORIG_XLA_FLAGS="${XLA_FLAGS:-}"

if [[ -d /usr/lib/wsl/lib ]]; then
  # Prepend the WSL driver directory. The ':' separator is only emitted when
  # LD_LIBRARY_PATH already has content: an empty path element would make the
  # dynamic loader search the current working directory.
  export LD_LIBRARY_PATH="/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  echo "ℹ️ WSL2 detected: LD_LIBRARY_PATH+=/usr/lib/wsl/lib"

  # Blackwell / PTXAS workaround: only apply on WSL *and* only if user didn't set XLA_FLAGS
  if [[ -z "${XLA_FLAGS:-}" ]]; then
    export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
    echo "ℹ️ WSL2: setting XLA_FLAGS=${XLA_FLAGS}"
  else
    echo "ℹ️ Using user-provided XLA_FLAGS=${XLA_FLAGS}"
  fi
fi
# -----------------------------------------------------------------------------

#######################################
# Abort the script unless every given path is an existing directory.
# Arguments: $@ - directory paths to verify
# Outputs:   an error message on stderr for the first missing directory
# Returns:   0 when all exist; otherwise exits the script with status 1
#######################################
check_directories() {
  local d
  for d in "$@"; do
    if [[ ! -d "$d" ]]; then
      echo "ERROR: Directory $d not found" >&2
      exit 1
    fi
  done
}

# Paths are quoted so a data directory containing whitespace stays one
# argument; brace expansion still applies to the unquoted {...} suffix.
check_directories "${WORK_DIR}/wake_word_samples_augmented" \
  "${TRAINING_DS}/negative_datasets/"{speech,dinner_party,no_speech,dinner_party_eval}

# Personal features are optional, but if present they MUST have /training
PERSONAL_FEATURES_DIR="${WORK_DIR}/personal_augmented_features"
HAS_PERSONAL="false"
if [[ -d "${PERSONAL_FEATURES_DIR}/training" ]]; then
  HAS_PERSONAL="true"
  echo "✅ Found personal features: ${PERSONAL_FEATURES_DIR}/training (will weight sampling_weight=3.0)"
else
  echo "ℹ️ No personal features found at ${PERSONAL_FEATURES_DIR}/training (continuing without personal weighting)"
fi

cd "${WORK_DIR}"

echo "===== Starting ${TRAINING_STEPS} training steps ====="
# Start timestamp; read again at the end of the script to report elapsed time.
START_TS=$EPOCHSECONDS

mkdir -p "${WORK_DIR}/trained_models" || :

# We write a YAML with a marker, then splice personal feature block in if it exists.
# -----------------------------------------------------------------------------
# Training configuration, training run (GPU first, CPU fallback) and packaging.
#
# Steps visible in this section:
#   1. Write ${WORK_DIR}/trained_models/training_parameters.yaml from a quoted
#      here-doc template containing __PLACEHOLDER__ tokens, then substitute the
#      tokens in place with a single `sed -i` call.
#   2. When HAS_PERSONAL is "true", start building a personal feature block
#      intended to go directly after the wake word feature block.
#   3. The visible tail of run_attempt pipes the command output through
#      tr/sed/tee into ${TRAIN_LOG} and returns ${PIPESTATUS[0]}, i.e. the exit
#      status of the first stage of that pipeline.
#   4. Export TF_* / NVIDIA_* defaults (existing non-empty values are kept) and
#      run attempt 1. On failure the log is lowercased and searched with
#      `grep -qF` for each entry of GPU_FALLBACK_MARKERS; on a match
#      CUDA_VISIBLE_DEVICES is set to "", TF_GPU_ALLOCATOR and TF_XLA_FLAGS are
#      unset, XLA_FLAGS is restored from ORIG_XLA_FLAGS (or unset), and a
#      second attempt is made. Any other failure exits with status 1.
#   5. Require the streaming quantized .tflite to exist, copy the model summary
#      and train/validation logs to ${OUTPUT_DIR}/logs/ (failures ignored via
#      `|| :`), copy the model to ${OUTPUT_DIR}, write the JSON metadata file
#      and print the elapsed time.
#
# NOTE(review): the text between `personal_block="$(cat <` and `&1 \` looks
# truncated. The here-doc body, the YAML splice, the definitions of TRAIN_LOG,
# OUTPUT_DIR, wake_word_filename and GPU_FALLBACK_MARKERS, and the head of
# run_attempt are not visible here - TODO restore from version control.
# NOTE(review): markers are matched against a lowercased copy of the log, so
# presumably every GPU_FALLBACK_MARKERS entry must be lowercase - confirm.
# NOTE(review): "Output model not found!" is written to stdout, unlike the
# other failure messages in this section, which go to stderr.
# -----------------------------------------------------------------------------
YAML_PATH="${WORK_DIR}/trained_models/training_parameters.yaml" cat <<'EOF' > "${YAML_PATH}" batch_size: 16 clip_duration_ms: 1500 eval_step_interval: 500 features: - features_dir: __WAKEWORD_FEATURES__ penalty_weight: 1.0 sampling_weight: 2.0 truncation_strategy: truncate_start truth: true type: mmap __PERSONAL_FEATURE_MARKER__ - features_dir: __NEG_SPEECH__ penalty_weight: 1.0 sampling_weight: 12.0 truncation_strategy: random truth: false type: mmap - features_dir: __NEG_DINNER__ penalty_weight: 1.0 sampling_weight: 12.0 truncation_strategy: random truth: false type: mmap - features_dir: __NEG_NOSPEECH__ penalty_weight: 1.0 sampling_weight: 5.0 truncation_strategy: random truth: false type: mmap - features_dir: __NEG_DINNER_EVAL__ penalty_weight: 1.0 sampling_weight: 0.0 truncation_strategy: split truth: false type: mmap freq_mask_count: - 0 freq_mask_max_size: - 0 learning_rates: - 0.001 maximization_metric: average_viable_recall minimization_metric: null negative_class_weight: - 20 positive_class_weight: - 1 target_minimization: 0.9 time_mask_count: - 0 time_mask_max_size: - 0 train_dir: __TRAIN_DIR__ training_steps: - __TRAINING_STEPS__ window_step_ms: 10 EOF # Replace placeholders sed -i \ -e "s|__WAKEWORD_FEATURES__|${WORK_DIR}/wake_word_samples_augmented|g" \ -e "s|__NEG_SPEECH__|${TRAINING_DS}/negative_datasets/speech|g" \ -e "s|__NEG_DINNER__|${TRAINING_DS}/negative_datasets/dinner_party|g" \ -e "s|__NEG_NOSPEECH__|${TRAINING_DS}/negative_datasets/no_speech|g" \ -e "s|__NEG_DINNER_EVAL__|${TRAINING_DS}/negative_datasets/dinner_party_eval|g" \ -e "s|__TRAIN_DIR__|${WORK_DIR}/trained_models/wakeword|g" \ -e "s|__TRAINING_STEPS__|${TRAINING_STEPS}|g" \ "${YAML_PATH}" # Insert/remove personal block if [ "${HAS_PERSONAL}" = "true" ]; then # Insert directly after the wakeword feature block personal_block="$(cat <&1 \ | tr '\r' '\n' \ | stdbuf -i0 -o0 sed -r -e "/^Validation Batch/d" \ | tee "${TRAIN_LOG}" \ | sed -r -e "/^Validation Batch/d" -e "s/^INFO:absl:/ 
/g" return ${PIPESTATUS[0]} } # --------- ENV (keep compatible; DO NOT add unsupported XLA flags) ---------- export TF_CPP_MIN_LOG_LEVEL="${TF_CPP_MIN_LOG_LEVEL:-2}" export TF_XLA_FLAGS="${TF_XLA_FLAGS:---tf_xla_auto_jit=0}" export NVIDIA_TF32_OVERRIDE="${NVIDIA_TF32_OVERRIDE:-1}" export TF_FORCE_GPU_ALLOW_GROWTH="${TF_FORCE_GPU_ALLOW_GROWTH:-true}" export TF_GPU_ALLOCATOR="${TF_GPU_ALLOCATOR:-cuda_malloc_async}" if run_attempt "Attempt 1/2: GPU training (allow_growth + cuda_malloc_async)" ; then echo "✅ Training complete (GPU path)." else echo "⚠️ GPU attempt failed. Checking whether this looks like a GPU/OOM/runtime failure…" log_lc="$(tr '[:upper:]' '[:lower:]' < "${TRAIN_LOG}" || true)" looks_like_gpu_fail="false" for m in "${GPU_FALLBACK_MARKERS[@]}"; do if echo "${log_lc}" | grep -qF "${m}"; then looks_like_gpu_fail="true" break fi done if [ "${looks_like_gpu_fail}" = "true" ]; then echo "↪️ Detected GPU/OOM/runtime failure markers. Falling back to CPU." export CUDA_VISIBLE_DEVICES="" unset TF_GPU_ALLOCATOR # CPU attempt should not inherit GPU/XLA runtime knobs unset TF_XLA_FLAGS # Optional: clear XLA_FLAGS for CPU (usually irrelevant). If user had set it, restore. if [ -n "${ORIG_XLA_FLAGS}" ]; then export XLA_FLAGS="${ORIG_XLA_FLAGS}" else unset XLA_FLAGS fi if run_attempt "Attempt 2/2: CPU fallback (CUDA_VISIBLE_DEVICES='')" ; then echo "✅ Training complete (CPU fallback)." else echo "❌ Training failed on BOTH GPU and CPU. See: ${TRAIN_LOG}" >&2 exit 1 fi else echo "❌ Training failed (does not look GPU/OOM/runtime). See: ${TRAIN_LOG}" >&2 exit 1 fi fi source_path="${WORK_DIR}/trained_models/wakeword/tflite_stream_state_internal_quant/stream_state_internal_quant.tflite" if [ ! -f "${source_path}" ] ; then echo "Output model not found! Training didn't complete successfully. 
See ${TRAIN_LOG}" exit 1 fi cp "${WORK_DIR}/trained_models/wakeword/model_summary.txt" "${OUTPUT_DIR}/logs/" || : cp -a "${WORK_DIR}/trained_models/wakeword/logs/train" "${OUTPUT_DIR}/logs/" || : cp -a "${WORK_DIR}/trained_models/wakeword/logs/validation" "${OUTPUT_DIR}/logs/" || : echo -e "\n Training complete!" echo " Full log: ${TRAIN_LOG}" tflite_filename="${wake_word_filename}.tflite" tflite_path="${OUTPUT_DIR}/${tflite_filename}" cp "${source_path}" "${tflite_path}" json_path="${OUTPUT_DIR}/${wake_word_filename}.json" cat <<-EOF > "${json_path}" { "type": "micro", "wake_word": "${WAKE_WORD_TITLE}", "author": "Tater Totterson", "website": "https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git", "model": "${tflite_filename}", "trained_languages": ["en"], "version": 2, "micro": { "probability_cutoff": 0.97, "sliding_window_size": 5, "feature_step_size": 10, "tensor_arena_size": 30000, "minimum_esphome_version": "2024.7.0" } } EOF echo "Name: ${WAKE_WORD_TITLE}" echo "Model: ${tflite_path}" echo "Metadata: ${json_path}" echo END_TS=$EPOCHSECONDS print_elapsed_time "${START_TS}" "${END_TS}" "Training completed." echo