#!/bin/bash
#
# Trains a wake word model from previously prepared feature directories and
# packages the resulting model together with a JSON metadata file.
# Run with -h/--help for usage.

set -e

PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")

# Long option names this script accepts.
# NOTE(review): shell.functions is not visible here; it presumably parses the
# command line against KNOWN_ARGS and provides POSITIONAL_ARGS, UNKNOWN_ARGS,
# HELP, DATA_DIR, SAMPLES, TRAINING_STEPS, DEFAULT_TRAINING_STEPS and
# print_elapsed_time -- confirm against that file.
KNOWN_ARGS=( training-steps samples data-dir )
source "${PROGDIR}/shell.functions"

WAKE_WORD="${POSITIONAL_ARGS[0]}"

if (( ${#UNKNOWN_ARGS[@]} > 0 )); then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

# Usage goes to stderr and the script exits non-zero, both for an explicit
# help request and for a missing wake word.
if [[ "${HELP}" == "true" || -z "${WAKE_WORD}" ]]; then
  cat <<EOF >&2
Usage: $0 [ --samples=<samples> ] [ --training-steps=<training_steps> ] <wake_word> [ <wake_word_title> ]
       $0 -h/--help

--samples: The number of samples to generate for the wake word. Used only to generate output file names.
--training-steps: Number of training steps. Default: ${DEFAULT_TRAINING_STEPS}
<wake_word>: The word to train spelled phonetically. Required.
<wake_word_title>: A pretty name to save to the json metadata file. Default: The wake word with individual words capitalized.
EOF
  exit 1
fi

WORK_DIR="${DATA_DIR}/work"
TRAINING_DS="${DATA_DIR}/training_datasets"

# Count the positional arguments (the [@] form); without the subscript the
# expansion yields the character length of the first argument instead.
if (( ${#POSITIONAL_ARGS[@]} >= 2 )); then
  WAKE_WORD_TITLE="${POSITIONAL_ARGS[1]}"
fi

if [[ ! -v WAKE_WORD_TITLE ]]; then
  # No title given: split the wake word on every non-alphanumeric character and
  # capitalize each piece. The unquoted expansion is deliberate; all characters
  # that could glob have already been replaced by spaces.
  # shellcheck disable=SC2206
  declare -a WWNA=( ${WAKE_WORD//[^a-zA-Z0-9]/ } )
  WAKE_WORD_TITLE="${WWNA[*]^}"
elif [[ -z "${WAKE_WORD_TITLE}" ]]; then
  # An explicitly empty title falls back to the wake word verbatim.
  WAKE_WORD_TITLE="${WAKE_WORD}"
fi

# shellcheck source=/dev/null
source "${DATA_DIR}/.venv/bin/activate"

#######################################
# Abort the script unless every argument is an existing directory.
# Arguments: one or more directory paths
# Outputs:   an error message on stderr for the first missing directory
# Returns:   0 if all exist; otherwise exits the script with status 1
#######################################
check_directories() {
  local d
  for d in "$@"; do
    [[ -d "${d}" ]] || { echo "ERROR: Directory ${d} not found" >&2; exit 1; }
  done
}

# The quoted prefix keeps paths containing whitespace intact while the brace
# list still expands to one argument per dataset.
check_directories "${WORK_DIR}/wake_word_samples_augmented" \
  "${TRAINING_DS}/negative_datasets/"{speech,dinner_party,no_speech,dinner_party_eval}

# Personal features are optional, but if present they MUST have /training
PERSONAL_FEATURES_DIR="${WORK_DIR}/personal_augmented_features"
HAS_PERSONAL="false"
if [[ -d "${PERSONAL_FEATURES_DIR}/training" ]]; then
  HAS_PERSONAL="true"
  echo "✅ Found personal features: ${PERSONAL_FEATURES_DIR}/training (will weight sampling_weight=3.0)"
else
  echo "ℹ️ No personal features found at ${PERSONAL_FEATURES_DIR}/training (continuing without personal weighting)"
fi

cd "${WORK_DIR}"

echo "===== Starting ${TRAINING_STEPS} training steps ====="
START_TS=$EPOCHSECONDS

mkdir -p "${WORK_DIR}/trained_models" || :

# Write the training configuration directly. Paths are expanded by the
# here-documents themselves, so characters such as '/', '|' or '&' in a path
# are never interpreted by a sed or perl substitution.
YAML_PATH="${WORK_DIR}/trained_models/training_parameters.yaml"
{
  cat <<EOF
batch_size: 16
clip_duration_ms: 1500
eval_step_interval: 500
features:
- features_dir: ${WORK_DIR}/wake_word_samples_augmented
  penalty_weight: 1.0
  sampling_weight: 2.0
  truncation_strategy: truncate_start
  truth: true
  type: mmap
EOF

  # The optional personal feature set goes directly after the wake word
  # feature block.
  if [[ "${HAS_PERSONAL}" == "true" ]]; then
    cat <<EOF
- features_dir: ${PERSONAL_FEATURES_DIR}
  penalty_weight: 1.0
  sampling_weight: 3.0
  truncation_strategy: truncate_start
  truth: true
  type: mmap
EOF
  fi

  cat <<EOF
- features_dir: ${TRAINING_DS}/negative_datasets/speech
  penalty_weight: 1.0
  sampling_weight: 12.0
  truncation_strategy: random
  truth: false
  type: mmap
- features_dir: ${TRAINING_DS}/negative_datasets/dinner_party
  penalty_weight: 1.0
  sampling_weight: 12.0
  truncation_strategy: random
  truth: false
  type: mmap
- features_dir: ${TRAINING_DS}/negative_datasets/no_speech
  penalty_weight: 1.0
  sampling_weight: 5.0
  truncation_strategy: random
  truth: false
  type: mmap
- features_dir: ${TRAINING_DS}/negative_datasets/dinner_party_eval
  penalty_weight: 1.0
  sampling_weight: 0.0
  truncation_strategy: split
  truth: false
  type: mmap
freq_mask_count:
- 0
freq_mask_max_size:
- 0
learning_rates:
- 0.001
maximization_metric: average_viable_recall
minimization_metric: null
negative_class_weight:
- 20
positive_class_weight:
- 1
target_minimization: 0.9
time_mask_count:
- 0
time_mask_max_size:
- 0
train_dir: ${WORK_DIR}/trained_models/wakeword
training_steps:
- ${TRAINING_STEPS}
window_step_ms: 10
EOF
} > "${YAML_PATH}"

echo " Wrote training_parameters.yaml"

# Start from a clean training directory; ':?' refuses to run the removal if
# WORK_DIR is empty or unset.
rm -rf -- "${WORK_DIR:?}/trained_models/wakeword"

# File-name-safe form of the wake word: lower case, each run of other
# characters collapsed to one underscore, no leading/trailing underscores.
wake_word_filename="$(
  printf '%s\n' "${WAKE_WORD}" \
    | tr '[:upper:]' '[:lower:]' \
    | sed -E 's/[^a-z0-9]+/_/g; s/^_+//; s/_+$//'
)"
[[ -n "${wake_word_filename}" ]] || wake_word_filename="wakeword"

OUTPUT_DIR="${DATA_DIR}/output/$(date +'%Y-%m-%d-%H-%M-%S')-${wake_word_filename}-${SAMPLES}-${TRAINING_STEPS}"
mkdir -p "${OUTPUT_DIR}/logs" || :
TRAIN_LOG="${OUTPUT_DIR}/logs/training.log"

TRAIN_ARGS=(
  -m microwakeword.model_train_eval
  --training_config "${WORK_DIR}/trained_models/training_parameters.yaml"
  --train 1
  --restore_checkpoint 1
  --test_tf_nonstreaming 0
  --test_tflite_nonstreaming 0
  --test_tflite_nonstreaming_quantized 0
  --test_tflite_streaming 0
  --test_tflite_streaming_quantized 1
  --use_weights best_weights
  mixednet
  --pointwise_filters "64,64,64,64"
  --repeat_in_block "1,1,1,1"
  --mixconv_kernel_sizes "[5], [7,11], [9,15], [23]"
  --residual_connection "0,0,0,0"
  --first_conv_filters 32
  --first_conv_kernel_size 5
  --stride 2
)

# Substrings (compared case-insensitively) whose presence in the log of a
# failed run triggers a retry on the CPU.
GPU_FALLBACK_MARKERS=(
  "resourceexhaustederror"
  "resource exhausted"
  "oom"
  "out of memory"
  "cuda_error_out_of_memory"
  "failed to allocate"
  "cudnn"
  "cublas"
  "internalerror: cuda"
  "failed call to cuinit"
  "dst tensor is not initialized"
  "failed copying input tensor"
  "_eagerconst"
)

#######################################
# Run one training attempt, writing its output to TRAIN_LOG and to stdout.
# Globals:   PYTHON_BIN (read, defaults to "python"), TRAIN_ARGS (read),
#            TRAIN_LOG (read; the file is overwritten on every call)
# Arguments: $1 - banner text describing the attempt
# Outputs:   banner, the command line, and the filtered trainer output
# Returns:   the exit status of the trainer process
#######################################
run_attempt() {
  local label="$1"

  echo
  echo "================================================================================"
  echo "===== ${label} ====="
  echo "================================================================================"
  echo "→ ${PYTHON_BIN:-python} ${TRAIN_ARGS[*]}"
  echo

  # Carriage returns become newlines so progress updates are separate lines;
  # "Validation Batch" lines are dropped before the log is written.
  "${PYTHON_BIN:-python}" "${TRAIN_ARGS[@]}" 2>&1 \
    | tr '\r' '\n' \
    | stdbuf -i0 -o0 sed -r -e "/^Validation Batch/d" \
    | tee "${TRAIN_LOG}" \
    | sed -r -e "/^Validation Batch/d" -e "s/^INFO:absl:/ /g"
  # Must directly follow the pipeline: report the trainer's status, not sed's.
  return "${PIPESTATUS[0]}"
}

# Defaults for the GPU attempt; values already present in the environment win.
export TF_CPP_MIN_LOG_LEVEL="${TF_CPP_MIN_LOG_LEVEL:-2}"
export TF_XLA_FLAGS="${TF_XLA_FLAGS:---tf_xla_auto_jit=0}"
export NVIDIA_TF32_OVERRIDE="${NVIDIA_TF32_OVERRIDE:-1}"
export TF_FORCE_GPU_ALLOW_GROWTH="${TF_FORCE_GPU_ALLOW_GROWTH:-true}"
export TF_GPU_ALLOCATOR="${TF_GPU_ALLOCATOR:-cuda_malloc_async}"

if run_attempt "Attempt 1/2: GPU training (allow_growth + cuda_malloc_async)"; then
  echo "✅ Training complete (GPU path)."
else
  echo "⚠️ GPU attempt failed. Checking whether this looks like a GPU/OOM/runtime failure…"

  # One case-insensitive fixed-string scan over the log for any marker. A
  # missing or unreadable log simply counts as "no marker found".
  looks_like_gpu_fail="false"
  if printf '%s\n' "${GPU_FALLBACK_MARKERS[@]}" \
    | grep -qiF -f - -- "${TRAIN_LOG}" 2>/dev/null; then
    looks_like_gpu_fail="true"
  fi

  if [[ "${looks_like_gpu_fail}" == "true" ]]; then
    echo "↪️ Detected GPU/OOM/runtime failure markers. Falling back to CPU."
    export CUDA_VISIBLE_DEVICES=""
    unset TF_GPU_ALLOCATOR
    if run_attempt "Attempt 2/2: CPU fallback (CUDA_VISIBLE_DEVICES='')"; then
      echo "✅ Training complete (CPU fallback)."
    else
      echo "❌ Training failed on BOTH GPU and CPU. See: ${TRAIN_LOG}" >&2
      exit 1
    fi
  else
    echo "❌ Training failed (does not look GPU/OOM/runtime). See: ${TRAIN_LOG}" >&2
    exit 1
  fi
fi

source_path="${WORK_DIR}/trained_models/wakeword/tflite_stream_state_internal_quant/stream_state_internal_quant.tflite"
if [[ ! -f "${source_path}" ]]; then
  echo "Output model not found! Training didn't complete successfully. See ${TRAIN_LOG}" >&2
  exit 1
fi

# Best-effort copies of auxiliary training artefacts; a missing one is not fatal.
cp "${WORK_DIR}/trained_models/wakeword/model_summary.txt" "${OUTPUT_DIR}/logs/" || :
cp -a "${WORK_DIR}/trained_models/wakeword/logs/train" "${OUTPUT_DIR}/logs/" || :
cp -a "${WORK_DIR}/trained_models/wakeword/logs/validation" "${OUTPUT_DIR}/logs/" || :

printf '\n Training complete!\n'
echo " Full log: ${TRAIN_LOG}"

#######################################
# Escape a string for use inside a double-quoted JSON string value.
# Arguments: $1 - raw text
# Outputs:   the text with backslash, double quote, newline, carriage return
#            and tab replaced by their JSON escape sequences (no trailing
#            newline)
#######################################
json_escape() {
  local text="$1"
  # Backslashes first, so the escapes added below are not escaped again.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "${text}"
}

# Copy the trained model into the output directory under the wake word name.
tflite_filename="${wake_word_filename}.tflite"
tflite_path="${OUTPUT_DIR}/${tflite_filename}"
cp "${source_path}" "${tflite_path}"

# Metadata file written next to the model. The title is free-form user input,
# so it is escaped; the model file name was already reduced to [a-z0-9_].
json_path="${OUTPUT_DIR}/${wake_word_filename}.json"
cat <<EOF > "${json_path}"
{
  "type": "micro",
  "wake_word": "$(json_escape "${WAKE_WORD_TITLE}")",
  "author": "Tater Totterson",
  "website": "https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git",
  "model": "${tflite_filename}",
  "trained_languages": ["en"],
  "version": 2,
  "micro": {
    "probability_cutoff": 0.97,
    "sliding_window_size": 5,
    "feature_step_size": 10,
    "tensor_arena_size": 30000,
    "minimum_esphome_version": "2024.7.0"
  }
}
EOF

echo "Name: ${WAKE_WORD_TITLE}"
echo "Model: ${tflite_path}"
echo "Metadata: ${json_path}"
echo

# START_TS is recorded just before the training configuration is written.
END_TS=$EPOCHSECONDS
print_elapsed_time "${START_TS}" "${END_TS}" "Training completed."
echo