mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
cli + web recorder ui
This commit is contained in:
@@ -67,42 +67,66 @@ find_rev() {
|
||||
}
|
||||
|
||||
converter() {
|
||||
source ${DATA_DIR}/.venv/bin/activate
|
||||
# shellcheck source=/dev/null
|
||||
source "${DATA_DIR}/.venv/bin/activate"
|
||||
|
||||
python - "${AUDIO_DIR}" "${AUDIO16K_DIR}" <<-EOF
|
||||
import os, sys, subprocess, scipy.io.wavfile, numpy as np
|
||||
import os, sys
|
||||
from pathlib import Path
|
||||
import soundfile as sf
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
import scipy.io.wavfile
|
||||
import librosa
|
||||
from tqdm import tqdm
|
||||
|
||||
def write_wav(dst: Path, data: np.ndarray, sr: int):
|
||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||
x = np.clip(data, -1.0, 1.0)
|
||||
scipy.io.wavfile.write(dst, sr, (x * 32767).astype(np.int16))
|
||||
|
||||
audioset_dir = Path(sys.argv[1])
|
||||
audioset_out = Path(sys.argv[2])
|
||||
|
||||
# convert FLAC → 16k mono WAV
|
||||
flacs = list(audioset_dir.rglob("*.flac"))
|
||||
print(f" FLAC files: {len(flacs)}")
|
||||
total = len(flacs)
|
||||
print(f" FLAC files: {total}")
|
||||
print(" Converting AudioSet → 16k mono WAV")
|
||||
print(" Sit tight — this step can take a while.")
|
||||
print("")
|
||||
|
||||
audioset_bad = []
|
||||
ok = 0
|
||||
for p in tqdm(flacs, desc=" AudioSet→WAV (resample 16k mono)"):
|
||||
skipped = 0
|
||||
|
||||
START = datetime.now(timezone.utc).replace(microsecond=0)
|
||||
|
||||
# Heartbeat interval (prints every N files)
|
||||
HEARTBEAT_EVERY = 500
|
||||
|
||||
for idx, p in enumerate(flacs, start=1):
|
||||
try:
|
||||
outfile = Path(audioset_out / (p.stem + ".wav"))
|
||||
outfile = audioset_out / (p.stem + ".wav")
|
||||
if outfile.exists():
|
||||
continue
|
||||
y, _ = librosa.load(p, sr=16000, mono=True)
|
||||
if y.size == 0:
|
||||
raise ValueError("empty audio")
|
||||
write_wav(outfile, y, 16000)
|
||||
ok += 1
|
||||
skipped += 1
|
||||
else:
|
||||
y, _ = librosa.load(p, sr=16000, mono=True)
|
||||
if y.size == 0:
|
||||
raise ValueError("empty audio")
|
||||
write_wav(outfile, y, 16000)
|
||||
ok += 1
|
||||
except Exception as e:
|
||||
audioset_bad.append(f"{p}:{e}")
|
||||
|
||||
if idx == 1 or (idx % HEARTBEAT_EVERY) == 0 or idx == total:
|
||||
print(f" Progress: {idx}/{total} (ok={ok}, skipped={skipped}, failed={len(audioset_bad)})")
|
||||
|
||||
if audioset_bad:
|
||||
(audioset_out / "audioset_corrupted_files.log").write_text("\n".join(audioset_bad))
|
||||
print(f" AudioSet complete ({ok} ok, {len(audioset_bad)} failed)")
|
||||
|
||||
END = datetime.now(timezone.utc).replace(microsecond=0)
|
||||
elapsed = END - START
|
||||
print("")
|
||||
print(f" AudioSet complete ({ok} ok, {skipped} skipped, {len(audioset_bad)} failed) Elapsed: {elapsed}")
|
||||
EOF
|
||||
}
|
||||
|
||||
@@ -110,13 +134,15 @@ expected_filecount=$(get_total_filecount filecounts)
|
||||
actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
write_filecount=false
|
||||
|
||||
if [ "${actual_filecount}" -ne 0 ] && [ "${actual_filecount}" -eq "${expected_filecount}" ] ; then
|
||||
echo " Existing Audioset valid"
|
||||
# Option B behavior: if we already have output WAVs, don't re-download/re-extract/re-convert
|
||||
if [ "${actual_filecount}" -ne 0 ] ; then
|
||||
echo " Existing ${AUDIO16K_DIR} present (${actual_filecount} wav); skipping extract/convert"
|
||||
else
|
||||
dl=$(find_rev)
|
||||
[ -n "$dl" ] || { echo " Could not locate an AudioSet revision with FLAC tarballs still present on HF." ; exit 1 ; }
|
||||
rev=${dl%%,*}
|
||||
pattern=${dl##*,}
|
||||
|
||||
echo " Checking 10 tarballs"
|
||||
for i in {0..9} ; do
|
||||
fname="downloads/bal_train0${i}.tar"
|
||||
@@ -137,17 +163,16 @@ else
|
||||
rm -rf "${fname}"
|
||||
fi
|
||||
done
|
||||
|
||||
rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || :
|
||||
converter
|
||||
if [ -f "${AUDIO16K_DIR}/audioset_corrupted_files.log" ] ; then
|
||||
failed=$(cat "${AUDIO16K_DIR}/audioset_corrupted_files.log" | wc -l)
|
||||
filecounts[failed]=-${failed}
|
||||
fi
|
||||
|
||||
# Recompute counts and warn (but do not fail)
|
||||
expected_filecount=$(get_total_filecount filecounts)
|
||||
actual_filecount=$(find ${AUDIO16K_DIR} -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
if [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then
|
||||
echo " Converted file count(${actual_filecount}) != expected file count(${expected_filecount})" >&2
|
||||
exit 1
|
||||
echo " WARNING: mismatch is expected if some AudioSet files are corrupted; continuing." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -171,5 +196,4 @@ if "${CLEANUP_INTERMEDIATE_FILES}" && [ -d "${AUDIO_DIR}" ] ; then
|
||||
fi
|
||||
|
||||
echo " Audioset complete"
|
||||
exit 0
|
||||
|
||||
exit 0
|
||||
@@ -8,9 +8,9 @@ if [ ! -v DATA_DIR ] ; then
|
||||
[ -f .mww-data-dir ] && DATA_DIR="${PWD}" || DATA_DIR="/data"
|
||||
fi
|
||||
|
||||
DEFAULT_SAMPLES=20000
|
||||
DEFAULT_SAMPLES=50000
|
||||
DEFAULT_BATCH_SIZE=100
|
||||
DEFAULT_TRAINING_STEPS=25000
|
||||
DEFAULT_TRAINING_STEPS=40000
|
||||
|
||||
[ -f "${DATA_DIR}/.defaults.env" ] && source "${DATA_DIR}/.defaults.env" || :
|
||||
|
||||
|
||||
@@ -71,17 +71,16 @@ if not files:
|
||||
max_samples = len(files)
|
||||
|
||||
print(f"\n===== Augmenting {max_samples} wake word samples =====")
|
||||
|
||||
print(" Initializing libraries")
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"]="3"
|
||||
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"
|
||||
os.environ["TF_GPU_ALLOCATOR"]="cuda_malloc_async"
|
||||
os.environ["TF_XLA_FLAGS"]="--tf_xla_auto_jit=0"
|
||||
os.environ["NVIDIA_TF32_OVERRIDE"]="1"
|
||||
os.environ["TF_CUDNN_WORKSPACE_LIMIT_IN_MB"]="512"
|
||||
os.environ["GLOG_minloglevel"]="9"
|
||||
os.environ["GRPC_VERBOSITY"]="ERROR"
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
|
||||
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
|
||||
os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=0"
|
||||
os.environ["NVIDIA_TF32_OVERRIDE"] = "1"
|
||||
os.environ["TF_CUDNN_WORKSPACE_LIMIT_IN_MB"] = "512"
|
||||
os.environ["GLOG_minloglevel"] = "9"
|
||||
os.environ["GRPC_VERBOSITY"] = "ERROR"
|
||||
|
||||
print(" Loading Tensorflow")
|
||||
import tensorflow as tf
|
||||
@@ -98,6 +97,7 @@ gc.collect()
|
||||
|
||||
import numpy as np
|
||||
import librosa
|
||||
from tqdm import tqdm
|
||||
from mmap_ninja.ragged import RaggedMmap
|
||||
from microwakeword.audio.augmentation import Augmentation
|
||||
from microwakeword.audio.clips import Clips
|
||||
@@ -108,7 +108,7 @@ START_TIME = datetime.now(timezone.utc).replace(microsecond=0)
|
||||
|
||||
# Paths to augmented data
|
||||
impulse_paths = [ args.mit_rirs_16k_dir ]
|
||||
background_paths = [ args.fma_16k_dir, args.audioset_16k_dir]
|
||||
background_paths = [ args.fma_16k_dir, args.audioset_16k_dir ]
|
||||
|
||||
clips = Clips(
|
||||
input_directory=args.input_dir,
|
||||
@@ -139,8 +139,6 @@ augmenter = Augmentation(
|
||||
max_jitter_s=0.3,
|
||||
)
|
||||
|
||||
# Augment samples and save the training, validation, and testing sets.
|
||||
|
||||
def audio_generator_from_wavs(self, split="train", repeat=1):
|
||||
"""
|
||||
Yield 1-D float32 arrays loaded via librosa from input_dir/*.wav.
|
||||
@@ -175,7 +173,7 @@ def audio_generator_from_wavs(self, split="train", repeat=1):
|
||||
# Bind the patched generator to your existing `clips` instance
|
||||
clips.audio_generator = types.MethodType(audio_generator_from_wavs, clips)
|
||||
|
||||
# ---- Split config (same as before) ----
|
||||
# ---- Split config ----
|
||||
split_cfg = {
|
||||
"training": {"name": "train", "repetition": 2, "slide_frames": 10},
|
||||
"validation": {"name": "validation", "repetition": 1, "slide_frames": 10},
|
||||
@@ -188,28 +186,34 @@ for split, cfg in split_cfg.items():
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
print(f" Augmenting {split}")
|
||||
|
||||
print(f" Generating spectrograms")
|
||||
print(" Generating spectrograms")
|
||||
spectros = SpectrogramGeneration(
|
||||
clips=clips, # now backed by our WAV loader
|
||||
augmenter=augmenter, # your existing augmenter
|
||||
clips=clips,
|
||||
augmenter=augmenter,
|
||||
slide_frames=cfg["slide_frames"],
|
||||
step_ms=10,
|
||||
)
|
||||
|
||||
print(f" Generating files")
|
||||
print(" Generating files")
|
||||
print(" Sit tight — this step can take a while.")
|
||||
|
||||
gen = spectros.spectrogram_generator(
|
||||
split=cfg["name"],
|
||||
repeat=cfg["repetition"],
|
||||
)
|
||||
|
||||
RaggedMmap.from_generator(
|
||||
out_dir=str(out_dir / "wakeword_mmap"),
|
||||
sample_generator=spectros.spectrogram_generator(
|
||||
split=cfg["name"], repeat=cfg["repetition"]
|
||||
),
|
||||
sample_generator=gen,
|
||||
batch_size=100,
|
||||
verbose=False,
|
||||
verbose=False, # keep mmap quiet
|
||||
)
|
||||
|
||||
print(f" {split} augmentation complete")
|
||||
|
||||
END_TIME = datetime.now(timezone.utc).replace(microsecond=0)
|
||||
et = END_TIME - START_TIME
|
||||
print(f"\n{'=' * 80}")
|
||||
msg=f"Augmented {max_samples} wake word samples."
|
||||
msg = f"Augmented {max_samples} wake word samples."
|
||||
print(f"{msg:>50s} Elapsed time: {et!s}")
|
||||
print(f"{'=' * 80}\n")
|
||||
print(f"{'=' * 80}\n")
|
||||
@@ -129,88 +129,136 @@ EOF
|
||||
echo " Wrote training_parameters.yaml"
|
||||
rm -rf "${WORK_DIR}/trained_models/wakeword"
|
||||
|
||||
export TF_CPP_MIN_LOG_LEVEL=9
|
||||
export TF_FORCE_GPU_ALLOW_GROWTH=true
|
||||
export TF_GPU_ALLOCATOR=cuda_malloc_async
|
||||
export TF_XLA_FLAGS="--tf_xla_auto_jit=0"
|
||||
export NVIDIA_TF32_OVERRIDE=1
|
||||
export TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512
|
||||
export GLOG_minloglevel=9
|
||||
export GRPC_VERBOSITY=ERROR
|
||||
|
||||
echo " Loading Tensorflow"
|
||||
|
||||
wake_word_filename="${WAKE_WORD//[ \`~\!\$&*\(\)\{\}\[\]\|\;\'\"<>.?\/]/_}"
|
||||
wake_word_filename="${WAKE_WORD//[ \`~\!\$&*\(\)\{\}\[\]\|\;\'\"<>.?\/]/_}"
|
||||
OUTPUT_DIR="${DATA_DIR}/output/$(date +'%Y-%m-%d-%H-%M-%S')-${wake_word_filename}-${SAMPLES}-${TRAINING_STEPS}"
|
||||
mkdir -p "${OUTPUT_DIR}/logs" || :
|
||||
|
||||
python - \
|
||||
--training_config="${WORK_DIR}/trained_models/training_parameters.yaml" \
|
||||
--train 1 \
|
||||
--restore_checkpoint 1 \
|
||||
--test_tf_nonstreaming 0 \
|
||||
--test_tflite_nonstreaming 0 \
|
||||
--test_tflite_nonstreaming_quantized 0 \
|
||||
--test_tflite_streaming 0 \
|
||||
--test_tflite_streaming_quantized 1 \
|
||||
--use_weights "best_weights" \
|
||||
mixednet \
|
||||
--pointwise_filters "64,64,64,64" \
|
||||
--repeat_in_block "1,1,1,1" \
|
||||
--mixconv_kernel_sizes "[5], [7,11], [9,15], [23]" \
|
||||
--residual_connection "0,0,0,0" \
|
||||
--first_conv_filters 32 \
|
||||
--first_conv_kernel_size 5 \
|
||||
--stride 2 <<EOF 2>&1 | tr '\r' '\n' | stdbuf -i0 -o0 sed -r -e "/^Validation Batch/d" |\
|
||||
tee "${OUTPUT_DIR}/logs/training.log" | sed -r -e '/^INFO:absl:/!d' \
|
||||
-r -e "/None|Sharding|unsupported characters|AUC|fingerprint/d" \
|
||||
-r -e 's/INFO:absl:/ /g' \
|
||||
-r -e "s/, (recall =|estimated false|average viable recall)/,\n \1/g"
|
||||
TRAIN_LOG="${OUTPUT_DIR}/logs/training.log"
|
||||
|
||||
import sys, os, gc
|
||||
import runpy
|
||||
import yaml
|
||||
print(" Loading Tensorflow")
|
||||
import tensorflow as tf
|
||||
# ------------------------------------------------------------------
|
||||
# Training args (same as before)
|
||||
# ------------------------------------------------------------------
|
||||
TRAIN_ARGS=(
|
||||
-m microwakeword.model_train_eval
|
||||
--training_config "${WORK_DIR}/trained_models/training_parameters.yaml"
|
||||
--train 1
|
||||
--restore_checkpoint 1
|
||||
--test_tf_nonstreaming 0
|
||||
--test_tflite_nonstreaming 0
|
||||
--test_tflite_nonstreaming_quantized 0
|
||||
--test_tflite_streaming 0
|
||||
--test_tflite_streaming_quantized 1
|
||||
--use_weights best_weights
|
||||
mixednet
|
||||
--pointwise_filters "64,64,64,64"
|
||||
--repeat_in_block "1,1,1,1"
|
||||
--mixconv_kernel_sizes "[5], [7,11], [9,15], [23]"
|
||||
--residual_connection "0,0,0,0"
|
||||
--first_conv_filters 32
|
||||
--first_conv_kernel_size 5
|
||||
--stride 2
|
||||
)
|
||||
|
||||
print(" GPU memory config")
|
||||
# Per-device memory growth (belt + suspenders)
|
||||
for g in tf.config.list_physical_devices("GPU"):
|
||||
try:
|
||||
tf.config.experimental.set_memory_growth(g, True)
|
||||
except Exception:
|
||||
pass
|
||||
print(f"INFO:absl:GPUs: {tf.config.list_physical_devices('GPU')}")
|
||||
gc.collect()
|
||||
# ------------------------------------------------------------------
|
||||
# GPU failure markers that should trigger CPU fallback
|
||||
# (OOM + known GPU runtime/copy/init failures)
|
||||
# ------------------------------------------------------------------
|
||||
GPU_FALLBACK_MARKERS=(
|
||||
"resourceexhaustederror"
|
||||
"resource exhausted"
|
||||
"oom"
|
||||
"out of memory"
|
||||
"cuda_error_out_of_memory"
|
||||
"failed to allocate"
|
||||
"cudnn"
|
||||
"cublas"
|
||||
"internalerror: cuda"
|
||||
"failed call to cuinit"
|
||||
"dst tensor is not initialized"
|
||||
"failed copying input tensor"
|
||||
"_eagerconst"
|
||||
)
|
||||
|
||||
print()
|
||||
try:
|
||||
runpy.run_module("microwakeword.model_train_eval", run_name="__main__", alter_sys=True)
|
||||
except Exception as e:
|
||||
print(e, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
EOF
|
||||
run_attempt() {
|
||||
local label="$1"
|
||||
shift
|
||||
echo
|
||||
echo "================================================================================"
|
||||
echo "===== ${label} ====="
|
||||
echo "================================================================================"
|
||||
echo "→ ${PYTHON_BIN:-python} ${TRAIN_ARGS[*]}"
|
||||
echo
|
||||
|
||||
# stream everything except validation minibatch spam
|
||||
"${PYTHON_BIN:-python}" "${TRAIN_ARGS[@]}" 2>&1 \
|
||||
| tr '\r' '\n' \
|
||||
| stdbuf -i0 -o0 sed -r -e "/^Validation Batch/d" \
|
||||
| tee "${TRAIN_LOG}" \
|
||||
| sed -r -e "/^Validation Batch/d" -e "s/^INFO:absl:/ /g"
|
||||
|
||||
return ${PIPESTATUS[0]}
|
||||
}
|
||||
|
||||
# ---- Common TF env (mirrors your notebook) ----
|
||||
export TF_CPP_MIN_LOG_LEVEL="${TF_CPP_MIN_LOG_LEVEL:-2}"
|
||||
export TF_XLA_FLAGS="${TF_XLA_FLAGS:---tf_xla_auto_jit=0}"
|
||||
export NVIDIA_TF32_OVERRIDE="${NVIDIA_TF32_OVERRIDE:-1}"
|
||||
export TF_FORCE_GPU_ALLOW_GROWTH="${TF_FORCE_GPU_ALLOW_GROWTH:-true}"
|
||||
export TF_GPU_ALLOCATOR="${TF_GPU_ALLOCATOR:-cuda_malloc_async}"
|
||||
|
||||
# Attempt 1: GPU
|
||||
if run_attempt "Attempt 1/2: GPU training (allow_growth + cuda_malloc_async)" ; then
|
||||
echo "✅ Training complete (GPU path)."
|
||||
else
|
||||
echo "⚠️ GPU attempt failed. Checking whether this looks like a GPU/OOM/runtime failure…"
|
||||
|
||||
# Check log for GPU/OOM/runtime markers
|
||||
log_lc="$(tr '[:upper:]' '[:lower:]' < "${TRAIN_LOG}" || true)"
|
||||
looks_like_gpu_fail="false"
|
||||
for m in "${GPU_FALLBACK_MARKERS[@]}"; do
|
||||
if echo "${log_lc}" | grep -qF "${m}"; then
|
||||
looks_like_gpu_fail="true"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "${looks_like_gpu_fail}" = "true" ]; then
|
||||
echo "↪️ Detected GPU/OOM/runtime failure markers. Falling back to CPU."
|
||||
|
||||
# Attempt 2: CPU (hide GPU completely)
|
||||
export CUDA_VISIBLE_DEVICES=""
|
||||
unset TF_GPU_ALLOCATOR
|
||||
if run_attempt "Attempt 2/2: CPU fallback (CUDA_VISIBLE_DEVICES='')" ; then
|
||||
echo "✅ Training complete (CPU fallback)."
|
||||
else
|
||||
echo "❌ Training failed on BOTH GPU and CPU. See: ${TRAIN_LOG}" >&2
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "❌ Training failed (does not look GPU/OOM/runtime). See: ${TRAIN_LOG}" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
source_path="${WORK_DIR}/trained_models/wakeword/tflite_stream_state_internal_quant/stream_state_internal_quant.tflite"
|
||||
|
||||
if [ ! -f "${source_path}" ] ; then
|
||||
echo "Output model not found! Training didn't complete successfully. See ${WORK_DIR}/training.log"
|
||||
echo "Output model not found! Training didn't complete successfully. See ${TRAIN_LOG}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cp "${WORK_DIR}/trained_models/wakeword/model_summary.txt" "${OUTPUT_DIR}/logs/"
|
||||
cp -a "${WORK_DIR}/trained_models/wakeword/logs/train" "${OUTPUT_DIR}/logs/"
|
||||
cp -a "${WORK_DIR}/trained_models/wakeword/logs/validation" "${OUTPUT_DIR}/logs/"
|
||||
cp "${WORK_DIR}/trained_models/wakeword/model_summary.txt" "${OUTPUT_DIR}/logs/" || :
|
||||
cp -a "${WORK_DIR}/trained_models/wakeword/logs/train" "${OUTPUT_DIR}/logs/" || :
|
||||
cp -a "${WORK_DIR}/trained_models/wakeword/logs/validation" "${OUTPUT_DIR}/logs/" || :
|
||||
|
||||
echo -e "\n Training complete!"
|
||||
echo " Full log: ${OUTPUT_DIR}/logs/training.log"
|
||||
echo " Full log: ${TRAIN_LOG}"
|
||||
|
||||
tflite_filename="${wake_word_filename}.tflite"
|
||||
tflite_path="${OUTPUT_DIR}/${tflite_filename}"
|
||||
|
||||
cp "${source_path}" "${tflite_path}"
|
||||
|
||||
# --- Write JSON metadata file with matching model name ---
|
||||
json_path="${OUTPUT_DIR}/${wake_word_filename}.json"
|
||||
cat <<-EOF > "${json_path}"
|
||||
{
|
||||
@@ -237,5 +285,4 @@ echo "Metadata: ${json_path}"
|
||||
echo
|
||||
END_TS=$EPOCHSECONDS
|
||||
print_elapsed_time "${START_TS}" "${END_TS}" "Training completed."
|
||||
echo
|
||||
|
||||
echo
|
||||
Reference in New Issue
Block a user