Files
microWakeWord-Trainer-Nvidi…/cli/setup_mit_audio
George Joseph cb81f7f02d Train from the command line
The files in the `cli` directory allow you to train wake words
from the command line without needing to use the Jupyter notebook
or a web browser. The logic from the notebook has been
placed in separate shell scripts and Python files, wrapped by three high-level
scripts that do the following:

* setup_python_venv: Creates a Python virtual environment with all the
packages needed to train.  The venv is created in the container's /data
directory and is therefore stored on the host, not in the container's root
docker volume.

* setup_training_datasets: Downloads, extracts and converts the MIT RIR,
FMA, Audioset and Negative training reference datasets.  Also stored in /data.

* train_wake_word: Generates the wake word samples, augments them with the
audio from the training datasets, and finally runs the microwakeword training.
The resulting model tflite and json files are placed in the /data/output
directory.

See the README.md file for much more information.
2025-12-28 12:48:51 -07:00

125 lines
3.7 KiB
Bash
Executable File

#!/bin/bash
#
# setup_mit_audio: download, extract and convert the MIT RIR audio set to
# 16 kHz WAV files under ${DATA_DIR}/training_datasets.
#
# HELP, DATA_DIR, CLEANUP_ARCHIVES and CLEANUP_INTERMEDIATE_FILES, as well as
# the get_filecounts/write_filecounts helpers, are expected to be provided by
# the sourced shell.functions file (not shown here -- confirm there).
set -euo pipefail

# Resolve the directory this script lives in so shell.functions is found
# regardless of the caller's working directory.
PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")
# shellcheck source=/dev/null
source "${PROGDIR}/shell.functions"

if [ "${HELP}" == "true" ] ; then
  # The synopsis must name the same option that is documented below it
  # (--cleanup-intermediate-files), which matches the
  # CLEANUP_INTERMEDIATE_FILES variable used later in the script.
  cat <<EOF >&2
Usage: $0 [ --cleanup-archives ] [ --cleanup-intermediate-files ] [ --data-dir=<data_dir> ]
  --cleanup-archives : Automatically clean up any downloaded archives after
                       extraction.
  --cleanup-intermediate-files
                     : Automatically clean up the intermediate files after they've
                     : been converted to 16k.
  <data_dir>         : Path to the data directory.
                     : Default: ${DATA_DIR}
EOF
  exit 1
fi

# All relative paths below are resolved against the training_datasets dir.
mkdir -p "${DATA_DIR}/training_datasets/downloads" || :
cd "${DATA_DIR}/training_datasets"

# Source archive and where it is stored locally.
AUDIO_URL="https://mcdermottlab.mit.edu/Reverb/IRMAudio/Audio.zip"
AUDIO_ZIPFILE="MIT_RIR_Audio.zip"
AUDIO_ZIP="./downloads/${AUDIO_ZIPFILE}"

# Extracted (intermediate) audio and the converted 16k output.
AUDIO_DIR="./mit_rirs"
mkdir -p "${AUDIO_DIR}" || :
AUDIO16K_DIR="./mit_rirs_16k"
mkdir -p "${AUDIO16K_DIR}" || :

# File used to persist the file count between runs, and the glob that
# selects input files inside AUDIO_DIR.
AUDIO_FILECOUNT="./downloads/mit_rir_filecount"
AUDIO_IN_GLOB="*.wav"

# Default the count for this archive to 0, then let get_filecounts update the
# array from AUDIO_FILECOUNT (helper semantics live in shell.functions).
declare -A filecounts=( ["${AUDIO_ZIPFILE}"]=0 )
get_filecounts filecounts "${AUDIO_FILECOUNT}"

echo "===== Checking MIT_RIR ====="
#######################################
# Convert every WAV file found under AUDIO_DIR to a 16 kHz mono, 16-bit WAV
# in AUDIO16K_DIR, using the Python virtual environment in DATA_DIR.
# Globals:   DATA_DIR (read), AUDIO_DIR (read), AUDIO16K_DIR (read)
# Arguments: none
# Outputs:   progress messages from the embedded Python script
# Returns:   non-zero if the venv cannot be activated or the script fails
#######################################
converter() {
  # Quoted so a data directory containing whitespace still activates.
  # shellcheck source=/dev/null
  source "${DATA_DIR}/.venv/bin/activate" || return 1

  # The delimiter is quoted so the shell never expands anything inside the
  # Python source; the directories are passed as argv instead.
  python - "${AUDIO_DIR}" "${AUDIO16K_DIR}" <<'PYTHON_EOF'
import os, sys, subprocess, scipy.io.wavfile, numpy as np
from pathlib import Path
import soundfile as sf
import librosa
from tqdm import tqdm

def write_wav(dst: Path, data: np.ndarray, sr: int):
    """Clip samples to [-1.0, 1.0] and write them as 16-bit PCM."""
    x = np.clip(data, -1.0, 1.0)
    scipy.io.wavfile.write(dst, sr, (x * 32767).astype(np.int16))

rir_in = Path(sys.argv[1])
rir_out = Path(sys.argv[2])

# Input files are searched recursively; outputs are written flat by file name.
waves = list(rir_in.rglob("*.wav"))
try:
    print(" MIT RIR normalizing to 16k…")
    # Normalize to 16k mono
    for p in tqdm(waves, desc=" MIT_RIR (resample 16k mono)"):
        outfile = Path(rir_out / p.name)
        # Already converted on a previous run.
        if outfile.exists():
            continue
        a, sr = sf.read(p, always_2d=False)
        # Multi-channel input: keep only the first channel.
        if a.ndim > 1:
            a = a[:, 0]
        # Anything not already at 16 kHz is reloaded through librosa,
        # which resamples and downmixes.
        if sr != 16000:
            a, _ = librosa.load(p, sr=16000, mono=True)
        write_wav(outfile, a, 16000)
    print(" MIT RIR normalization complete")
except Exception as e2:
    print(f" MIT RIR fallback failed: {e2}")
    raise
PYTHON_EOF
}
# ---------------------------------------------------------------------------
# Main flow: reuse the converted 16k files when their count matches the
# recorded count; otherwise (re)download and/or extract the archive as
# needed, convert, and record the new count.
# ---------------------------------------------------------------------------

# Count recorded for this archive in the filecounts array, compared with the
# number of converted files actually present.
expected_filecount=${filecounts[${AUDIO_ZIPFILE}]}
actual_filecount=$(find "${AUDIO16K_DIR}" -name '*.wav' 2>/dev/null | wc -l) || :
write_filecount=false

if [ "${actual_filecount}" -ne 0 ] && [ "${actual_filecount}" -eq "${expected_filecount}" ] ; then
  echo " Existing ${AUDIO16K_DIR} valid"
else
  # Converted output is missing or incomplete; check the extracted input.
  actual_filecount=$(find "${AUDIO_DIR}" -name "${AUDIO_IN_GLOB}" 2>/dev/null | wc -l) || :
  if [ "${actual_filecount}" -eq 0 ] || [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then
    if [ ! -f "${AUDIO_ZIP}" ] ; then
      echo " Downloading ${AUDIO_ZIPFILE}"
      # Download to a temporary name and rename only on success. Writing
      # directly to AUDIO_ZIP would leave a truncated archive behind after
      # an interrupted transfer, and the -f check above would then skip the
      # download on every later run.
      audio_zip_part="${AUDIO_ZIP}.part"
      rm -f -- "${audio_zip_part}"
      if ! curl -sfL "${AUDIO_URL}" -o "${audio_zip_part}" ; then
        rm -f -- "${audio_zip_part}"
        echo " Download of ${AUDIO_ZIPFILE} failed" >&2
        exit 1
      fi
      mv -f -- "${audio_zip_part}" "${AUDIO_ZIP}"
    fi
    # Start from a clean extraction directory; :? guards against an empty path.
    rm -rf -- "${AUDIO_DIR:?}" || :
    echo " Unzipping ${AUDIO_ZIPFILE}"
    unzip -u -q -d "${AUDIO_DIR}" "${AUDIO_ZIP}"
  fi

  # The archive is no longer needed once extracted; drop it before the
  # conversion step when requested.
  if "${CLEANUP_ARCHIVES}" && [ -f "${AUDIO_ZIP}" ] ; then
    echo " Cleaning up ${AUDIO_ZIPFILE}"
    rm -rf -- "${AUDIO_ZIP}"
  fi

  converter

  # Record how many converted files now exist so the next run can validate.
  actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
  filecounts[${AUDIO_ZIPFILE}]="${actual_filecount}"
  write_filecount=true
fi

if ${write_filecount} ; then
  write_filecounts filecounts "${AUDIO_FILECOUNT}"
fi

# Also covers the case where the 16k output was already valid but an archive
# from an earlier run is still present.
if "${CLEANUP_ARCHIVES}" && [ -f "${AUDIO_ZIP}" ] ; then
  echo " Cleaning up ${AUDIO_ZIPFILE}"
  rm -rf -- "${AUDIO_ZIP}"
fi

# The extracted originals are only an intermediate step towards the 16k files.
if "${CLEANUP_INTERMEDIATE_FILES}" && [ -d "${AUDIO_DIR}" ]; then
  echo " Cleaning up ${AUDIO_DIR}"
  rm -rf -- "${AUDIO_DIR}"
fi

echo " MIT_RIR complete"
exit 0