mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Train from the command line
The files in the `cli` directory allow you to train wake words from the command line without needing to use the Jupyter notebook or a web browser. The logic from the notebook has been placed in separate shell scripts and Python files, wrapped by three high-level scripts that do the following:
* setup_python_venv: Creates a Python virtual environment with all the packages needed to train. The venv is created in the container's /data directory and is therefore stored on the host, not in the container's root docker volume.
* setup_training_datasets: Downloads, extracts and converts the MIT RIR, FMA, Audioset and Negative training reference datasets. These are also stored in /data.
* train_wake_word: Generates the wake word samples, augments them with the audio from the training datasets, and finally runs the microwakeword training. The resulting model tflite and json files are placed in the /data/output directory.
See the README.md file for much more information.
This commit is contained in:
85
cli/setup_negative_datasets
Executable file
85
cli/setup_negative_datasets
Executable file
@@ -0,0 +1,85 @@
|
||||
#!/bin/bash
#
# Downloads and unpacks the negative training datasets into
# <data_dir>/training_datasets/negative_datasets, keeping the downloaded
# archives in <data_dir>/training_datasets/downloads.

# Abort on errors, on unset variables and on failures anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# Resolved path of this script and the directory that contains it.
PROGPATH="$(realpath "$0")"
PROGDIR="$(dirname "${PROGPATH}")"

# Shared helpers live next to this script.
# NOTE(review): HELP, DATA_DIR, CLEANUP_ARCHIVES, get_filecounts and
# write_filecounts are used further down but never defined in this script,
# so they presumably come from this file -- confirm.
. "${PROGDIR}/shell.functions"
|
||||
|
||||
#######################################
# Print the command-line help text.
# Globals:   DATA_DIR (read) - shown as the default data directory
# Outputs:   the usage text on stderr
#######################################
print_negative_datasets_usage() {
  cat <<EOF >&2
Usage: $0 [ --cleanup-archives ] [ --data-dir=<data_dir> ]

  --cleanup-archives : Automatically clean up any downloaded archives after
                       extraction.
  <data_dir>         : Path to the data directory.
                     : Default: ${DATA_DIR}

EOF
}

# HELP is not assigned anywhere in this script.
# NOTE(review): presumably the sourced shell.functions sets it while parsing
# the command line -- confirm. The ':-false' default keeps 'set -u' from
# aborting with an unhelpful message if it is ever left unset.
if [ "${HELP:-false}" == "true" ] ; then
  print_negative_datasets_usage
  # Kept from the original: a help request exits with status 1.
  exit 1
fi
|
||||
|
||||
# Work from inside <data_dir>/training_datasets so that every path used
# below can be relative. The '|| :' keeps a failing mkdir from aborting the
# script; the following cd still fails if the directory is unusable.
mkdir -p "${DATA_DIR}/training_datasets/downloads" || :
cd "${DATA_DIR}/training_datasets"

mkdir -p ./negative_datasets || :

# Base URL of the archives; each dataset is fetched as <base>/<name>.zip.
NEGATIVE_DATASET_URL="https://huggingface.co/datasets/kahrendt/microwakeword/resolve/main"
declare -a NEGATIVE_DATASETS=(
  dinner_party
  dinner_party_eval
  no_speech
  speech
)
# File that holds the per-archive file counts between runs.
AUDIO_FILECOUNT="./downloads/negative_filecount"

# Start every archive at a count of 0, then let get_filecounts fill the
# table in. NOTE(review): get_filecounts is defined outside this script; it
# presumably loads previously saved counts from AUDIO_FILECOUNT -- confirm.
declare -A filecounts=()
for _dataset in "${NEGATIVE_DATASETS[@]}" ; do
  filecounts["${_dataset}.zip"]=0
done
unset _dataset
get_filecounts filecounts "${AUDIO_FILECOUNT}"

echo "===== Checking negative datasets: ${NEGATIVE_DATASETS[*]} ====="
# Flipped to true as soon as any dataset is (re)extracted.
write_filecount=false
|
||||
|
||||
#######################################
# Count the '*.ninja' files below a dataset directory.
# Arguments: $1 - directory to scan (it may not exist)
# Outputs:   the number of matching files on stdout; 0 when the directory
#            is missing or cannot be read
#######################################
count_ninja_files() {
  local dir=$1
  local count
  # find fails on a missing directory, which would abort the script under
  # 'set -e -o pipefail'; the failure is deliberately ignored because wc
  # still reports 0 in that case.
  count=$(find "${dir}" -name '*.ninja' 2>/dev/null | wc -l) || :
  printf '%d\n' "${count:-0}"
}

#######################################
# Download an archive without ever leaving a partial file at its final path.
# The data is written to "<dest>.part" and only renamed once curl succeeds,
# so an interrupted download cannot later be mistaken for a complete archive.
# Arguments: $1 - source URL
#            $2 - destination path
# Returns:   0 on success, otherwise curl's exit status
#######################################
fetch_archive() {
  local url=$1
  local dest=$2
  local partial="${dest}.part"
  local rc=0

  curl -sfL "${url}" -o "${partial}" || rc=$?
  if [ "${rc}" -ne 0 ] ; then
    rm -f -- "${partial}"
    echo " ERROR: download of ${url} failed (curl exit status ${rc})" >&2
    return "${rc}"
  fi
  mv -f -- "${partial}" "${dest}"
}

# Make sure every dataset is extracted. A dataset is considered valid when
# it contains at least one .ninja file and the number of .ninja files equals
# the count recorded in 'filecounts'.
for ds in "${NEGATIVE_DATASETS[@]}" ; do
  AUDIO_ZIPFILE="${ds}.zip"
  AUDIO_ZIP="./downloads/${AUDIO_ZIPFILE}"
  AUDIO_DIR="./negative_datasets/${ds}"
  mkdir -p "${AUDIO_DIR}" || :

  # Default to 0 so a missing or empty entry cannot break the -eq test.
  expected_filecount=${filecounts[${AUDIO_ZIPFILE}]:-0}
  actual_filecount=$(count_ninja_files "${AUDIO_DIR}")

  if [ "${actual_filecount}" -ne 0 ] && [ "${actual_filecount}" -eq "${expected_filecount}" ] ; then
    echo " Existing ${ds} valid"
    continue
  fi

  # Reuse an archive that is already present; otherwise download it.
  if [ ! -f "${AUDIO_ZIP}" ] ; then
    echo " Downloading ${AUDIO_ZIPFILE}"
    fetch_archive "${NEGATIVE_DATASET_URL}/${ds}.zip" "${AUDIO_ZIP}"
  fi

  # Start from a clean directory so stale files cannot distort the count.
  rm -rf -- "${AUDIO_DIR}" || :
  echo " Unzipping ${AUDIO_ZIPFILE}"
  unzip -q -d "./negative_datasets" "${AUDIO_ZIP}"
  actual_filecount=$(count_ninja_files "${AUDIO_DIR}")
  if [ "${actual_filecount}" -eq 0 ] ; then
    # Nothing usable was extracted; the dataset will be treated as invalid
    # on the next run, so at least say so now.
    echo " WARNING: no .ninja files found in ${AUDIO_DIR} after unzipping ${AUDIO_ZIPFILE}" >&2
  fi
  filecounts[${AUDIO_ZIPFILE}]="${actual_filecount}"
  write_filecount=true

  # Remove the archive right away when asked to, to free disk space early.
  if "${CLEANUP_ARCHIVES}" && [ -f "${AUDIO_ZIP}" ] ; then
    echo " Cleaning up ${AUDIO_ZIPFILE}"
    rm -rf -- "${AUDIO_ZIP}"
  fi
done
|
||||
|
||||
# Save the counts only when at least one dataset was (re)extracted.
# NOTE(review): write_filecounts is defined outside this script; it
# presumably stores the table in AUDIO_FILECOUNT -- confirm.
if ${write_filecount} ; then
  write_filecounts filecounts "${AUDIO_FILECOUNT}"
fi

# Final sweep: remove any archive that is still present, which also covers
# datasets that were already valid and therefore skipped by the loop above.
if "${CLEANUP_ARCHIVES}" ; then
  for ds in "${NEGATIVE_DATASETS[@]}" ; do
    AUDIO_ZIPFILE="${ds}.zip"
    AUDIO_ZIP="./downloads/${AUDIO_ZIPFILE}"
    [ -f "${AUDIO_ZIP}" ] || continue
    echo " Cleaning up ${AUDIO_ZIPFILE}"
    rm -rf "${AUDIO_ZIP}"
  done
fi

echo " Negative datasets complete"
|
||||
|
||||
Reference in New Issue
Block a user