cli + web recorder ui

This commit is contained in:
MasterPhooey
2026-01-17 01:23:51 -06:00
parent b700bf095c
commit 5bc0f12a7f
24 changed files with 2002 additions and 2033 deletions

BIN
cli/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -1,135 +0,0 @@
# ~/.bashrc: executed by bash(1) for non-login shells.
# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
# for examples
# If not running interactively, don't do anything
# (PS1 is empty in that case, so the rest of this file is skipped.)
[ -z "$PS1" ] && return
# don't put duplicate lines in the history. See bash(1) for more options
# ... or force ignoredups and ignorespace
HISTCONTROL=ignoredups:ignorespace
# append to the history file, don't overwrite it
shopt -s histappend
# for setting history length see HISTSIZE and HISTFILESIZE in bash(1)
HISTSIZE=1000
HISTFILESIZE=2000
# check the window size after each command and, if necessary,
# update the values of LINES and COLUMNS.
shopt -s checkwinsize
# make less more friendly for non-text input files, see lesspipe(1)
[ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)"
# set variable identifying the chroot you work in (used in the prompt below)
if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
debian_chroot=$(cat /etc/debian_chroot)
fi
# set a fancy prompt (non-color, unless we know we "want" color)
case "$TERM" in
xterm-color) color_prompt=yes;;
esac
# uncomment for a colored prompt, if the terminal has the capability; turned
# off by default to not distract the user: the focus in a terminal window
# should be on the output of commands, not on the prompt
#force_color_prompt=yes
if [ -n "$force_color_prompt" ]; then
if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then
# We have color support; assume it's compliant with Ecma-48
# (ISO/IEC-6429). (Lack of such support is extremely rare, and such
# a case would tend to support setf rather than setaf.)
color_prompt=yes
else
color_prompt=
fi
fi
# Pick the colored or the plain prompt. Both are single-quoted so that
# ${debian_chroot...} is expanded each time the prompt is displayed.
if [ "$color_prompt" = yes ]; then
PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
else
PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '
fi
# These two variables were only needed to choose PS1 above.
unset color_prompt force_color_prompt
# If this is an xterm set the title to user@host:dir
# (done by prepending a title-setting escape sequence to the existing PS1)
case "$TERM" in
xterm*|rxvt*)
PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1"
;;
*)
;;
esac
# enable color support of ls and also add handy aliases
if [ -x /usr/bin/dircolors ]; then
# Use ~/.dircolors when it is readable; otherwise (or if evaluating it
# fails) fall back to the settings printed by a plain "dircolors -b".
test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)"
alias ls='ls --color=auto'
#alias dir='dir --color=auto'
#alias vdir='vdir --color=auto'
alias grep='grep --color=auto'
alias fgrep='fgrep --color=auto'
alias egrep='egrep --color=auto'
fi
# some more ls aliases
alias ll='ls -alF'
alias la='ls -A'
alias l='ls -CF'
# Alias definitions.
# You may want to put all your additions into a separate file like
# ~/.bash_aliases, instead of adding them here directly.
# See /usr/share/doc/bash-doc/examples in the bash-doc package.
if [ -f ~/.bash_aliases ]; then
. ~/.bash_aliases
fi
# enable programmable completion features (you don't need to enable
# this, if it's already enabled in /etc/bash.bashrc and /etc/profile
# sources /etc/bash.bashrc).
#if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
# . /etc/bash_completion
#fi
# --- Container-specific setup: everything below revolves around /data ---

# Source the user's own additions kept in /data/.bashrc, when that file exists.
[ -f /data/.bashrc ] && source /data/.bashrc

# Print a warning on stderr when /data is not a mount point.
mountpoint -q /data || cat <<-EOF >&2
=======================================================
WARNING: The /data directory is NOT mounted.
Running the training process without /data mounted
could add over 140Gb of python packages and training
files to this container's storage which is probably
NOT what you want.
You should remove this container and re-create it with
a 'docker run' option like '-v <host_work_dir>:/data'
making sure the host directory is on a device that has
enough free space.
=======================================================
EOF

# Activate the virtualenv under /data/.venv if it is there; otherwise tell
# the user (on stderr) how to create it.
if [ ! -d /data/.venv ]; then
cat <<-EOF >&2
=======================================================
WARNING: A python virtual environment wasn't found
at /data/.venv. You'll need to run 'setup_python_venv'
before you'll be able to use this container for
training.
=======================================================
EOF
else
source /data/.venv/bin/activate
fi

# "venv": (re)activate the virtualenv by hand, e.g. right after creating it.
alias venv='[ -d /data/.venv ] && source /data/.venv/bin/activate || echo "/data/.venv does not exist yet"'

View File

@@ -1,27 +0,0 @@
# Since this is a pure python environment, we don't need to start
# with a huge CUDA image. A standard Ubuntu image will do.
FROM ubuntu:24.04

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# below without leaking the setting into the running container's environment
# (where a user may legitimately want interactive apt behaviour).
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#  - PYTHONUNBUFFERED, PIP_* and HF_HUB_* adjust python/pip/huggingface
#    behaviour for the tools run inside the container.
#  - PATH puts the scripts copied to /root/mww-scripts below in front of
#    everything else so they can be run by name.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_ROOT_USER_ACTION=ignore \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
    PATH="/root/mww-scripts:${PATH}"

# System deps
# The apt package lists are removed in the same layer that created them so
# they don't add to the image size. /data is created as the mount point for
# the host work directory.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3.12 python3.12-venv python3.12-dev python3-pip python-is-python3 \
        git wget curl unzip ca-certificates nano less \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p /data

# Shell startup file for the interactive root shell started by CMD.
COPY --chown=root:root --chmod=0755 .bashrc /root/

# Setup/training scripts plus the files they need, placed on the PATH set above.
COPY --chown=root:root --chmod=0755 setup_* wake_word_sample* train_wake_word \
    test_python cudainfo system_summary shell.functions requirements.txt /root/mww-scripts/

# Docker and Podman send the CMD a SIGTERM when you "stop" the container. Unfortunately, bash
# normally doesn't exit when it receives a SIGTERM so docker/podman has to wait for the "stop"
# to timeout then SIGKILL the container.
# This little scriptlet causes bash to exit immediately when it receives the SIGTERM.
CMD ["/usr/bin/bash", "-c", "exec /usr/bin/bash --rcfile <(echo '[ -f ~/.bashrc ] && source ~/.bashrc ; trap exit SIGTERM ;')" ]

View File

@@ -1,507 +0,0 @@
# Run training from the command line
## Overview
With these scripts and Dockerfile, you can train new wake words from the
command line without using a Jupyter notebook.
Differences between this Docker image and the Jupyter notebook image:
* The Python training environment isn't included in the image. Instead, a
"virtual environment" (venv) is created in the `/data` directory which you
will have mounted to a host directory. This cuts about 7gb from the image
and allows the virtualenv to persist across container instances.
* The logic from the Jupyter notebook is contained in individual Python
and shell scripts
* No ports need to be exposed since the Jupyter notebook server isn't being
run.
## TL;DR
For the impatient among you...
```shell
$ mkdir /some/work/directory # On a device with more than 150GB free space
$ docker build -t microwakeword-cli:latest .
$ docker run -it --rm --gpus=all -v /some/work/directory:/data --name=mww-cli microwakeword-cli:latest
root@mww-cli:/# cd /data
root@mww-cli:/data# setup_python_venv
##### You have about 4 minutes to drink coffee
root@mww-cli:/data# setup_training_datasets --cleanup-archives --cleanup-intermediate-files
##### You have about 25 minutes for a quick lunch (on a 1gb/sec internet connection)
root@mww-cli:/data# train_wake_word --cleanup-work-dir "wake_word" "Wake Word"
##### You have about 30-45 minutes for a nap depending on available system resources.
##### You'll be informed of where to find your trained model.
```
Load the trained model on your device and give it a try but don't be surprised
if you get a lot of missed or false activations. Read on to find out why.
## Get Started
Good, you stuck around! Now read the rest of the document before doing
anything.
### Using a GPU
Having an Nvidia GPU available can cut the training time by up to half. The
open-source nouveau driver shipped with Linux kernels doesn't support CUDA
however so if you have an Nvidia GPU and want to use it for training, you'll
need to install the official Nvidia driver from
https://www.nvidia.com/en-in/drivers/unix/
### Build the image
You can use either Docker or Podman as your container management tool.
`docker` is used in the examples but if you have podman, just substitute
the command.
Start by navigating to the directory that contains this README file and
the accompanying Dockerfile. Then...
```shell
docker build -t microwakeword-cli:latest .
```
This should be fairly quick and result in an image that's about 320mb in size
as it's basically a standard Ubuntu 24.04 image with a few added tools.
So why isn't a pre-built image available for download? Because it'll probably
take longer to download a pre-built image than for you to create it locally.
GitHub's container registry is notoriously erratic when it comes to download
throughput.
### Create a host work directory
This directory will contain the Python virtual environment plus all of the
downloaded and generated data needed for training and the final trained
models. A full environment will need about 150gb of free space but read
further to see how to reduce this.
Your `<host_data_dir>` will be mounted inside the container as `/data`.
The training container will start a Bash shell so if you have Bash
aliases or Bashy things you like, create a `.bashrc` file in your
`<host_data_dir>` and put them in there. It'll automatically be included
any time you enter the container.
### Create and start the container
There are lots of options that control container creation. The simplest example
will create the container and give you an interactive shell. When you exit the
shell, the container will be stopped and removed leaving your `<host_data_dir>`
intact.
```shell
$ docker run -it --rm --gpus=all -v <host_work_directory>:/data microwakeword-cli:latest
```
Options:
* Remove the `--gpus=all` option if you don't have an Nvidia GPU or don't want to use it.
* Remove the `--rm` and add a `--name=mww-cli` option to keep the container
around and give it a name for training more than one wake word. You
can stop and remove it when you're ready.
* Add a `-d` option to start the container in the background and use `docker
attach mww-cli` or `docker exec -it mww-cli /bin/bash` to connect to it.
When the container starts, you'll see:
```text
=======================================================
WARNING: A python virtual environment wasn't found
at /data/.venv. You'll need to run setup_python_venv
before you'll be able to use this container for
training.
=======================================================
root@mww-cli:/#
```
Don't worry about the python WARNING right now. You'll be creating the
virtualenv in the next step.
If you've forgotten to create and/or mount your host data directory, you'll
see an additional warning:
```text
=======================================================
WARNING: The /data directory is NOT mounted.
Running the training process without /data mounted
could add over 140Gb of python packages and training
files to this container's storage which is probably
NOT what you want.
You should remove this container and re-create it with
a 'docker run' option like '-v <host_work_dir>:/data'
making sure the host directory is on a device that has
enough free space.
=======================================================
```
You can certainly continue but it's a "really bad idea"™ because your
container storage could grow from a few hundred mb to over 140gb.
At this point, you're in a Bash shell.
### Create the Python virtual environment
The Python virtual environment will contain all the software needed to train.
It gets created as `/data/.venv` and will take up about 11gb of disk space.
The scripts that do all the work will be in the container's PATH so to setup
the virtual environment and install all of the packages, just run:
```text
setup_python_venv [ --verbose ]
Options:
--verbose: Print the detailed "pip install" output.
```
When the installation is finished, a test of the major components will be
run.
Once the process is done, you should change to the `/data` directory and
activate the virtual environment with:
```shell
root@mww-cli:/# cd /data
root@mww-cli:/data# source .venv/bin/activate
(.venv) root@mww-cli:/data#
```
Technically, you don't need to do either of these since the scripts
are in the PATH and they know to use the `/data` directory for everything.
It's more of an "if you're interested" thing.
At this point, you have a container with all software installed.
## Get the reference data
The training process itself relies on a significant amount of audio reference
data that creates a simulated "audio environment" that your wake word will be
trained in. These "training datasets" include things like varying amounts of
reverberation, background music, background conversations, background noise,
etc. All said and done, it amounts to about 30gb of audio but with the
downloaded archives and extracted intermediate files, you'll need about 85gb
of free space. Thankfully, you only need to download the files once no
matter how many wake words you want to train and since it's stored in
`/data`, you can even remove the docker container and recreate it without
losing any of it. There are 4 datasets that are required.
This is a three stage process...
1. Download zipfiles or tarballs. (about 30gb)
2. Extract them. (about 50gb)
3. Convert them into the final form. (about 31gb)
NOTE: The sizes add up to more than the 85gb stated earlier because one
of the datasets doesn't need to be converted and is counted in both
steps 2 and 3. You really do only need 85gb.
To download the archives, unpack them, and convert the audio to what's needed
by the training process, run:
```text
setup_training_datasets [ --cleanup-archives ] [ --cleanup-intermediate-files ]
Options:
--cleanup-archives: Automatically delete the tarballs or zipfiles after
they've been extracted.
--cleanup-intermediate-files: Automatically delete the intermediate files
after they've been converted.
```
On a 1gb/sec Internet connection, this will take about 25 minutes.
The script detects if the datasets have already been downloaded, extracted
and/or converted and skips those steps as appropriate so if you've run the
script without the cleanup options, you can just run it again with those
options to clean them up.
Now you're ready to train a wake word. Almost.
## Train a Wake Word
Training is done in 3 stages.
1. Generate thousands of samples of the wake word with various voices,
pitches, speeds, inflections, etc.
2. Augment the samples with the training datasets to add background noise, etc.
3. Run the Tensorflow training.
### Generate a sample for verification
Before you start the full process, you're going to want to generate a single
wake word sample and play it back to ensure it sounds right. The wake word
should be spelled phonetically to give the sample generator the best chance
of success.
```text
root@mww-cli:/# wake_word_sample_generator --samples=1 "hey buster"
===== Generating 1 sample of 'hey buster' =====
Loading /data/tools/piper-sample-generator/models/en_US-libritts_r-medium.pt
Successfully loaded the model
Batch 1/0 complete
Done
Sample available at /data/work/test_sample/hey_buster.wav
Play it from your host.
```
You should then play that file from your host. The reason I used "hey buster"
as the wake word is to demonstrate why it's important to generate and listen
to a sample. If you try that exact input and play it back, you'll notice
that the generator didn't capture the "er" at the end very well. To get it to
do so, I had to add a period on the end as a "spacer".
"hey buster." worked much better.
When you're happy with the sample, you can run the full process.
### Run the full training process
```text
train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
[ --training-steps=<steps> ] [ --cleanup-work-dir ]
<wake_word> [ <wake_word_title> ]
Options:
--samples: The number of samples to generate for the wake word.
Default: 20000
--batch-size: How many samples should be generated at a time. The more
samples, the more memory is needed.
Default: 100
--training-steps: Number of training steps. More training steps means better
detection and false positive rates but also more time to train.
Default: 25000
--cleanup-work-dir: Delete the /data/work directory after successful training.
Default: false
<wake_word> The word to train spelled phonetically.
Required.
<wake_word_title> An optional pretty name to save to the json metadata file.
Default: The wake word with individual words capitalized
and punctuation removed.
```
By default, the training process creates 20,000 samples of your wake word and
runs 25,000 training steps. See [Tensorboard Results](#tensorboard-results)
in the [Extra Credit](#extra-credit) section below for
why these are the defaults. Depending on resources available, this could take
between 30 and 60 minutes.
The resulting tflite model files and logs will be placed in the
`/data/output/<timestamp>-<wake_word>-<samples>-<training-steps>` directory
and will therefore be available from your host in the directory you mapped
`/data` to. File names will have non-filename-friendly characters in your
wake word changed to underscores to make things easier. You'll need both the
tflite and json files to load on your device. Exactly how you load them
depends on the device and is beyond the scope of this project.
The only real measure of success is how well the resulting model works
on a real device. If you encounter too many missed or false activations,
increasing the number of samples would probably improve the results more
than increasing the number of training steps. See
[Tensorboard Results](#tensorboard-results) in the [Extra Credit](#extra-credit) section below.
The output from the last step is filtered some by the script but still quite
verbose. The full log will be available in the output directory as
`training.log` if you're interested. Interpreting the log is beyond the scope
of this project however.
You can train additional wake words or change the number of samples and
training steps by simply running `train_wake_word` again. No need to repeat
any of the earlier setup steps. If you change the wake word or the number of
wake word samples, the work directory will be deleted and all 3 steps re-run.
If you only change the number of training steps, the data from the first two
steps is still valid and only the 3rd step is run.
All of the intermediate data is stored in the `/data/work` directory which will
grow to about 17gb with 20,000 wake word samples. Once the tflite model is
successfully generated and you're happy with the results, you can delete the
`/data/work` directory.
### Training more than one wake word
Once you have a container running, you
can easily train multiple wake words from your host:
```shell
for wp in "hey_alexa" "hey_jenkins" ; do
docker exec -it mww-cli train_wake_word --cleanup-work-dir "$wp"
done
```
### Training time examples
Training times depend on lots of things. These are examples only.
Your Mileage May Vary!!!
```text
===============================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: N/A
Generate 10000 samples, 100/batch Elapsed time: 0:06:17
Augment 10000 samples Elapsed time: 0:04:05
10000 training steps Elapsed time: 0:15:04
==================================================
Total Elapsed time: 0:25:26
================================================================================
================================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: NVIDIA GeForce RTX 3060 (3584 cores) Memory: 11909 mb
Generate 10000 samples, 100/batch Elapsed time: 0:00:29
Augment 10000 samples Elapsed time: 0:03:40
10000 training steps Elapsed time: 0:08:00
======================================================
Total Elapsed time: 0:12:09
================================================================================
================================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: N/A
Generate 20000 samples, 100/batch Elapsed time: 0:10:38
Augment 20000 samples Elapsed time: 0:07:04
25000 training steps Elapsed time: 0:25:21
======================================================
Total Elapsed time: 0:43:03
================================================================================
================================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: NVIDIA GeForce RTX 3060 (3584 cores) Memory: 11909 mb
Generate 20000 samples, 100/batch Elapsed time: 0:00:53
Augment 20000 samples Elapsed time: 0:07:05
25000 training steps Elapsed time: 0:19:13
======================================================
Total Elapsed time: 0:27:11
================================================================================
================================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: N/A
Generate 50000 samples, 100/batch Elapsed time: 0:30:47
Augment 50000 samples Elapsed time: 0:20:22
40000 training steps Elapsed time: 1:01:51
==================================================
Total Elapsed time: 1:53:00
================================================================================
================================================================================
Training Summary
CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores) Memory: 64195 mb
GPU: NVIDIA GeForce RTX 3060 (3584 cores) Memory: 11909 mb
Generate 50000 samples, 100/batch Elapsed time: 0:02:08
Augment 50000 samples Elapsed time: 0:19:13
40000 training steps Elapsed time: 0:42:23
======================================================
Total Elapsed time: 1:03:44
================================================================================
```
The sample generation process is really the only one that uses multiple CPUs so
having fewer CPU threads available will probably make little difference.
## Extra Credit
### Training defaults
If you plan on training multiple wake words, you can set your own default
training parameters by creating a `/data/.defaults.env` file with the
following contents:
```shell
# Variable names follow the command line parameters converted to upper case
# and with the dashes ('-') converted to underscores ('_').
export SAMPLES=10000
export TRAINING_STEPS=10000
# Don't use the GPU for any operations. Stick with the CPU only.
##export CUDA_VISIBLE_DEVICES=-1
```
### Examine your model with Tensorboard
Tensorboard is a web-based graphical model viewer. You can use it to get an
idea of how many training steps are needed before accuracy results stop
improving. To use it, you'll have to expose port 6006 by adding `-p
6006:6006` to your `docker run` command line. If you didn't, don't worry.
Remember, the /data directory is mapped to a directory on your host so you
can simply stop and delete the current container and recreate it with the new
`docker run` command. No need to re-run any of the setup or training steps.
To start Tensorboard, run:
```shell
root@mww-cli:/# cd /data
root@mww-cli:/data# source .venv/bin/activate
(.venv) root@mww-cli:/data# tensorboard --bind_all --logdir ./output
```
Now on your host, point your browser at `http://localhost:6006/`,
click "SCALARS" at the top and take a look at the various charts. You'll see
a "train" and "validation" item for each training run you've performed. It's
the "train" items you're interested in.
<a id="tensorboard-results"></a>
You have to be a Tensorflow expert to decipher most of the charts but
the "Accuracy" chart for this particular wake word and 50,000 samples would
seem to indicate that there's very little improvement after about 20,000
training steps.
![Accuracy Chart, 50000 samples](tensorboard1.png)
In contrast, with only 5,000 wake word samples, there's still improvement to be had after
20,000 training steps.
![Accuracy Chart, 5000 samples](tensorboard2.png)
Given that it's faster to generate wake word samples than it is to train,
20,000 samples and 25,000 training steps seems like a good compromise. This
chart has a bit less smoothing to show a bit more detail and includes the
50,000 sample run as well. This run took only 27 minutes as opposed to the
63 minutes it took for the 50,000 sample run. Now you know why 20,000 and
25,000 are the defaults for these scripts.
![Accuracy Chart, 25000 samples](tensorboard3.png)

View File

@@ -1,10 +0,0 @@
# --- Packages needed by our scripts ---
numpy==1.26.4
scipy==1.12.0
librosa==0.10.2.post1
soundfile==0.12.1
tqdm==4.67.1
scikit-learn==1.6.0
numba==0.63.1
PyYAML==6.0.3

View File

@@ -1,5 +1,6 @@
#!/bin/bash
PROGDIR="$(dirname $(realpath $0))"
PROGDIR="$(dirname "$(realpath "$0")")"
ROOTDIR="$(dirname "${PROGDIR}")"
KNOWN_ARGS=( data-dir python gpu no-gpu )
source "${PROGDIR}/shell.functions"
@@ -27,7 +28,7 @@ EOF
exit 1
fi
[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath ${DATA_DIR})"
[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
[ -d "${DATA_DIR}" ] || {
echo "Data directory '${DATA_DIR}' doesn't exist." >&2
exit 1
@@ -52,7 +53,8 @@ if [ -n "${PYTHON}" ] ; then
PYTHONS=( "${PYTHON}" )
unset PYTHON
else
PYTHONS=( python3.12 python3.10 )
# Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
PYTHONS=( python3.12 python3.11 python3.10 )
fi
for p in "${PYTHONS[@]}" ; do
@@ -60,14 +62,14 @@ for p in "${PYTHONS[@]}" ; do
done
[ -n "${PYTHON}" ] || {
echo "A python 3.12 or 3.10 interpreter wasn't found. You 'll need to install one before proceeding." >&2
echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
exit 1
}
if [ -d "${VENV}" ] ; then
if [ -d "${VENV}" ] ; then
if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
source "${VENV}/bin/activate" || {
echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
exit 1
}
else
@@ -82,24 +84,28 @@ if [ -z "$VIRTUAL_ENV" ] ; then
else
echo " ===== Updating virtualenv at '${VENV}' ====="
fi
${PYTHON} -m venv --upgrade-deps "${VENV}"
source "${VENV}/bin/activate"
set -euo pipefail
declare -a progfiles=( $(find ${PROGDIR} -mindepth 1 -maxdepth 1 -executable -type f) )
# Symlink CLI scripts into .venv/bin
declare -a progfiles=( $(find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f) )
progfiles+=( "${PROGDIR}/shell.functions" )
# Also symlink the top-level entrypoint if present
[ -x "${ROOTDIR}/train_wake_word" ] && progfiles+=( "${ROOTDIR}/train_wake_word" )
for f in "${progfiles[@]}" ; do
ln -sfr "${f}" ".venv/bin/$(basename ${f})"
ln -sfr "${f}" ".venv/bin/$(basename "${f}")"
done
#
# Pip doesn't process packages from requirements.txt in
# order but order is important because tensorflow, torch,
# onnxruntime and micro-wake-word all depend on CUDA packages
# at various versions. They need to be installed in this specific
# order or they may not be able to use the GPU.
# Pip doesn't process packages from requirements.txt in order but order is
# important because tensorflow, torch, onnxruntime and micro-wake-word all
# depend on CUDA packages at various versions. They need to be installed in
# this specific order or they may not be able to use the GPU.
#
export PIP_PROGRESS_BAR=off
export PIP_NO_COLOR=1
@@ -117,7 +123,8 @@ pip_install() {
START_TS=$EPOCHSECONDS
echo " ===== Installing common requirements ====="
pip_install -r "${PROGDIR}/requirements.txt"
# requirements.txt lives in repo root now
pip_install -r "${ROOTDIR}/requirements.txt"
${GPU} && tfgpu='[and-cuda]' || tfgpu=""
echo " ===== Installing Tensorflow${tfgpu} ====="
@@ -140,7 +147,7 @@ pip_install -e "${MWW}"
echo " ===== Checking piper-sample-generator ====="
PSG="${DATA_DIR}/tools/piper-sample-generator"
if [ ! -d "${PSG}" ] || [ -n "$(git -C ${PSG} status --porcelain)" ] ; then
if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
rm -rf "${PSG}" || :
echo " Cloning piper-sample-generator to ${DATA_DIR}/tools"
git clone https://github.com/rhasspy/piper-sample-generator "${PSG}" &>/dev/null
@@ -171,13 +178,11 @@ echo " ===== Installing keras ====="
# keras 3.13 has "issues" so we need to back down to 3.12.
pip_install "keras==3.12.0"
${PROGDIR}/test_python --data-dir="${DATA_DIR}"
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"
touch .mww-data-dir
END_TS=$EPOCHSECONDS
echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"

View File

@@ -1,8 +1,9 @@
#!/bin/bash
set -euo pipefail
PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")
PROGPATH="$(realpath "$0")"
PROGDIR="$(dirname "${PROGPATH}")"
ROOTDIR="$(dirname "${PROGDIR}")" # repo root (train_wake_word, requirements.txt, etc.)
KNOWN_ARGS=( data-dir cleanup-archives cleanup-intermediate-files )
source "${PROGDIR}/shell.functions"
@@ -27,22 +28,38 @@ EOF
exit 1
fi
# Normalize + validate DATA_DIR (shell.functions typically sets a default,
# but this makes the script standalone-safe)
[ -n "${DATA_DIR:-}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
[ -d "${DATA_DIR}" ] || {
echo "Data directory '${DATA_DIR}' doesn't exist." >&2
exit 1
}
cd "${DATA_DIR}"
START_TS=$EPOCHSECONDS
echo -e "\n===== Setting up Training Datasets =====\n"
${PROGDIR}/setup_negative_datasets --cleanup-archives=${CLEANUP_ARCHIVES} \
--cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
"${PROGDIR}/setup_negative_datasets" \
--cleanup-archives="${CLEANUP_ARCHIVES}" \
--cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
--data-dir="${DATA_DIR}"
${PROGDIR}/setup_mit_audio --cleanup-archives=${CLEANUP_ARCHIVES} \
--cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
"${PROGDIR}/setup_mit_audio" \
--cleanup-archives="${CLEANUP_ARCHIVES}" \
--cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
--data-dir="${DATA_DIR}"
${PROGDIR}/setup_audioset --cleanup-archives=${CLEANUP_ARCHIVES} \
--cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
"${PROGDIR}/setup_audioset" \
--cleanup-archives="${CLEANUP_ARCHIVES}" \
--cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
--data-dir="${DATA_DIR}"
${PROGDIR}/setup_fma --cleanup-archives=${CLEANUP_ARCHIVES} \
--cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
"${PROGDIR}/setup_fma" \
--cleanup-archives="${CLEANUP_ARCHIVES}" \
--cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
--data-dir="${DATA_DIR}"
END_TS=$(date +%s.%N)
END_TS=$EPOCHSECONDS
print_elapsed_time "${START_TS}" "${END_TS}" "Training dataset setup"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

View File

@@ -1,125 +0,0 @@
#!/bin/bash
#
# train_wake_word: run the complete wake word pipeline in three stages:
#   1. generate samples   (wake_word_sample_generator)
#   2. augment samples    (wake_word_sample_augmenter, skipped when up to date)
#   3. train the model    (wake_word_sample_trainer)
# and finish with a timing summary.
#
# Usage: see the help text below (printed for --help, unknown options or a
# missing <wake_word>).
#
# NOTE(review): command line parsing is done by shell.functions, which is not
# visible here. From the way its results are used below it is expected to set
# POSITIONAL_ARGS, UNKNOWN_ARGS, HELP, DATA_DIR, SAMPLES, BATCH_SIZE,
# TRAINING_STEPS, CLEANUP_WORK_DIR and the DEFAULT_* values, and to define
# print_elapsed_time -- confirm against shell.functions.
set -e

PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")
# Presumably read by shell.functions to decide which options are valid.
KNOWN_ARGS=( samples batch-size training-steps data-dir cleanup-work-dir )
source "${PROGDIR}/shell.functions"
WAKE_WORD=${POSITIONAL_ARGS[0]}

# Unknown options are reported and then treated like a request for help.
if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
    echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
    HELP=true
fi

if [ "${HELP}" == "true" ] || [ -z "${WAKE_WORD}" ] ; then
    cat <<EOF >&2
Usage: train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
[ --training-steps=<steps> ] [ --cleanup-work-dir ]
<wake_word> [ <wake_word_title> ]
Options:
--samples: The number of samples to generate for the wake word.
Default: ${DEFAULT_SAMPLES}
--batch-size: How many samples should be generated at a time. The more
samples per batch, the more memory is needed.
Default: ${DEFAULT_BATCH_SIZE}
--training-steps: Number of training steps. More training steps means better
detection and false positive rates but also more time to train.
Default: ${DEFAULT_TRAINING_STEPS}
--cleanup-work-dir: Delete the /data/work directory after successful training.
Default: false
<wake_word> The word to train spelled phonetically.
Required.
<wake_word_title> An optional pretty name to save to the json metadata file.
Default: The wake word with individual words capitalized
and punctuation removed.
EOF
    exit 1
fi

# shellcheck source=/dev/null
source "${DATA_DIR}/.venv/bin/activate"

cd "${DATA_DIR}"
mkdir -p "${DATA_DIR}/work" || :

# Use the second positional argument as the title when one was given.
# The element count must be taken with [@]: a bare ${#POSITIONAL_ARGS} is the
# string length of the first element (the wake word itself), which made the
# title argument silently ignored for almost every wake word.
[ ${#POSITIONAL_ARGS[@]} -eq 2 ] && WAKE_WORD_TITLE="${POSITIONAL_ARGS[1]}" || :
if [ ! -v WAKE_WORD_TITLE ] ; then
    # No title: replace every non-alphanumeric character with a space, split
    # into words (the unquoted expansion is intentional) and capitalize the
    # first letter of each word.
    declare -a WWNA=( ${WAKE_WORD//[^a-zA-Z0-9]/ } )
    WAKE_WORD_TITLE="${WWNA[*]^}"
elif [ -z "$WAKE_WORD_TITLE" ] ; then
    # An explicitly empty title falls back to the wake word as typed.
    WAKE_WORD_TITLE="$WAKE_WORD"
fi

printf "%-80s\n" "=" | tr ' ' "="
echo "===== Running '${WAKE_WORD}(${WAKE_WORD_TITLE})' generation, augmentation and training ====="

"${PROGDIR}/cudainfo"
echo

START_TS=$EPOCHSECONDS

# Environment for the TensorFlow/CUDA based tools started below; most of
# these lower log verbosity or tune GPU memory handling.
export TF_CPP_MIN_LOG_LEVEL=9
export TF_FORCE_GPU_ALLOW_GROWTH=true
export TF_GPU_ALLOCATOR=cuda_malloc_async
export TF_XLA_FLAGS="--tf_xla_auto_jit=0"
export NVIDIA_TF32_OVERRIDE=1
export TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512
export GLOG_minloglevel=2
export GRPC_VERBOSITY=ERROR

# Stage 1: sample generation.
"${PROGDIR}/wake_word_sample_generator" \
    --samples="${SAMPLES}" \
    --batch-size="${BATCH_SIZE}" \
    --data-dir="${DATA_DIR}" "${WAKE_WORD}"

POST_GEN_TS=$EPOCHSECONDS

# Stage 2: augmentation. It is (re)run when there is no augmented directory
# yet, or when the first generated sample is newer than the augmented data.
AUGMENT=false
GENERATED_DIR="${DATA_DIR}/work/wake_word_samples"
AUGMENTED_DIR="${DATA_DIR}/work/wake_word_samples_augmented"
[ -d "${AUGMENTED_DIR}" ] || AUGMENT=true
[ "${GENERATED_DIR}/0.wav" -nt "${AUGMENTED_DIR}/testing/wakeword_mmap/data.ninja" ] && AUGMENT=true || :

if ${AUGMENT} ; then
    rm -rf "${AUGMENTED_DIR}" || :
    mkdir -p "${AUGMENTED_DIR}" || :
    # On failure remove the partial output so the next run augments again.
    "${PROGDIR}/wake_word_sample_augmenter" --data-dir="${DATA_DIR}" || { rm -rf "${AUGMENTED_DIR}" ; exit 1 ; }
else
    echo "Augmentation not required"
    echo
fi

POST_AUGMENT_TS=$EPOCHSECONDS

# Stage 3: training.
"${PROGDIR}/wake_word_sample_trainer" --samples="${SAMPLES}" --training-steps="${TRAINING_STEPS}" --data-dir="${DATA_DIR}" \
    "${WAKE_WORD}" "${WAKE_WORD_TITLE}"

# Optional cleanup of the intermediate data under ${DATA_DIR}/work.
if ${CLEANUP_WORK_DIR} ; then
    rm -rf "${DATA_DIR}/work/trained_models" "${DATA_DIR}/work/wake_word_samples" \
        "${DATA_DIR}/work/wake_word_samples_augmented" "${DATA_DIR}/work/last_wake_word" || :
fi

END_TS=$EPOCHSECONDS

# Timing summary: one line per stage followed by the total.
python -c $'print(f"{\'=\' * 80}")'
printf "%44s\n\n" "Training Summary"
"${PROGDIR}/system_summary"
echo
print_elapsed_time --no-separators "${START_TS}" "${POST_GEN_TS}" "Generate ${SAMPLES} samples, ${BATCH_SIZE}/batch"
print_elapsed_time --no-separators "${POST_GEN_TS}" "${POST_AUGMENT_TS}" "Augment ${SAMPLES} samples"
print_elapsed_time --no-separators "${POST_AUGMENT_TS}" "${END_TS}" "${TRAINING_STEPS} training steps"
python -c $'msg="="*54 ; print(f"{msg:>80s}")'
print_elapsed_time --no-separators "${START_TS}" "${END_TS}" "Total"
python -c $'print(f"{\'=\' * 80}")'

0
cli/wake_word_sample_augmenter Executable file → Normal file
View File

0
cli/wake_word_sample_trainer Executable file → Normal file
View File