cli + web recorder ui

2026-06-12 20:10:19 -06:00 · 2026-01-17 01:23:51 -06:00
parent b700bf095c
commit 5bc0f12a7f
24 changed files with 2002 additions and 2033 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/cli/.bashrc
+++ b/cli/.bashrc
--- a/201
+++ b/201
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/README.md
+++ b/README.md
@@ -1,123 +1,507 @@
-<div align="center">
-  <img src="https://raw.githubusercontent.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker/refs/heads/main/mmw.png" alt="MicroWakeWord Trainer Logo" width="100" />
-  <h1>microWakeWord Trainer Docker</h1>
-</div>
+# Run training from the command line

-# 🥔 MicroWakeWord Trainer – Tater Approved  
+## Overview

-**✅ Tater Totterson tested & working on an NVIDIA RTX 3070 Laptop GPU (8 GB VRAM).**  
-Easily train microWakeWord detection models with this pre-built Docker image and JupyterLab notebook.  
+With these scripts and Dockerfile, you can train new wake words from the
+command line without using a Jupyter notebook.

---
+Differences between this Docker image and the Jupyter notebook image:

-## 🚀 Quick Start  
+* The Python training environment isn't included in the image.  Instead, a
+  "virtual environment" (venv) is created in the `/data` directory which you
+   will have mounted to a host directory. This cuts about 7gb from the image
+   and allows the virtualenv to persist across container instances.

-Follow these steps to get up and running:  
+* The logic from the Jupyter notebook is contained in individual Python
+  and shell scripts

-### 1️⃣ Pull the Pre-Built Docker Image  
+* No ports need to be exposed since the Jupyter notebook server isn't being
+  run.

-```bash
-docker pull ghcr.io/tatertotterson/microwakeword:latest
+## TL;DR
+
+For the impatient among you...
+
+```shell
+$ mkdir /some/work/directory  # On a device with more than 150GB free space
+$ docker build -t microwakeword-cli:latest .
+$ docker run -it --rm --gpus=all -v /some/work/directory:/data --name=mww-cli microwakeword-cli:latest
+root@mww-cli:/# cd /data
+root@mww-cli:/data# setup_python_venv
+##### You have about 4 minutes to drink coffee
+
+root@mww-cli:/data# setup_training_datasets --cleanup-archives --cleanup-intermediate-files
+##### You have about 25 minutes for a quick lunch (on a 1gb/sec internet connection)
+
+root@mww-cli:/data# train_wake_word --cleanup-work-dir "wake_word" "Wake Word"
+##### You have about 30-45 minutes for a nap depending on available system resources.
+##### You'll be informed of where to find your trained model.
 ```

---
+Load the trained model on your device and give it a try, but don't be surprised
+if you get a lot of missed or false activations.  Read on to find out why.

-### 2️⃣ Run the Docker Container  
+## Get Started

-```bash
-docker run --rm -it \
-    --gpus all \
-    -p 8888:8888 \
-    -v $(pwd):/data \
-    ghcr.io/tatertotterson/microwakeword:latest
+Good, you stuck around!  Now read the rest of the document before doing
+anything.
+
+### Using a GPU
+
+Having an Nvidia GPU available can cut the training time by up to half.  The
+open-source nouveau driver shipped with Linux kernels doesn't support CUDA,
+however, so if you have an Nvidia GPU and want to use it for training, you'll
+need to install the official Nvidia driver from
+https://www.nvidia.com/en-in/drivers/unix/
+
+### Build the image
+
+You can use either Docker or Podman as your container management tool.
+`docker` is used in the examples but if you have podman, just substitute
+the command.
+
+Start by navigating to the directory that contains this README file and
+the accompanying Dockerfile.  Then...
+
+
+```shell
+docker build -t microwakeword-cli:latest .
 ```

-**What these flags do:**  
- `--gpus all` → Enables GPU acceleration  
- `-p 8888:8888` → Exposes JupyterLab on port 8888  
- `-v $(pwd):/data` → Saves your work in the current folder  
+This should be fairly quick and result in an image that's about 320mb in size
+as it's basically a standard Ubuntu 24.04 image with a few added tools.

---
+So why isn't a pre-built image available for download?  Because it'll probably
+take longer to download a pre-built image than for you to create it locally.
+GitHub's container registry is notoriously erratic when it comes to download
+throughput.

-### 3️⃣ Open JupyterLab  
+### Create a host work directory

-Visit [http://localhost:8888](http://localhost:8888) in your browser — the notebook UI will open.  
+This directory will contain the Python virtual environment plus all of the
+downloaded and generated data needed for training and the final trained
+models.  A full environment will need about 150gb of free space but read
+further to see how to reduce this.

---
+Your `<host_data_dir>` will be mounted inside the container as `/data`.

-### 4️⃣ Set Your Wake Word  
+The training container will start a Bash shell so if you have Bash
+aliases or Bashy things you like, create a `.bashrc` file in your
+`<host_data_dir>` and put them in there.  It'll automatically be included
+any time you enter the container.

-At the **top of the notebook**, find this line:  
+### Create and start the container

-```bash
-TARGET_WORD = "hey_tater"  # Change this to your desired wake word
+There are lots of options that control container creation.  The simplest example
+will create the container and give you an interactive shell.  When you exit the
+shell, the container will be stopped and removed leaving your `<host_data_dir>`
+intact.
+
+```shell
+$ docker run -it --rm --gpus=all -v <host_data_dir>:/data microwakeword-cli:latest
 ```

-Change `"hey_tater"` to your desired wake word (phonetic spellings often work best).  
+Options:

---
+* Remove the `--gpus=all` option if you don't have an Nvidia GPU or don't want to use it.
+* Remove the `--rm` and add a `--name=mww-cli` option to keep the container
+  around and give it a name for training more than one wake word.  You
+  can stop and remove it when you're ready.
+* Add a `-d` option to start the container in the background and use `docker
+  attach mww-cli` or `docker exec -it mww-cli /bin/bash` to connect to it.

-### 5️⃣ Run the Notebook  
+When the container starts, you'll see:

-Run all cells in the notebook. This process will:  
- Generate wake word samples  
- Train a detection model  
- Output a quantized `.tflite` model ready for on-device use  
+```text
+=======================================================
+WARNING: A python virtual environment wasn't found
+at /data/.venv.  You'll need to run setup_python_venv
+before you'll be able to use this container for
+training.
+=======================================================
+root@mww-cli:/#
+```

---
+Don't worry about the python WARNING right now.  You'll be creating the
+virtualenv in the next step.

-### 6️⃣ Retrieve the Trained Model & JSON  
+If you've forgotten to create and/or mount your host data directory, you'll
+see an additional warning:

-When training finishes, download links for both the `.tflite` model and its `.json` manifest will be displayed in the last cell.  
+```text
+=======================================================
+WARNING: The /data directory is NOT mounted.
+Running the training process without /data mounted
+could add over 140Gb of python packages and training
+files to this container's storage which is probably
+NOT what you want.

---
+You should remove this container and re-create it with
+a 'docker run' option like '-v <host_work_dir>:/data'
+making sure the host directory is on a device that has
+enough free space.
+=======================================================
+```

-## 🔄 Resetting to a Clean State  
+You can certainly continue but it's a "really bad idea"™ because your
+container storage could grow from a few hundred mb to over 140gb.

-If you need to start fresh:  
+At this point, you're in a Bash shell.

-1. Delete the `data` folder that was mapped to your Docker container.  
-2. Restart the container using the steps above.  
-3. A fresh copy of the notebook will be placed into the `data` directory.  
+### Create the Python virtual environment

---
+The Python virtual environment will contain all the software needed to train.
+It gets created as `/data/.venv` and will take up about 11gb of disk space.

-## 🎤 Optional: Personal Voice Samples
+The scripts that do all the work will be in the container's PATH so to set up
+the virtual environment and install all of the packages, just run:

-In addition to synthetic TTS samples, the trainer can optionally use your own real voice recordings to significantly improve accuracy for your voice and environment.
+```text
+setup_python_venv [ --verbose ]

-### How it works
- If a folder named personal_samples/ exists and contains .wav files, the trainer will:
-  - Automatically extract features from those recordings
-  - Include them during training alongside the synthetic TTS data
-  - Up-weight your personal samples during training for better real-world performance
+Options:

-No extra flags or configuration are required — it is detected automatically.
+--verbose: Print the detailed "pip install" output.

-### How to use it
-1. Create a folder in the repo root:
-   mkdir personal_samples
+```

-2. Record yourself saying the wake word naturally and save the files as .wav:
-   personal_samples/
-     hey_tater_01.wav
-     hey_tater_02.wav
-     hey_tater_03.wav
-     ...
+When the installation is finished, a test of the major components will be
+run.
+
+Once the process is done, you should change to the `/data` directory and
+activate the virtual environment with:
+
+```shell
+root@mww-cli:/# cd /data
+root@mww-cli:/data# source .venv/bin/activate
+(.venv) root@mww-cli:/data#
+```
+
+Technically, you don't need to do either of these since the scripts
+are in the PATH and they know to use the `/data` directory for everything.
+It's more of an "if you're interested" thing.
+
+At this point, you have a container with all software installed.
+
+## Get the reference data
+
+The training process itself relies on a significant amount of audio reference
+data that creates a simulated "audio environment" that your wake word will be
+trained in.  These "training datasets" include things like varying amounts of
+reverberation, background music, background conversations, background noise,
+etc.  All said and done, it amounts to about 30gb of audio but with the
+downloaded archives and extracted intermediate files, you'll need about 85gb
+of free space.  Thankfully, you only need to download the files once no
+matter how many wake words you want to train and since it's stored in
+`/data`,  you can even remove the docker container and recreate it without
+losing any of it.  There are 4 datasets that are required.
+
+This is a three stage process...
+
+1.  Download zipfiles or tarballs.    (about 30gb)
+2.  Extract them.                     (about 50gb)
+3.  Convert them into the final form. (about 31gb)
+
+NOTE: The sizes add up to more than the 85gb stated earlier because one
+of the datasets doesn't need to be converted and is counted in both
+steps 2 and 3.  You really do only need 85gb.
+
+To download the archives, unpack them, and convert the audio to what's needed
+by the training process, run:
+
+```text
+setup_training_datasets [ --cleanup-archives ] [ --cleanup-intermediate-files ]
+
+Options:
+--cleanup-archives:           Automatically delete the tarballs or zipfiles after
+                              they've been extracted.
+
+--cleanup-intermediate-files: Automatically delete the intermediate files
+                              after they've been converted.
+
+```
+
+On a 1gb/sec Internet connection, this will take about 25 minutes.
+
+The script detects if the datasets have already been downloaded, extracted
+and/or converted and skips those steps as appropriate so if you've run the
+script without the cleanup options, you can just run it again with those
+options to clean them up.
+
+Now you're ready to train a wake word.  Almost.
+
+## Train a Wake Word
+
+Training is done in 3 stages.
+
+1.  Generate thousands of samples of the wake word with various voices,
+pitches, speeds, inflections, etc.
+2.  Augment the samples with the training datasets to add background noise, etc.
+3.  Run the Tensorflow training.
+
+### Generate a sample for verification
+
+Before you start the full process, you're going to want to generate a single
+wake word sample and play it back to ensure it sounds right.  The wake word
+should be spelled phonetically to give the sample generator the best chance
+of success.
+
+```text
+root@mww-cli:/# wake_word_sample_generator --samples=1 "hey buster"
+===== Generating 1 sample of 'hey buster' =====
+      Loading /data/tools/piper-sample-generator/models/en_US-libritts_r-medium.pt
+      Successfully loaded the model
+      Batch 1/0 complete
+      Done
+Sample available at /data/work/test_sample/hey_buster.wav
+Play it from your host.
+```
+
+You should then play that file from your host.  The reason I used "hey buster"
+as the wake word is to demonstrate why it's important to generate and listen
+to a sample.  If you try that exact input and play it back, you'll notice
+that the generator didn't capture the "er" at the end very well. To get it to
+do so, I had to add a period on the end as a "spacer".
+"hey buster." worked much better.
+
+When you're happy with the sample, you can run the full process.
+
+### Run the full training process
+
+```text
+train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
+                [ --training-steps=<steps> ] [ --cleanup-work-dir ]
+                <wake_word> [ <wake_word_title> ]
+
+Options:
+--samples:            The number of samples to generate for the wake word.
+                      Default: 20000
+
+--batch-size:         How many samples should be generated at a time.  The more
+                      samples, the more memory is needed.
+                      Default: 100
+
+--training-steps:     Number of training steps.  More training steps means better
+                      detection and false positive rates but also more time to train.
+                      Default: 25000
+
+--cleanup-work-dir:   Delete the /data/work directory after successful training.
+                      Default: false
+
+<wake_word>           The word to train spelled phonetically.
+                      Required.
+
+<wake_word_title>     An optional pretty name to save to the json metadata file.
+                      Default: The wake word with individual words capitalized
+                               and punctuation removed.
+
+```
+
+By default, the training process creates 20,000 samples of your wake word and
+runs 25,000 training steps.  See [Tensorboard Results](#tensorboard-results)
+in the [Extra Credit](#extra-credit) section below for
+why these are the defaults.  Depending on resources available, this could take
+between 30 and 60 minutes.
+
+The resulting tflite model files and logs will be placed in the
+`/data/output/<timestamp>-<wake_word>-<samples>-<training-steps>` directory
+and will therefore be available from your host in the directory you mapped
+`/data` to.  File names will have non-filename-friendly characters in your
+wake word changed to underscores to make things easier.  You'll need both the
+tflite and json files to load on your device. Exactly how you load them
+depends on the device and is beyond the scope of this project.
+
+The only real measure of success is how well the resulting model works
+on a real device.  If you encounter too many missed or false activations,
+increasing the number of samples would probably improve the results more
+than increasing the number of training steps.  See
+[Tensorboard Results](#tensorboard-results) in the [Extra Credit](#extra-credit) section below.
+
+The output from the last step is filtered somewhat by the script but is still
+quite verbose. The full log will be available in the output directory as
+`training.log` if you're interested. Interpreting the log is beyond the scope
+of this project, however.
+
+You can train additional wake words or change the number of samples and
+training steps by simply running `train_wake_word` again. No need to repeat
+any of the earlier setup steps.  If you change the wake word or the number of
+wake word samples, the work directory will be deleted and all 3 steps re-run.
+If you only change the number of training steps, the data from the first two
+steps is still valid and only the 3rd step is run.
+
+All of the intermediate data is stored in the `/data/work` directory which will
+grow to about 17gb with 20,000 wake word samples.  Once the tflite model is
+successfully generated and you're happy with the results, you can delete the
+`/data/work` directory.
+
+### Training more than one wake word
+
+Once you have a container running, you
+can easily train multiple wake words from your host:
+
+```shell
+for wp in "hey_alexa" "hey_jenkins" ; do
+  docker exec -it mww-cli train_wake_word --cleanup-work-dir "$wp"
+done
+```
+
+### Training time examples
+
+Training times depend on lots of things.  These are examples only.
+Your Mileage May Vary!!!
+
+```text
+===============================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: N/A
+
+                 Generate 10000 samples, 100/batch Elapsed time: 0:06:17
+                             Augment 10000 samples Elapsed time: 0:04:05
+                              10000 training steps Elapsed time: 0:15:04
+                              ==================================================
+                                             Total Elapsed time: 0:25:26
+================================================================================
+
+================================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
+
+                 Generate 10000 samples, 100/batch Elapsed time: 0:00:29
+                             Augment 10000 samples Elapsed time: 0:03:40
+                              10000 training steps Elapsed time: 0:08:00
+                          ======================================================
+                                             Total Elapsed time: 0:12:09
+================================================================================
+
+================================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: N/A
+
+                 Generate 20000 samples, 100/batch Elapsed time: 0:10:38
+                             Augment 20000 samples Elapsed time: 0:07:04
+                              25000 training steps Elapsed time: 0:25:21
+                          ======================================================
+                                             Total Elapsed time: 0:43:03
+================================================================================
+
+================================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
+
+                 Generate 20000 samples, 100/batch Elapsed time: 0:00:53
+                             Augment 20000 samples Elapsed time: 0:07:05
+                              25000 training steps Elapsed time: 0:19:13
+                          ======================================================
+                                             Total Elapsed time: 0:27:11
+================================================================================
+
+================================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: N/A
+
+                 Generate 50000 samples, 100/batch Elapsed time: 0:30:47
+                             Augment 50000 samples Elapsed time: 0:20:22
+                              40000 training steps Elapsed time: 1:01:51
+                              ==================================================
+                                             Total Elapsed time: 1:53:00
+================================================================================
+
+================================================================================
+                            Training Summary
+
+CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
+GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
+
+                 Generate 50000 samples, 100/batch Elapsed time: 0:02:08
+                             Augment 50000 samples Elapsed time: 0:19:13
+                              40000 training steps Elapsed time: 0:42:23
+                          ======================================================
+                                             Total Elapsed time: 1:03:44
+================================================================================
+
+
+```
+
+The sample generation process is really the only one that uses multiple CPUs so
+having fewer CPU threads available will probably make little difference.
+
+## Extra Credit
+
+### Training defaults
+
+If you plan on training multiple wake words, you can set your own default
+training parameters by creating a `/data/.defaults.env` file with the
+following contents:
+
+```shell
+# Variable names follow the command line parameters converted to upper case
+# and with the dashes ('-') converted to underscores ('_').
+export SAMPLES=10000
+export TRAINING_STEPS=10000
+
+# Don't use the GPU for any operations.  Stick with the CPU only.
+##export CUDA_VISIBLE_DEVICES=-1
+
+```
+
+### Examine your model with Tensorboard
+
+Tensorboard is a web-based graphical model viewer.  You can use it to get an
+idea of how many training steps are needed before accuracy results stop
+improving.  To use it, you'll have to expose port 6006 by adding `-p
+6006:6006` to your `docker run` command line.  If you didn't, don't worry.
+Remember, the /data directory is mapped to a directory on your host so you
+can simply stop and delete the current container and recreate it with the new
+`docker run` command. No need to re-run any of the setup or training steps.
+
+To start Tensorboard, run:
+
+```shell
+root@mww-cli:/# cd /data
+root@mww-cli:/data# source .venv/bin/activate
+(.venv) root@mww-cli:/data# tensorboard --bind_all --logdir ./output
+```
+
+Now on your host, point your browser at `http://localhost:6006/`,
+click "SCALARS" at the top and take a look at the various charts.  You'll see
+a "train" and "validation" item for each training run you've performed.  It's
+the "train" items you're interested in.
+
+<a id="tensorboard-results"></a>
+
+You have to be a Tensorflow expert to decipher most of the charts but
+the "Accuracy" chart for this particular wake word and 50,000 samples would
+seem to indicate that there's very little improvement after about 20,000
+training steps.
+
+![Accuracy Chart, 50000 samples](tensorboard1.png)
+
+In contrast, with only 5,000 wake word samples, there's still improvement to be had after
+20,000 training steps.
+
+![Accuracy Chart, 5000 samples](tensorboard2.png)
+
+Given that it's faster to generate wake word samples than it is to train,
+20,000 samples and 25,000 training steps seems like a good compromise.  This
+chart has a bit less smoothing to show a bit more detail and includes the
+50,000 sample run as well.  This run took only 27 minutes as opposed to the
+63 minutes it took for the 50,000 sample run.  Now you know why 20,000 and
+25,000 are the defaults for these scripts.
+
+![Accuracy Chart, 25000 samples](tensorboard3.png)

-3. Run the training script as normal:

-If personal samples are found, you’ll see a message during training indicating they are being included.

-### Recording tips
- 10–30 recordings is usually enough to see a noticeable improvement
- Vary distance, volume, and tone slightly
- Record in the same environment where the wake word will be used (room noise matters)
- Use 16-bit WAV files if possible (most recorders do this by default)

---

-## 🙌 Credits  

-This project builds upon the excellent work of [kahrendt/microWakeWord](https://github.com/kahrendt/microWakeWord).  
-Huge thanks to the original authors for their contributions to the open-source community!
--- a/cli/.DS_Store
+++ b/cli/.DS_Store
--- a/cli/Dockerfile
+++ b/cli/Dockerfile
@@ -1,27 +0,0 @@
-# Since this is a pure python environment, we don't need to start
-# with a huge CUDA image.  A standard Ubuntu image will do.
-FROM ubuntu:24.04
-
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=1 \
-    PIP_ROOT_USER_ACTION=ignore \
-    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
-    PATH="/root/mww-scripts:${PATH}"
-
-# System deps
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3.12 python3.12-venv python3.12-dev python3-pip python-is-python3 \
-    git wget curl unzip ca-certificates nano less \
- && rm -rf /var/lib/apt/lists/* \
- && mkdir -p /data
-
-COPY --chown=root:root --chmod=0755 .bashrc /root/
-COPY --chown=root:root --chmod=0755 setup_* wake_word_sample* train_wake_word \
-        test_python cudainfo system_summary shell.functions requirements.txt /root/mww-scripts/
-
-# Docker and Podman send the CMD a SIGTERM when you "stop" the container.  Unfortunately, bash
-# normally doesn't exit when it receives a SIGTERM so docker/podman has to wait for the "stop"
-# to timeout then SIGKILL the container.
-# This little scriptlet causes bash to exit immediately when it receives the SIGTERM.
-CMD ["/usr/bin/bash", "-c", "exec /usr/bin/bash --rcfile <(echo '[ -f ~/.bashrc ] && source ~/.bashrc ; trap exit SIGTERM ;')" ]
--- a/cli/README.md
+++ b/cli/README.md
@@ -1,507 +0,0 @@
-# Run training from the command line
-
-## Overview
-
-With these scripts and Dockerfile, you can train new wake words from the
-command line without using a Jupyter notebook.
-
-Differences between this Docker image and the Jupyter notebook image:
-
-* The Python training environment isn't included in the image.  Instead, a
-  "virtual environment" (venv) is created in the `/data` directory which you
-   will have mounted to a host directory. This cuts about 7gb from the image
-   and allows the virtualenv to persist across container instances.
-
-* The logic from the Jupyter notebook is contained in individual Python
-  and shell scripts
-
-* No ports need to be exposed since the Jupyter notebook server isn't being
-  run.
-
-## TL;DR
-
-For the impatient among you...
-
-```shell
-$ mkdir /some/work/directory  # On a device with more than 150GB free space
-$ docker build -t microwakeword-cli:latest .
-$ docker run -it --rm --gpus=all -v /some/work/directory:/data --name=mww-cli microwakeword-cli:latest
-root@mww-cli:/# cd /data
-root@mww-cli:/data# setup_python_venv
-##### You have about 4 minutes to drink coffee
-
-root@mww-cli:/data# setup_training_datasets --cleanup-archives --cleanup-intermediate-files
-##### You have about 25 minutes for a quick lunch (on a 1gb/sec internet connection)
-
-root@mww-cli:/data# train_wake_word --cleanup-work-dir "wake_word" "Wake Word"
-##### You have about 30-45 minutes for a nap depending on available system resources.
-##### You'll be informed of where to find your trained model.
-```
-
-Load the trained model on your device and give it a try but don't be surprised
-if you get a lot of missed or false activations.  Read on to find out why.
-
-## Get Started
-
-Good, you stuck around!  Now read the rest of the document before doing
-anything.
-
-### Using a GPU
-
-Having an Nvidia GPU available can cut the training time by up to half.  The
-open-source nouveau driver shipped with Linux kernels doesn't support CUDA
-however so if you have an Nvidia GPU and want to use it for training, you'll
-need to install the official Nvidia driver from
-https://www.nvidia.com/en-in/drivers/unix/
-
-### Build the image
-
-You can use either Docker or Podman as your container management tool.
-`docker` is used in the examples but if you have podman, just substitute
-the command.
-
-Start by navigating to the directory that contains this README file and
-the accompanying Dockerfile.  Then...
-
-
-```shell
-docker build -t microwakeword-cli:latest .
-```
-
-This should be fairly quick and result in an image that's about 320mb in size
-as it's basically a standard Ubuntu 24.04 image with a few added tools.
-
-So why isn't a pre-built image available for download?  Because it'll probably
-take longer to download a pre-built image than for you to create it locally.
-GitHub's container registry is notoriously erratic when it comes to download
-throughput.
-
-### Create a host work directory
-
-This directory will contain the Python virtual environment plus all of the
-downloaded and generated data needed for training and the final trained
-models.  A full environment will need about 150gb of free space but read
-further to see how to reduce this.
-
-Your `<host_data_dir>` will be mounted inside the container as `/data`.
-
-The training container will start a Bash shell so if you have Bash
-aliases or Bashy things you like, create a `.bashrc` file in your
-`<host_data_dir>` and put them in there.  It'll automatically be included
-any time you enter the container.
-
-### Create and start the container
-
-There are lots of options that control container creation.  The simplest example
-will create the container and give you an interactive shell.  When you exit the
-shell, the container will be stopped and removed leaving your `<host_data_dir>`
-intact.
-
-```shell
-$ docker run -it --rm --gpus=all -v <host_work_directory>:/data microwakeword-cli:latest
-```
-
-Options:
-
-* Remove the `--gpus=all` option if you don't have an Nvidia GPU or don't want to use it.
-* Remove the `--rm` and add a `--name=mww-cli` option to keep the container
-  around and give it a name for training more than one wake word.  You
-  can stop and remove it when you're ready.
-* Add a `-d` option to start the container in the background and use `docker
-  attach mww-cli` or `docker exec -it mww-cli /bin/bash` to connect to it.
-
-When the container starts, you'll see:
-
-```text
-=======================================================
-WARNING: A python virtual environment wasn't found
-at /data/.venv.  You'll need to run setup_python_venv
-before you'll be able to use this container for
-training.
-=======================================================
-root@mww-cli:/#
-```
-
-Don't worry about the python WARNING right now.  You'll be creating the
-virtualenv in the next step.
-
-If you've forgotten to create and/or mount your host data directory, you'll
-see an additional warning:
-
-```text
-=======================================================
-WARNING: The /data directory is NOT mounted.
-Running the training process without /data mounted
-could add over 140Gb of python packages and training
-files to this container's storage which is probably
-NOT what you want.
-
-You should remove this container and re-create it with
-a 'docker run' option like '-v <host_work_dir>:/data'
-making sure the host directory is on a device that has
-enough free space.
-=======================================================
-```
-
-You can certainly continue but it's a "really bad idea"™ because your
-container storage could grow from a few hundred mb to over 140gb.
-
-At this point, you're in a Bash shell.
-
-### Create the Python virtual environment
-
-The Python virtual environment will contain all the software needed to train.
-It gets created as `/data/.venv` and will take up about 11gb of disk space.
-
-The scripts that do all the work will be in the container's PATH so to setup
-the virtual environment and install all of the packages, just run:
-
-```text
-setup_python_venv [ --verbose ]
-
-Options:
-
--verbose: Print the detailed "pip install" output.
-
-```
-
-When the installation is finished, a test of the major components will be
-run.
-
-Once the process is done, you should change to the `/data` directory and
-activate the virtual environment with:
-
-```shell
-root@mww-cli:/# cd /data
-root@mww-cli:/data# source .venv/bin/activate
-(.venv) root@mww-cli:/data#
-```
-
-Technically, you don't need to do either of these since the scripts
-are in the PATH and they know to use the `/data` directory for everything.
-It's more of an "if you're interested" thing.
-
-At this point, you have a container with all software installed.
-
-## Get the reference data
-
-The training process itself relies on a significant amount of audio reference
-data that creates a simulated "audio environment" that your wake word will be
-trained in.  These "training datasets" include things like varying amounts of
-reverberation, background music, background conversations, background noise,
-etc.  All said and done, it amounts to about 30gb of audio but with the
-downloaded archives and extracted intermediate files, you'll need about 85gb
-of free space.  Thankfully, you only need to download the files once no
-matter how many wake words you want to train and since it's stored in
-`/data`,  you can even remove the docker container and recreate it without
-losing any of it.  There are 4 datasets that are required.
-
-This is a three stage process...
-
-1.  Download zipfiles or tarballs.    (about 30gb)
-2.  Extract them.                     (about 50gb)
-3.  Convert them into the final form. (about 31gb)
-
-NOTE: The sizes add up to more than the 85gb stated earlier because one
-of the datasets doesn't need to be converted and is counted in both
-steps 2 and 3.  You really do only need 85gb.
-
-To download the archives, unpack them, and convert the audio to what's needed
-by the training process, run:
-
-```text
-setup_training_datasets [ --cleanup-archives ] [ --cleanup-intermediate-files ]
-
-Options:
--cleanup-archives:           Automatically delete the tarballs or zipfiles after
-                              they've been extracted.
-
--cleanup-intermediate-files: Automatically delete the intermediate files
-                              after they've been converted.
-
-```
-
-On a 1gb/sec Internet connection, this will take about 25 minutes.
-
-The script detects if the datasets have already been downloaded, extracted
-and/or converted and skips those steps as appropriate so if you've run the
-script without the cleanup options, you can just run it again with those
-options to clean them up.
-
-Now you're ready to train a wake word.  Almost.
-
-## Train a Wake Word
-
-Training is done in 3 stages.
-
-1.  Generate thousands of samples of the wake word with various voices,
-pitches, speeds, inflections, etc.
-2.  Augment the samples with the training datasets to add background noise, etc.
-3.  Run the Tensorflow training.
-
-### Generate a sample for verification
-
-Before you start the full process, you're going to want to generate a single
-wake word sample and play it back to ensure it sounds right.  The wake word
-should be spelled phonetically to give the sample generator the best chance
-of success.
-
-```text
-root@mww-cli:/# wake_word_sample_generator --samples=1 "hey buster"
-===== Generating 1 sample of 'hey buster' =====
-      Loading /data/tools/piper-sample-generator/models/en_US-libritts_r-medium.pt
-      Successfully loaded the model
-      Batch 1/0 complete
-      Done
-Sample available at /data/work/test_sample/hey_buster.wav
-Play it from your host.
-```
-
-You should then play that file from your host.  The reason I used "hey buster"
-as the wake word is to demonstrate why it's important to generate and listen
-to a sample.  If you try that exact input and play it back, you'll notice
-that the generator didn't capture the "er" at the end very well. To get it to
-do so, I had to add a period on the end as a "spacer".
-"hey buster." worked much better.
-
-When you're happy with the sample, you can run the full process.
-
-### Run the full training process
-
-```text
-train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
-                [ --training-steps=<steps> ] [ --cleanup-work-dir ]
-                <wake_word> [ <wake_word_title> ]
-
-Options:
--samples:            The number of samples to generate for the wake word.
-                      Default: 20000
-
--batch-size:         How many samples should be generated at a time.  The more
-                      samples, the more memory is needed.
-                      Default: 100
-
--training-steps:     Number of training steps.  More training steps means better
-                      detection and false positive rates but also more time to train.
-                      Default: 25000
-
--cleanup-work-dir:   Delete the /data/work directory after successful training.
-                      Default: false
-
-<wake_word>           The word to train spelled phonetically.
-                      Required.
-
-<wake_word_title>     An optional pretty name to save to the json metadata file.
-                      Default: The wake word with individual words capitalized
-                               and punctuation removed.
-
-```
-
-By default, the training process creates 20,000 samples of your wake word and
-runs 25,000 training steps.  See [Tensorboard Results](#tensorboard-results)
-in the [Extra Credit](#extra-credit) section below for
-why these are the defaults.  Depending on resources available, this could take
-between 30 and 60 minutes.
-
-The resulting tflite model files and logs will be placed in the
-`/data/output/<timestamp>-<wake_word>-<samples>-<training-steps>` directory
-and will therefore be available from your host in the directory you mapped
-`/data` to.  File names will have non-filename-friendly characters in your
-wake word changed to underscores to make things easier.  You'll need both the
-tflite and json files to load on your device. Exactly how you load them
-depends on the device and is beyond the scope of this project.
-
-The only real measure of success is how well the resulting model works
-on a real device.  If you encounter too many missed or false activations,
-increasing the number of samples would probably improve the results more
-than increasing the number of training steps.  See
-[Tensorboard Results](#tensorboard-results) in the [Extra Credit](#extra-credit) section below.
-
-The output from the last step is filtered some by the script but still quite
-verbose. The full log will be available in the output directory as
-`training.log` if you're interested. Interpreting the log is beyond the scope
-of this project however.
-
-You can train additional wake words or change the number of samples and
-training steps by simply running `train_wake_word` again. No need to repeat
-any of the earlier setup steps.  If you change the wake word or the number of
-wake word samples, the work directory will be deleted and all 3 steps re-run.
-If you only change the number of training steps, the data from the first two
-steps is still valid and only the 3rd step is run.
-
-All of the intermediate data is stored in the `/data/work` directory which will
-grow to about 17gb with 20,000 wake word samples.  Once the tflite model is
-successfully generated and you're happy with the results, you can delete the
-`/data/work` directory.
-
-### Training more than one wake word
-
-Once you have a container running, you
-can easily train multiple wake words from your host:
-
-```shell
-for wp in "hey_alexa" "hey_jenkins" ; do
-  docker exec -it mww-cli train_wake_word --cleanup-work-dir "$wp"
-done
-```
-
-### Training time examples
-
-Training times depend on lots of things.  These are examples only.
-Your Mileage May Vary!!!
-
-```text
-===============================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: N/A
-
-                 Generate 10000 samples, 100/batch Elapsed time: 0:06:17
-                             Augment 10000 samples Elapsed time: 0:04:05
-                              10000 training steps Elapsed time: 0:15:04
-                              ==================================================
-                                             Total Elapsed time: 0:25:26
-================================================================================
-
-================================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
-
-                 Generate 10000 samples, 100/batch Elapsed time: 0:00:29
-                             Augment 10000 samples Elapsed time: 0:03:40
-                              10000 training steps Elapsed time: 0:08:00
-                          ======================================================
-                                             Total Elapsed time: 0:12:09
-================================================================================
-
-================================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: N/A
-
-                 Generate 20000 samples, 100/batch Elapsed time: 0:10:38
-                             Augment 20000 samples Elapsed time: 0:07:04
-                              25000 training steps Elapsed time: 0:25:21
-                          ======================================================
-                                             Total Elapsed time: 0:43:03
-================================================================================
-
-================================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
-
-                 Generate 20000 samples, 100/batch Elapsed time: 0:00:53
-                             Augment 20000 samples Elapsed time: 0:07:05
-                              25000 training steps Elapsed time: 0:19:13
-                          ======================================================
-                                             Total Elapsed time: 0:27:11
-================================================================================
-
-================================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: N/A
-
-                 Generate 50000 samples, 100/batch Elapsed time: 0:30:47
-                             Augment 50000 samples Elapsed time: 0:20:22
-                              40000 training steps Elapsed time: 1:01:51
-                              ==================================================
-                                             Total Elapsed time: 1:53:00
-================================================================================
-
-================================================================================
-                            Training Summary
-
-CPU: Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz (20 cores)  Memory: 64195 mb
-GPU: NVIDIA GeForce RTX 3060 (3584 cores)  Memory: 11909 mb
-
-                 Generate 50000 samples, 100/batch Elapsed time: 0:02:08
-                             Augment 50000 samples Elapsed time: 0:19:13
-                              40000 training steps Elapsed time: 0:42:23
-                          ======================================================
-                                             Total Elapsed time: 1:03:44
-================================================================================
-
-
-```
-
-The sample generation process is really the only one that uses multiple CPUs so
-having fewer CPU threads available will probably make little difference.
-
-## Extra Credit
-
-### Training defaults
-
-If you plan on training multiple wake words, you can set your own default
-training parameters by creating a `/data/.defaults.env` file with the
-following contents:
-
-```shell
-# Variable names follow the command line parameters converted to upper case
-# and with the dashes ('-') converted to underscores ('_').
-export SAMPLES=10000
-export TRAINING_STEPS=10000
-
-# Don't use the GPU for any operations.  Stick with the CPU only.
-##export CUDA_VISIBLE_DEVICES=-1
-
-```
-
-### Examine your model with Tensorboard
-
-Tensorboard is a web-based graphical model viewer.  You can use it to get an
-idea of how many training steps are needed before accuracy results stop
-improving.  To use it, you'll have to expose port 6006 by adding `-p
-6006:6006` to your `docker run` command line.  If you didn't, don't worry.
-Remember, the /data directory is mapped to a directory on your host so you
-can simply stop and delete the current container and recreate it with the new
-`docker run` command. No need to re-run any of the setup or training steps.
-
-To start Tensorboard, run:
-
-```shell
-root@mww-cli:/# cd /data
-root@mww-cli:/data# source .venv/bin/activate
-(.venv) root@mww-cli:/data# tensorboard --bind_all --logdir ./output
-```
-
-Now on your host, point your browser at `http://localhost:6006/`,
-click "SCALARS" at the top and take a look at the various charts.  You'll see
-a "train" and "validation" item for each training run you've performed.  It's
-the "train" items you're interested in.
-
-<a id="tensorboard-results"></a>
-
-You have to be a Tensorflow expert to decipher most of the charts but
-the "Accuracy" chart for this particular wake word and 50,000 samples would
-seem to indicate that there's very little improvement after about 20,000
-training steps.
-
-![Accuracy Chart, 50000 samples](tensorboard1.png)
-
-In contrast, with only 5,000 wake word samples, there's still improvement to be had after
-20,000 training steps.
-
-![Accuracy Chart, 5000 samples](tensorboard2.png)
-
-Given that it's faster to generate wake word samples than it is to train,
-20,000 samples and 25,000 training steps seems like a good compromise.  This
-chart has a bit less smoothing to show a bit more detail and includes the
-50,000 sample run as well.  This run took only 27 minutes as opposed to the
-63 minutes it took for the 50,000 sample run.  Now you know why 20,000 and
-25,000 are the defaults for these scripts.
-
-![Accuracy Chart, 25000 samples](tensorboard3.png)
-
-
-
-
-
-
--- a/cli/requirements.txt
+++ b/cli/requirements.txt
@@ -1,10 +0,0 @@
-# --- Packages needed by our scripts ---
-
-numpy==1.26.4
-scipy==1.12.0
-librosa==0.10.2.post1
-soundfile==0.12.1
-tqdm==4.67.1
-scikit-learn==1.6.0
-numba==0.63.1
-PyYAML==6.0.3
--- a/cli/setup_python_venv
+++ b/cli/setup_python_venv
@@ -1,5 +1,6 @@
 #!/bin/bash
-PROGDIR="$(dirname $(realpath $0))"
+PROGDIR="$(dirname "$(realpath "$0")")"
+ROOTDIR="$(dirname "${PROGDIR}")"

 KNOWN_ARGS=( data-dir python gpu no-gpu )
 source "${PROGDIR}/shell.functions"
@@ -27,7 +28,7 @@ EOF
    exit 1
 fi

-[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath ${DATA_DIR})"
+[ -n "${DATA_DIR}" ] && DATA_DIR="$(realpath "${DATA_DIR}")"
 [ -d  "${DATA_DIR}" ] || {
    echo "Data directory '${DATA_DIR}' doesn't exist." >&2
    exit 1
@@ -52,7 +53,8 @@ if [ -n "${PYTHON}" ] ; then
    PYTHONS=( "${PYTHON}" )
    unset PYTHON
 else
-    PYTHONS=( python3.12 python3.10 )
+    # Add 3.11 as a common middle-ground (especially outside Ubuntu 24.04)
+    PYTHONS=( python3.12 python3.11 python3.10 )
 fi

 for p in "${PYTHONS[@]}" ; do
@@ -60,14 +62,14 @@ for p in "${PYTHONS[@]}" ; do
 done

 [ -n "${PYTHON}" ] || {
-    echo "A python 3.12 or 3.10 interpreter wasn't found.  You 'll need to install one before proceeding." >&2
+    echo "A python 3.12/3.11/3.10 interpreter wasn't found. You'll need to install one before proceeding." >&2
    exit 1
 }

-if [ -d  "${VENV}" ] ; then
+if [ -d "${VENV}" ] ; then
    if [ -f "${DATA_DIR}/.mww-data-dir" ] ; then
        source "${VENV}/bin/activate" || {
-            echo "Unable to activate existing virtualenv '${VENV}'.  You should delete it and try again." >&2
+            echo "Unable to activate existing virtualenv '${VENV}'. You should delete it and try again." >&2
            exit 1
        }
    else
@@ -82,24 +84,28 @@ if [ -z "$VIRTUAL_ENV" ] ; then
 else
    echo "   ===== Updating virtualenv at '${VENV}' ====="
 fi
+
 ${PYTHON} -m venv --upgrade-deps "${VENV}"
 source "${VENV}/bin/activate"

 set -euo pipefail

-declare -a progfiles=( $(find ${PROGDIR} -mindepth 1 -maxdepth 1 -executable -type f) )
+# Symlink CLI scripts into .venv/bin
+declare -a progfiles=( $(find "${PROGDIR}" -mindepth 1 -maxdepth 1 -executable -type f) )
 progfiles+=( "${PROGDIR}/shell.functions" )

+# Also symlink the top-level entrypoint if present
+[ -x "${ROOTDIR}/train_wake_word" ] && progfiles+=( "${ROOTDIR}/train_wake_word" )
+
 for f in "${progfiles[@]}" ; do
-    ln -sfr "${f}" ".venv/bin/$(basename ${f})"
+    ln -sfr "${f}" ".venv/bin/$(basename "${f}")"
 done

 #
-# Pip doesn't process packages from requirements.txt in
-# order but order is important because tensorflow, torch,
-# onnxruntime and micro-wake-word all depend on CUDA packages
-# at various versions. They need to be installed in this specific
-# order or they may not be able to use the GPU.
+# Pip doesn't process packages from requirements.txt in order but order is
+# important because tensorflow, torch, onnxruntime and micro-wake-word all
+# depend on CUDA packages at various versions. They need to be installed in
+# this specific order or they may not be able to use the GPU.
 #
 export PIP_PROGRESS_BAR=off
 export PIP_NO_COLOR=1
@@ -117,7 +123,8 @@ pip_install() {
 START_TS=$EPOCHSECONDS

 echo "   ===== Installing common requirements ====="
-pip_install -r "${PROGDIR}/requirements.txt"
+# requirements.txt lives in repo root now
+pip_install -r "${ROOTDIR}/requirements.txt"

 ${GPU} && tfgpu='[and-cuda]' || tfgpu=""
 echo "   ===== Installing Tensorflow${tfgpu} ====="
@@ -140,7 +147,7 @@ pip_install -e "${MWW}"

 echo "   ===== Checking piper-sample-generator ====="
 PSG="${DATA_DIR}/tools/piper-sample-generator"
-if [ ! -d "${PSG}" ] || [ -n "$(git -C ${PSG} status --porcelain)" ] ; then
+if [ ! -d "${PSG}" ] || [ -n "$(git -C "${PSG}" status --porcelain)" ] ; then
    rm -rf "${PSG}" || :
    echo "   Cloning piper-sample-generator to ${DATA_DIR}/tools"
    git clone https://github.com/rhasspy/piper-sample-generator "${PSG}" &>/dev/null
@@ -171,7 +178,7 @@ echo "   ===== Installing keras ====="
 # keras 3.13 has "issues" so we need to back down to 3.12.
 pip_install "keras==3.12.0"

-${PROGDIR}/test_python --data-dir="${DATA_DIR}"
+"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"

 touch .mww-data-dir
 END_TS=$EPOCHSECONDS
@@ -179,5 +186,3 @@ END_TS=$EPOCHSECONDS
 echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."

 print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"
-
-
--- a/cli/setup_training_datasets
+++ b/cli/setup_training_datasets
@@ -1,8 +1,9 @@
 #!/bin/bash
 set -euo pipefail

-PROGPATH=$(realpath "$0")
-PROGDIR=$(dirname "${PROGPATH}")
+PROGPATH="$(realpath "$0")"
+PROGDIR="$(dirname "${PROGPATH}")"
+ROOTDIR="$(dirname "${PROGDIR}")"  # repo root (train_wake_word, requirements.txt, etc.)

 KNOWN_ARGS=( data-dir cleanup-archives cleanup-intermediate-files )
 source "${PROGDIR}/shell.functions"
@@ -27,22 +28,38 @@ EOF
    exit 1
 fi

+# Normalize + validate DATA_DIR (shell.functions typically sets a default,
+# but this makes the script standalone-safe).
+# Default to empty first: under `set -u` a bare ${DATA_DIR} would abort with
+# "unbound variable" before the friendly message below could be printed.
+DATA_DIR="${DATA_DIR:-}"
+if [ -z "${DATA_DIR}" ] || [ ! -d "${DATA_DIR}" ] ; then
+    echo "Data directory '${DATA_DIR}' doesn't exist." >&2
+    exit 1
+fi
+# Resolve only after the existence check, so a realpath failure on a missing
+# path can't terminate the script (set -e) without the message above.
+DATA_DIR="$(realpath "${DATA_DIR}")"
+
 cd "${DATA_DIR}"

 START_TS=$EPOCHSECONDS
 echo -e "\n===== Setting up Training Datasets =====\n"

-${PROGDIR}/setup_negative_datasets --cleanup-archives=${CLEANUP_ARCHIVES} \
-    --cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
+"${PROGDIR}/setup_negative_datasets" \
+    --cleanup-archives="${CLEANUP_ARCHIVES}" \
+    --cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
+    --data-dir="${DATA_DIR}"

-${PROGDIR}/setup_mit_audio --cleanup-archives=${CLEANUP_ARCHIVES} \
-    --cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
+"${PROGDIR}/setup_mit_audio" \
+    --cleanup-archives="${CLEANUP_ARCHIVES}" \
+    --cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
+    --data-dir="${DATA_DIR}"

-${PROGDIR}/setup_audioset --cleanup-archives=${CLEANUP_ARCHIVES} \
-    --cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
+"${PROGDIR}/setup_audioset" \
+    --cleanup-archives="${CLEANUP_ARCHIVES}" \
+    --cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
+    --data-dir="${DATA_DIR}"

-${PROGDIR}/setup_fma --cleanup-archives=${CLEANUP_ARCHIVES} \
-    --cleanup-intermediate-files=${CLEANUP_INTERMEDIATE_FILES} --data-dir="${DATA_DIR}"
+"${PROGDIR}/setup_fma" \
+    --cleanup-archives="${CLEANUP_ARCHIVES}" \
+    --cleanup-intermediate-files="${CLEANUP_INTERMEDIATE_FILES}" \
+    --data-dir="${DATA_DIR}"

-END_TS=$(date +%s.%N)
+END_TS=$EPOCHSECONDS
 print_elapsed_time "${START_TS}" "${END_TS}" "Training dataset setup"
--- a/cli/tensorboard1.png
+++ b/cli/tensorboard1.png
--- a/cli/tensorboard2.png
+++ b/cli/tensorboard2.png
--- a/cli/tensorboard3.png
+++ b/cli/tensorboard3.png
--- a/cli/wake_word_sample_augmenter
+++ b/cli/wake_word_sample_augmenter
--- a/cli/wake_word_sample_trainer
+++ b/cli/wake_word_sample_trainer
--- a/76
+++ b/76
@@ -1,59 +1,37 @@
-# Standard Ubuntu base image.  CUDA base images not needed.
-FROM ubuntu:22.04
+# Base
+FROM ubuntu:24.04

-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=1 \
-    PIP_ROOT_USER_ACTION=ignore \
-    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
-    XLA_FLAGS="--xla_gpu_cuda_data_dir=/usr/local/cuda" \
-    PATH="/usr/local/cuda/bin:${PATH}" \
-    LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
+ENV DEBIAN_FRONTEND=noninteractive

-# System deps (+dev headers for building C/C++ extensions)
+# System deps
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3.10 python3.10-venv python3.10-distutils python3.10-dev python3-pip \
-    git wget curl unzip ca-certificates git-lfs \
-    build-essential g++ cmake \
-    libsndfile1 libsndfile1-dev libffi-dev \
-    ffmpeg \
- && rm -rf /var/lib/apt/lists/*
+    python3.12 python3.12-venv python3.12-dev python3-pip python-is-python3 \
+    git wget curl unzip ca-certificates nano less \
+ && rm -rf /var/lib/apt/lists/* \
+ && mkdir -p /data

-# Use python3.10 everywhere
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
- && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
+# Recorder port
+# NOTE(review): run_recorder.sh falls back to port 8888 when REC_PORT is unset,
+# so this only matches the listening port if REC_PORT=8789 is provided — confirm.
+EXPOSE 8789

-# ---- No cuDNN repo meddling needed if using TF 2.17.x ----
+# Script root
+WORKDIR /root/mww-scripts

-# Python deps
-# Order is important. onnxruntime, tensorflow and torch have
-# to be installed in the order below or their cuda dependencies
-# will conflict.
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --upgrade pip \
- && pip install "numpy==1.26.4" "cython>=0.29.36" \
- && pip install -r /tmp/requirements.txt \
- && pip install "onnxruntime-gpu[cuda]>=1.16.0" \
- && pip install "tensorflow[and-cuda]==2.18.0" \
-    "tensorboard==2.18.0" \
-    "tensorboard-data-server==0.7.2" \
-    "tensorflow-io-gcs-filesystem==0.37.1" \
- && pip install \
-      torch==2.7.1 \
-      torchaudio==2.7.1 \
-      --index-url https://download.pytorch.org/whl/cu128
+# Bash environment
+COPY --chown=root:root --chmod=0755 .bashrc /root/

-# Workspace + notebook fallback
-RUN mkdir -p /data
-WORKDIR /data
-COPY microWakeWord_training_notebook.ipynb /root/
+# Root-level entrypoints
+COPY --chown=root:root --chmod=0755 \
+    train_wake_word \
+    run_recorder.sh \
+    recorder_server.py \
+    requirements.txt \
+    /root/mww-scripts/

-# Startup script (copies default notebook if missing)
-COPY startup.sh /usr/local/bin/startup.sh
-RUN chmod +x /usr/local/bin/startup.sh
+# CLI folder (THIS IS THE IMPORTANT CHANGE)
+COPY --chown=root:root cli/ /root/mww-scripts/cli/

-EXPOSE 8888
+# Static UI for recorder
+COPY --chown=root:root --chmod=0644 static/index.html /root/mww-scripts/static/index.html

-CMD ["/bin/bash", "-lc", "/usr/local/bin/startup.sh && \
-     exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root \
-     --ServerApp.token='' --ServerApp.password='' --ServerApp.root_dir=/data"]
+# recorder server
+CMD ["/bin/bash", "-lc", "/root/mww-scripts/run_recorder.sh"]
--- a/microWakeWord_training_notebook.ipynb
+++ b/microWakeWord_training_notebook.ipynb
--- a/mmw.png
+++ b/mmw.png
--- a/recorder_server.py
+++ b/recorder_server.py
@@ -0,0 +1,593 @@
+# recorder_server.py
+import os
+import re
+import subprocess
+import threading
+from pathlib import Path
+from typing import Dict, Any, List, Optional, Tuple
+
+from fastapi import FastAPI, UploadFile, File, Form, Query
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+
+ROOT_DIR = Path(__file__).resolve().parent
+
+# In Docker CLI world, DATA_DIR should be /data
+DATA_DIR = Path(os.environ.get("DATA_DIR", "/data")).resolve()
+
+# UI files live next to this script by default
+STATIC_DIR = Path(os.environ.get("STATIC_DIR", str(ROOT_DIR / "static"))).resolve()
+
+# Personal samples MUST land in /data/personal_samples for your CLI pipeline
+PERSONAL_DIR = Path(os.environ.get("PERSONAL_DIR", str(DATA_DIR / "personal_samples"))).resolve()
+
+# CLI folder inside repo
+CLI_DIR = Path(os.environ.get("CLI_DIR", str(ROOT_DIR / "cli"))).resolve()
+
+# If you want cleanup defaults for auto dataset setup, set these env vars:
+#   REC_DATASET_CLEANUP_ARCHIVES=true/false
+#   REC_DATASET_CLEANUP_INTERMEDIATE_FILES=true/false
+DATASET_CLEANUP_ARCHIVES = os.environ.get("REC_DATASET_CLEANUP_ARCHIVES", "false").lower() in ("1", "true", "yes", "y")
+DATASET_CLEANUP_INTERMEDIATE = os.environ.get("REC_DATASET_CLEANUP_INTERMEDIATE_FILES", "false").lower() in ("1", "true", "yes", "y")
+
+# We want "Start training" to trigger your CLI entrypoint, using the existing venv
+# (train_wake_word should be in /data/.venv/bin via setup_python_venv)
+TRAIN_CMD = os.environ.get(
+    "TRAIN_CMD",
+    f"source '{DATA_DIR}/.venv/bin/activate' && train_wake_word --data-dir '{DATA_DIR}'"
+)
+
+TAKES_PER_SPEAKER_DEFAULT = int(os.environ.get("REC_TAKES_PER_SPEAKER", "10"))
+SPEAKERS_TOTAL_DEFAULT = int(os.environ.get("REC_SPEAKERS_TOTAL", "1"))
+
+# How many lines to show in WebUI (tail)
+TRAIN_LOG_TAIL_LINES = int(os.environ.get("REC_TRAIN_LOG_TAIL_LINES", "400"))
+# If you prefer bytes-based tailing (fast), keep this non-zero.
+TRAIN_LOG_MAX_BYTES = int(os.environ.get("REC_TRAIN_LOG_MAX_BYTES", str(512 * 1024)))  # 512KB
+
+app = FastAPI(title="microWakeWord Personal Recorder")
+
+# Serve static UI
+STATIC_DIR.mkdir(parents=True, exist_ok=True)
+app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
+
+
+def safe_name(raw: str) -> str:
+    """Turn a free-form phrase into a lowercase identifier.
+
+    Whitespace runs become single underscores, every character outside
+    ``[a-z0-9_]`` is dropped, and leading/trailing underscores are removed.
+    Falls back to ``"wakeword"`` when nothing usable remains (including when
+    ``raw`` is empty or None).
+    """
+    lowered = (raw or "").strip().lower()
+    underscored = re.sub(r"\s+", "_", lowered)
+    filtered = re.sub(r"[^a-z0-9_]+", "", underscored)
+    trimmed = re.sub(r"^_+|_+$", "", filtered)
+    if not trimmed:
+        return "wakeword"
+    return trimmed
+
+
+# -------------------- In-memory session state --------------------
+STATE: Dict[str, Any] = {
+    "raw_phrase": None,
+    "safe_word": None,
+
+    "speakers_total": SPEAKERS_TOTAL_DEFAULT,
+    "takes_per_speaker": TAKES_PER_SPEAKER_DEFAULT,
+
+    "takes_received": 0,
+    "takes": [],
+
+    "training": {
+        "running": False,
+        "exit_code": None,
+        "log_lines": [],      # legacy in-memory tail (still maintained)
+        "log_path": None,     # path to recorder_training.log
+        "safe_word": None,
+
+        # NEW: byte offset for efficient log tailing
+        "log_offset": 0,
+    },
+}
+
+STATE_LOCK = threading.Lock()
+
+
+def _reset_personal_samples_dir():
+    """Ensure PERSONAL_DIR exists and delete every ``*.wav`` directly inside it.
+
+    Deletion is best-effort: a file that cannot be removed is skipped
+    silently, so stale takes may remain after this returns.
+    """
+    PERSONAL_DIR.mkdir(parents=True, exist_ok=True)
+    for p in PERSONAL_DIR.glob("*.wav"):
+        try:
+            p.unlink()
+        except Exception:
+            # Deliberately ignored: one undeletable file must not abort the reset.
+            pass
+
+
+def _append_train_log(line: str):
+    """Append one line to the in-memory training log, keeping the last 250.
+
+    Trailing newlines are stripped; None is stored as an empty string. The
+    list is trimmed in place under STATE_LOCK.
+    """
+    text = (line or "").rstrip("\n")
+    with STATE_LOCK:
+        history: List[str] = STATE["training"]["log_lines"]
+        history.append(text)
+        overflow = len(history) - 250
+        if overflow > 0:
+            # Drop the oldest entries so only the newest 250 remain.
+            del history[:overflow]
+
+
+def _title_from_phrase(raw_phrase: str) -> str:
+    """Build a human-friendly title for the optional <wake_word_title> argument.
+
+    Anything other than ASCII letters, digits and spaces is replaced by a
+    space, whitespace runs are collapsed, and the result is title-cased.
+    Returns "" when nothing remains (or when ``raw_phrase`` is empty/None).
+    """
+    spaced = re.sub(r"[^a-zA-Z0-9 ]+", " ", raw_phrase or "").strip()
+    collapsed = re.sub(r"\s+", " ", spaced)
+    if not collapsed:
+        return ""
+    return collapsed.title()
+
+
+def _run_streamed(
+    cmd: List[str],
+    cwd: Path,
+    log_path: Path,
+    header: Optional[str] = None,
+    env: Optional[Dict[str, str]] = None,
+) -> int:
+    """
+    Run a command streaming stdout/stderr to both:
+      - recorder_training.log (disk)
+      - STATE["training"]["log_lines"] (UI) [best-effort]
+    Returns process exit code.
+
+    Args:
+        cmd: argv list to execute.
+        cwd: working directory for the child process.
+        log_path: log file that output is appended to (UTF-8).
+        header: optional banner line written before the command line.
+        env: optional environment for the child (None inherits the parent's).
+
+    If streaming fails part-way (for example a log write raises), the child
+    is killed and reaped before the exception propagates, so no orphaned or
+    zombie process is left behind.
+    """
+    if header:
+        _append_train_log(header)
+
+    cmd_line = "→ " + " ".join(cmd)
+    _append_train_log(cmd_line)
+
+    with open(log_path, "a", encoding="utf-8") as lf:
+        lf.write("\n" + ("=" * 80) + "\n")
+        if header:
+            lf.write(header + "\n")
+        lf.write(cmd_line + "\n")
+        lf.flush()
+
+        proc = subprocess.Popen(
+            cmd,
+            cwd=str(cwd),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            # Decode like the log file is written, and never let a stray
+            # invalid byte in the child's output abort the whole run.
+            encoding="utf-8",
+            errors="replace",
+            bufsize=1,
+            env=env,
+        )
+
+        assert proc.stdout is not None
+        try:
+            for line in proc.stdout:
+                lf.write(line)
+                lf.flush()
+                _append_train_log(line)
+        except BaseException:
+            # Don't leave the child running if we can no longer consume it.
+            proc.kill()
+            raise
+        finally:
+            proc.stdout.close()
+            rc = proc.wait()
+
+        return rc
+
+
+def _ensure_training_venv(log_path: Path) -> None:
+    """
+    Make sure the training venv under DATA_DIR exists.
+
+    When ``<DATA_DIR>/.venv/bin/activate`` is already present nothing is run.
+    Otherwise cli/setup_python_venv is executed through a login bash shell
+    with its output streamed to ``log_path``.
+
+    Raises:
+        RuntimeError: the setup script is missing, exits non-zero, or
+            finishes without producing the activate script.
+    """
+    venv_activate = DATA_DIR / ".venv" / "bin" / "activate"
+    if venv_activate.exists():
+        _append_train_log("✅ Training venv found (skipping setup_python_venv)")
+        return
+
+    setup_script = CLI_DIR / "setup_python_venv"
+    if not setup_script.exists():
+        raise RuntimeError(f"Missing setup_python_venv at: {setup_script}")
+
+    shell_line = f"cd '{DATA_DIR}' && '{setup_script}' --data-dir='{DATA_DIR}'"
+    exit_code = _run_streamed(
+        ["bash", "-lc", shell_line],
+        cwd=DATA_DIR,
+        log_path=log_path,
+        header="===== Ensuring Python venv (/data/.venv) =====",
+    )
+
+    if exit_code != 0:
+        raise RuntimeError(f"setup_python_venv failed (exit_code={exit_code})")
+
+    # The script can exit 0 and still not have produced a venv; verify.
+    if not venv_activate.exists():
+        raise RuntimeError(f"setup_python_venv finished, but {venv_activate} is still missing")
+
+
+def _ensure_training_datasets(log_path: Path) -> None:
+    """
+    Run cli/setup_training_datasets unconditionally before training.
+
+    The script is invoked through a login bash shell from DATA_DIR, with the
+    cleanup flags taken from DATASET_CLEANUP_ARCHIVES and
+    DATASET_CLEANUP_INTERMEDIATE; output is streamed to ``log_path``.
+    The underlying scripts should skip work when already done.
+
+    Raises:
+        RuntimeError: the script is missing or exits non-zero.
+    """
+    script = CLI_DIR / "setup_training_datasets"
+    if not script.exists():
+        raise RuntimeError(f"Missing setup_training_datasets at: {script}")
+
+    archives_flag = "true" if DATASET_CLEANUP_ARCHIVES else "false"
+    intermediate_flag = "true" if DATASET_CLEANUP_INTERMEDIATE else "false"
+
+    shell_line = " ".join(
+        [
+            f"cd '{DATA_DIR}' &&",
+            f"'{script}'",
+            f"--cleanup-archives='{archives_flag}'",
+            f"--cleanup-intermediate-files='{intermediate_flag}'",
+            f"--data-dir='{DATA_DIR}'",
+        ]
+    )
+
+    exit_code = _run_streamed(
+        ["bash", "-lc", shell_line],
+        cwd=DATA_DIR,
+        log_path=log_path,
+        header="===== Ensuring training datasets (setup_training_datasets) =====",
+    )
+
+    if exit_code != 0:
+        raise RuntimeError(f"setup_training_datasets failed (exit_code={exit_code})")
+
+
+def _read_log_tail_by_bytes(log_path: Path, max_bytes: int) -> str:
+    """
+    Return at most the final ``max_bytes`` bytes of ``log_path`` as text.
+
+    Bytes are decoded as UTF-8 with replacement characters for invalid
+    sequences. A missing file or any read error yields "".
+    """
+    if not log_path.exists():
+        return ""
+
+    try:
+        total = log_path.stat().st_size
+        begin = max(0, total - max_bytes)
+        with open(log_path, "rb") as fh:
+            fh.seek(begin)
+            # May begin mid-line (or mid-character); the UI tolerates that.
+            tail = fh.read()
+        return tail.decode("utf-8", errors="replace")
+    except Exception:
+        return ""
+
+
+def _read_log_tail_by_lines(log_path: Path, max_lines: int) -> str:
+    """
+    Return the last ``max_lines`` lines of ``log_path`` joined with "\\n".
+
+    Only the final TRAIN_LOG_MAX_BYTES bytes of the file are examined, so
+    fewer lines may come back for very long lines. A missing file, an empty
+    tail or any error yields "".
+    """
+    if not log_path.exists():
+        return ""
+    try:
+        # Bound the read by bytes first, then keep the trailing lines.
+        tail_text = _read_log_tail_by_bytes(log_path, TRAIN_LOG_MAX_BYTES)
+        if not tail_text:
+            return ""
+        rows = tail_text.splitlines()
+        kept = rows if len(rows) <= max_lines else rows[-max_lines:]
+        return "\n".join(kept)
+    except Exception:
+        return ""
+
+
+def _read_log_since_offset(log_path: Path, offset: int, max_bytes: int = 256 * 1024) -> Tuple[str, int]:
+    """
+    Incrementally read ``log_path`` beginning at byte ``offset``.
+
+    Returns ``(new_text, new_offset)``; at most ``max_bytes`` bytes are read
+    per call. An offset beyond the current file size is treated as a
+    rotated/truncated file and reading restarts from 0. A missing file or a
+    read error returns "" together with the offset in effect at that point.
+    """
+    if not log_path.exists():
+        return ("", offset)
+
+    start = offset
+    try:
+        if start > log_path.stat().st_size:
+            # File shrank since the caller last looked: start over.
+            start = 0
+
+        with open(log_path, "rb") as fh:
+            fh.seek(start)
+            payload = fh.read(max_bytes)
+
+        return (payload.decode("utf-8", errors="replace"), start + len(payload))
+    except Exception:
+        return ("", start)
+
+
+def _run_training_background(safe_word: str, allow_no_personal: bool):
+    """
+    Worker-thread body for one training run.
+
+    Steps: reset the training state, ensure the venv, ensure the datasets,
+    then run TRAIN_CMD with ``safe_word`` (and a title derived from the
+    session phrase, when there is one) through a login bash shell. Output is
+    appended to ``<DATA_DIR>/recorder_training.log`` and mirrored into
+    STATE["training"]["log_lines"].
+
+    Args:
+        safe_word: sanitized wake word passed to the training command.
+        allow_no_personal: exported to the child as MWW_ALLOW_NO_PERSONAL
+            ("true"/"false").
+
+    Never raises for ordinary failures: any Exception is recorded as
+    exit_code 999, and ``running`` is always cleared at the end.
+    """
+    log_path = DATA_DIR / "recorder_training.log"
+
+    with STATE_LOCK:
+        raw_phrase = STATE.get("raw_phrase") or ""
+        STATE["training"]["running"] = True
+        STATE["training"]["exit_code"] = None
+        STATE["training"]["log_lines"] = []
+        STATE["training"]["safe_word"] = safe_word
+        STATE["training"]["log_path"] = str(log_path)
+        STATE["training"]["log_offset"] = 0
+
+    wake_word_title = _title_from_phrase(raw_phrase)
+
+    # fresh header at the start of a run
+    _append_train_log("================================================================================")
+    _append_train_log("===== Recorder Training Run =====")
+    _append_train_log("================================================================================")
+
+    # Ensure the log exists and starts cleanly with a header separator for this run
+    try:
+        with open(log_path, "a", encoding="utf-8") as lf:
+            lf.write("\n" + ("=" * 80) + "\n")
+            lf.write("===== Recorder Training Run =====\n")
+            lf.write(("=" * 80) + "\n")
+            lf.flush()
+    except Exception:
+        # Best-effort banner only; a real I/O problem surfaces in the steps below.
+        pass
+
+    try:
+        # 1) Ensure venv (auto-installs)
+        _ensure_training_venv(log_path)
+
+        # 2) Ensure datasets (auto-installs / skips if already present)
+        _ensure_training_datasets(log_path)
+
+        # 3) Run training
+        if wake_word_title:
+            cmd_str = f"{TRAIN_CMD} '{safe_word}' '{wake_word_title}'"
+        else:
+            cmd_str = f"{TRAIN_CMD} '{safe_word}'"
+
+        env = os.environ.copy()
+        env["MWW_ALLOW_NO_PERSONAL"] = "true" if allow_no_personal else "false"
+
+        _append_train_log("===== Training (train_wake_word) =====")
+        _append_train_log(f"→ Running: {cmd_str}")
+
+        with open(log_path, "a", encoding="utf-8") as lf:
+            proc = subprocess.Popen(
+                ["bash", "-lc", cmd_str],
+                cwd=str(DATA_DIR),
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                # Match the log file's encoding and tolerate invalid bytes in
+                # the child's output instead of crashing the run.
+                encoding="utf-8",
+                errors="replace",
+                bufsize=1,
+                env=env,
+            )
+            assert proc.stdout is not None
+            try:
+                for line in proc.stdout:
+                    lf.write(line)
+                    lf.flush()
+                    _append_train_log(line)
+            except BaseException:
+                # Don't leave the training process running unattended.
+                proc.kill()
+                raise
+            finally:
+                proc.stdout.close()
+                rc = proc.wait()
+
+        _append_train_log(f"✓ Training finished (exit_code={rc})")
+        with STATE_LOCK:
+            STATE["training"]["exit_code"] = rc
+
+    except Exception as e:
+        _append_train_log(f"✗ Training crashed: {e!r}")
+        with STATE_LOCK:
+            # 999 is the sentinel for "crashed before producing an exit code".
+            STATE["training"]["exit_code"] = 999
+
+    finally:
+        with STATE_LOCK:
+            STATE["training"]["running"] = False
+
+
+# -------------------- Routes --------------------
+@app.get("/", response_class=HTMLResponse)
+def index():
+    """Serve ``index.html`` from STATIC_DIR, or a 500 page when it is missing."""
+    page = STATIC_DIR / "index.html"
+    if page.exists():
+        return HTMLResponse(page.read_text(encoding="utf-8"))
+    return HTMLResponse(
+        "<h3>Missing UI</h3><p>Create <code>static/index.html</code>.</p>",
+        status_code=500,
+    )
+
+
+@app.post("/api/start_session")
+def start_session(payload: Dict[str, Any]):
+    """
+    Begin a new recording session.
+
+    Payload keys:
+        phrase: wake phrase (required, non-empty).
+        speakers_total: optional, clamped to 1..10.
+        takes_per_speaker: optional, clamped to 1..50.
+
+    Resets the take counters and clears PERSONAL_DIR of ``*.wav`` files.
+    Returns the normalized session parameters, or a 400 JSON error for a
+    missing phrase or non-integer counts.
+    """
+    # str() guards against a non-string JSON value (e.g. a number) for "phrase".
+    raw = str(payload.get("phrase") or "").strip()
+    if not raw:
+        return JSONResponse({"ok": False, "error": "phrase is required"}, status_code=400)
+
+    safe = safe_name(raw)
+
+    # Malformed counts are a client error, not a server crash.
+    try:
+        speakers_total = int(payload.get("speakers_total") or SPEAKERS_TOTAL_DEFAULT)
+        takes_per_speaker = int(payload.get("takes_per_speaker") or TAKES_PER_SPEAKER_DEFAULT)
+    except (TypeError, ValueError, OverflowError):
+        return JSONResponse(
+            {"ok": False, "error": "speakers_total and takes_per_speaker must be integers"},
+            status_code=400,
+        )
+
+    speakers_total = max(1, min(10, speakers_total))
+    takes_per_speaker = max(1, min(50, takes_per_speaker))
+
+    with STATE_LOCK:
+        STATE["raw_phrase"] = raw
+        STATE["safe_word"] = safe
+        STATE["speakers_total"] = speakers_total
+        STATE["takes_per_speaker"] = takes_per_speaker
+        STATE["takes_received"] = 0
+        STATE["takes"] = []
+        # do not interrupt training if running
+
+    # NOTE(review): this deletes the recorded .wav files even while a training
+    # run is in progress — confirm training no longer needs them at that point.
+    _reset_personal_samples_dir()
+
+    return {
+        "ok": True,
+        "raw_phrase": raw,
+        "safe_word": safe,
+        "speakers_total": speakers_total,
+        "takes_per_speaker": takes_per_speaker,
+        "takes_total": speakers_total * takes_per_speaker,
+        "personal_dir": str(PERSONAL_DIR),
+        "data_dir": str(DATA_DIR),
+    }
+
+
+@app.get("/api/session")
+def get_session():
+    """Return a snapshot of the current session and training state.
+
+    The snapshot is taken under STATE_LOCK. ``takes`` and ``training`` are
+    shallow copies, so the ``log_lines`` list inside ``training`` is still
+    the live list object.
+    """
+    with STATE_LOCK:
+        return {
+            "ok": True,
+            "raw_phrase": STATE["raw_phrase"],
+            "safe_word": STATE["safe_word"],
+            "speakers_total": STATE["speakers_total"],
+            "takes_per_speaker": STATE["takes_per_speaker"],
+            "takes_received": STATE["takes_received"],
+            "takes": list(STATE["takes"]),
+            "training": dict(STATE["training"]),
+        }
+
+
+@app.post("/api/upload_take")
+async def upload_take(
+    speaker_index: int = Form(...),
+    take_index: int = Form(...),
+    file: UploadFile = File(...),
+):
+    """
+    Store one recorded take as ``speakerNN_takeNN.wav`` in PERSONAL_DIR.
+
+    Form fields:
+        speaker_index: 1..speakers_total of the active session.
+        take_index: 1..takes_per_speaker of the active session.
+        file: the uploaded audio; payloads shorter than 44 bytes are rejected.
+
+    Re-uploading the same speaker/take overwrites the file without counting
+    it twice. Returns a 400 JSON error when no session is active, an index
+    is out of range, or the file is empty/too short.
+    """
+    with STATE_LOCK:
+        safe_word = STATE["safe_word"]
+        speakers_total = int(STATE["speakers_total"])
+        takes_per_speaker = int(STATE["takes_per_speaker"])
+
+    if not safe_word:
+        return JSONResponse({"ok": False, "error": "No active session. Call /api/start_session first."}, status_code=400)
+
+    if speaker_index < 1 or speaker_index > speakers_total:
+        return JSONResponse({"ok": False, "error": f"speaker_index must be 1..{speakers_total}"}, status_code=400)
+
+    if take_index < 1 or take_index > takes_per_speaker:
+        return JSONResponse({"ok": False, "error": f"take_index must be 1..{takes_per_speaker}"}, status_code=400)
+
+    PERSONAL_DIR.mkdir(parents=True, exist_ok=True)
+
+    out_name = f"speaker{speaker_index:02d}_take{take_index:02d}.wav"
+    out_path = PERSONAL_DIR / out_name
+
+    data = await file.read()
+    # 44 bytes is the smallest size accepted here as a plausible audio file.
+    if not data or len(data) < 44:
+        return JSONResponse({"ok": False, "error": "Empty/invalid file"}, status_code=400)
+
+    out_path.write_bytes(data)
+
+    with STATE_LOCK:
+        if out_name not in STATE["takes"]:
+            STATE["takes"].append(out_name)
+            STATE["takes_received"] = len(STATE["takes"])
+        # Capture the count while still holding the lock so the response
+        # cannot observe a concurrent session reset or upload.
+        takes_received = STATE["takes_received"]
+
+    return {"ok": True, "saved_as": out_name, "takes_received": takes_received}
+
+
+@app.post("/api/train")
+def train_now(payload: Dict[str, Any] = None):
+    """
+    Start a training run in a background thread.
+
+    Payload keys:
+        allow_no_personal: when truthy, training may start with zero takes.
+
+    Returns 400 JSON when training is already running, no session is active,
+    no takes exist (and ``allow_no_personal`` is false), or fewer than the
+    minimum number of takes were recorded.
+
+    The precondition checks and the ``running = True`` claim happen under a
+    single STATE_LOCK hold, so two concurrent requests cannot both start a
+    run.
+    """
+    payload = payload or {}
+    allow_no_personal = bool(payload.get("allow_no_personal", False))
+
+    with STATE_LOCK:
+        safe_word = STATE["safe_word"]
+        takes_received = int(STATE["takes_received"])
+        speakers_total = int(STATE["speakers_total"])
+        takes_per_speaker = int(STATE["takes_per_speaker"])
+        training_running = bool(STATE["training"]["running"])
+
+        takes_total = speakers_total * takes_per_speaker
+
+        if training_running:
+            return JSONResponse({"ok": False, "error": "Training already running"}, status_code=400)
+
+        if not safe_word:
+            return JSONResponse({"ok": False, "error": "No active session"}, status_code=400)
+
+        # At most 3 takes are required, fewer when the session is smaller.
+        min_required = max(1, min(3, takes_total))
+
+        if takes_received == 0 and not allow_no_personal:
+            return JSONResponse(
+                {
+                    "ok": False,
+                    "error": f"No personal voice samples recorded (0/{takes_total}).",
+                    "code": "NO_PERSONAL_SAMPLES",
+                    "message": "You can train without personal voices, or record samples first.",
+                    "takes_total": takes_total,
+                },
+                status_code=400,
+            )
+
+        if 0 < takes_received < min_required:
+            return JSONResponse(
+                {
+                    "ok": False,
+                    "error": f"Not enough takes yet ({takes_received}/{takes_total}).",
+                    "code": "NOT_ENOUGH_TAKES",
+                    "min_required": min_required,
+                    "takes_total": takes_total,
+                },
+                status_code=400,
+            )
+
+        # Claim the run before releasing the lock; the worker thread sets
+        # these again on entry and clears `running` when it finishes.
+        STATE["training"]["running"] = True
+        STATE["training"]["exit_code"] = None
+
+    t = threading.Thread(target=_run_training_background, args=(safe_word, allow_no_personal), daemon=True)
+    try:
+        t.start()
+    except BaseException:
+        # The worker never ran, so nobody else will release the claim.
+        with STATE_LOCK:
+            STATE["training"]["running"] = False
+        raise
+
+    return {
+        "ok": True,
+        "started": True,
+        "safe_word": safe_word,
+        "personal_samples_used": takes_received >= min_required,
+        "allow_no_personal": allow_no_personal,
+    }
+
+
+@app.get("/api/train_status")
+def train_status(
+    offset: int = Query(0, ge=0),
+    max_bytes: int = Query(65536, ge=1024, le=262144),
+    last_size: int = Query(0, ge=0),
+    last_mtime: float = Query(0.0, ge=0.0),
+):
+    """
+    Stream training output from the log file on disk.
+
+    Robust to log overwrite/truncation:
+      - UI passes offset + last_size + last_mtime
+      - If file shrinks or mtime goes backwards/changes weirdly, reset offset to 0
+
+    Query parameters:
+        offset: byte position to read from.
+        max_bytes: maximum number of bytes read in this call.
+        last_size / last_mtime: size and mtime the client saw last time.
+
+    The response's ``training`` dict is a copy of STATE["training"] extended
+    with ``log_text``, ``next_offset``, ``log_size`` and ``log_mtime``.
+    ``next_offset`` never points into the middle of a UTF-8 character: an
+    incomplete trailing sequence is held back and re-read on the next call.
+    """
+    # Local import: only this handler needs the incremental decoder.
+    import codecs
+
+    with STATE_LOCK:
+        tr = dict(STATE["training"])
+        log_path_str = tr.get("log_path")
+
+    log_text = ""
+    next_offset = offset
+    log_size = 0
+    log_mtime = 0.0
+
+    if log_path_str:
+        p = Path(log_path_str)
+        if p.exists():
+            try:
+                st = p.stat()
+                log_size = int(st.st_size)
+                log_mtime = float(st.st_mtime)
+
+                # Detect overwrite/truncate/reset:
+                # - file shrank
+                # - file mtime moved "backwards" (rare) or changed while size reset
+                # If anything indicates a reset, restart from beginning.
+                if (log_size < last_size) or (last_mtime and log_mtime < last_mtime):
+                    offset = 0
+
+                # Clamp offset to current file size
+                if offset > log_size:
+                    offset = log_size
+
+                # Read incrementally from the file
+                with p.open("rb") as f:
+                    f.seek(offset)
+                    chunk = f.read(max_bytes)
+
+                # A chunk can end in the middle of a multi-byte character
+                # (the log contains e.g. "→"). Decode incrementally so the
+                # incomplete tail stays buffered instead of becoming U+FFFD,
+                # and do not advance the offset past it.
+                decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+                log_text = decoder.decode(chunk)
+                pending = decoder.getstate()[0]
+                next_offset = offset + len(chunk) - len(pending)
+
+            except Exception as e:
+                log_text = f"\n[log read error: {e!r}]\n"
+                next_offset = offset
+
+    tr["log_text"] = log_text
+    tr["next_offset"] = next_offset
+    tr["log_size"] = log_size
+    tr["log_mtime"] = log_mtime
+
+    return {"ok": True, "training": tr}
+
+
+@app.post("/api/reset_recordings")
+def reset_recordings():
+    """Delete the recorded ``*.wav`` takes and zero the take counters.
+
+    The phrase, safe word and speaker/take limits of the session are kept.
+    """
+    _reset_personal_samples_dir()
+    with STATE_LOCK:
+        STATE["takes_received"] = 0
+        STATE["takes"] = []
+    return {"ok": True}
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,28 +1,10 @@
-# --- Core training (Microwakeword) ---
+# --- Packages needed by our scripts ---

 numpy==1.26.4
 scipy==1.12.0
 librosa==0.10.2.post1
 soundfile==0.12.1
-soxr==0.5.0.post1
-audiomentations==0.38.0
-webrtcvad==2.0.10
 tqdm==4.67.1
 scikit-learn==1.6.0
-numba==0.60.0
-joblib==1.4.2
-pandas==2.2.3
-pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
-audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
-bitstruct==8.19.0
-
-# --- Piper sample generation ---
-piper-tts>=1.2.0
-piper-phonemize-cross==1.2.1
-
-# --- Notebook / tooling ---
-ipykernel==6.29.5
-jupyterlab==4.3.4
-ipywidgets==8.1.5
-matplotlib-inline==0.1.7
-rich==13.9.4
+numba==0.63.1
+PyYAML==6.0.3
--- a/run_recorder.sh
+++ b/run_recorder.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# Launch the recorder web UI (uvicorn + recorder_server.py).
+#
+# Environment:
+#   DATA_DIR   data directory (default: /data)
+#   REC_HOST   bind address   (default: 0.0.0.0)
+#   REC_PORT   listen port    (default: 8888)
+#   REC_FASTAPI_VERSION / REC_UVICORN_VERSION / REC_PY_MULTIPART_VERSION
+#              pinned versions installed into the recorder venv
+set -euo pipefail
+
+ROOTDIR="$(dirname "$(realpath "$0")")"
+
+# Training convention
+DATA_DIR="${DATA_DIR:-/data}"
+HOST="${REC_HOST:-0.0.0.0}"
+# NOTE(review): the Dockerfile in this change EXPOSEs 8789 while this default
+# is 8888 — confirm which port is intended.
+PORT="${REC_PORT:-8888}"
+
+# Keep recorder deps separate from training venv
+VENV_DIR="${DATA_DIR}/.recorder-venv"
+PY="${VENV_DIR}/bin/python"
+# An array (not a string) so the command survives whitespace in DATA_DIR.
+PIP=("${PY}" -m pip)
+PIN_FILE="${VENV_DIR}/.pinned_installed"
+
+FASTAPI_VERSION="${REC_FASTAPI_VERSION:-0.115.6}"
+UVICORN_VERSION="${REC_UVICORN_VERSION:-0.30.6}"
+PY_MULTIPART_VERSION="${REC_PY_MULTIPART_VERSION:-0.0.9}"
+
+echo "microWakeWord Recorder (Docker)"
+echo "-> ROOTDIR:  ${ROOTDIR}"
+echo "-> DATA_DIR: ${DATA_DIR}"
+echo "-> URL:      http://localhost:${PORT}/"
+
+mkdir -p "${DATA_DIR}"
+
+# -----------------------------
+# Recorder venv (separate)
+# -----------------------------
+if [[ ! -x "${PY}" ]]; then
+  echo "Creating recorder venv: ${VENV_DIR}"
+  python3 -m venv "${VENV_DIR}"
+fi
+
+# shellcheck disable=SC1091
+source "${VENV_DIR}/bin/activate"
+
+# The pin file marks a completed install; it is only touched after both pip
+# commands succeed, so an interrupted install is retried on the next start.
+if [[ ! -f "${PIN_FILE}" ]]; then
+  echo "Installing pinned recorder deps"
+  "${PIP[@]}" install -U pip setuptools wheel
+  "${PIP[@]}" install \
+    "fastapi==${FASTAPI_VERSION}" \
+    "uvicorn[standard]==${UVICORN_VERSION}" \
+    "python-multipart==${PY_MULTIPART_VERSION}"
+  touch "${PIN_FILE}"
+else
+  echo "Reusing existing recorder venv (no upgrades)"
+fi
+
+# -----------------------------
+# Recorder server env
+# -----------------------------
+export DATA_DIR="${DATA_DIR}"
+export STATIC_DIR="${ROOTDIR}/static"
+export PERSONAL_DIR="${DATA_DIR}/personal_samples"
+
+# IMPORTANT: leave training venv creation to /api/train inside recorder_server.py
+# but still set TRAIN_CMD so the server knows how to invoke training once ready
+export TRAIN_CMD="source '${DATA_DIR}/.venv/bin/activate' && train_wake_word --data-dir='${DATA_DIR}'"
+
+echo "Launching uvicorn on ${HOST}:${PORT}"
+cd "${ROOTDIR}"
+exec "${VENV_DIR}/bin/uvicorn" recorder_server:app --host "${HOST}" --port "${PORT}"
--- a/startup.sh
+++ b/startup.sh
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-: "${NB_UID:=0}"
-: "${NB_GID:=0}"
-umask 002
-
-NOTEBOOK_SRC="/root/microWakeWord_training_notebook.ipynb"
-NOTEBOOK_DST="/data/microWakeWord_training_notebook.ipynb"
-
-mkdir -p /data /data/generated_samples /data/personal_samples
-
-if [[ ! -f "$NOTEBOOK_DST" ]]; then
-  echo "No training notebook found in /data; copying default…"
-  cp -n "$NOTEBOOK_SRC" "$NOTEBOOK_DST"
-fi
-
-# Try to align ownership for convenience (ignore errors if not permitted)
-if [[ "$NB_UID" != "0" || "$NB_GID" != "0" ]]; then
-  chown -R "$NB_UID:$NB_GID" /data || true
-fi
-
-exec "$@"
--- a/static/index.html
+++ b/static/index.html
@@ -0,0 +1,782 @@
+<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1" />
+  <title>microWakeWord Recorder</title>
+  <style>
+    :root{
+      --bg: #070709;
+      --panel: rgba(18, 18, 22, 0.78);
+      --panel2: rgba(24, 24, 30, 0.86);
+      --text: #e9e9ee;
+      --muted: #a2a2ad;
+      --line: rgba(255,255,255,0.10);
+      --orange: #ff8a2a;
+      --orange2:#ffb066;
+      --ok:#38d39f;
+      --warn:#ffb020;
+      --err:#ff4a4a;
+      --shadow: 0 18px 50px rgba(0,0,0,0.45);
+      --radius: 16px;
+    }
+
+    html, body { height: 100%; }
+    body {
+      margin: 0;
+      color: var(--text);
+      font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
+      background:
+        radial-gradient(900px 500px at 12% 6%, rgba(255, 138, 42, 0.12), transparent 55%),
+        radial-gradient(700px 420px at 80% 14%, rgba(255, 176, 102, 0.09), transparent 60%),
+        radial-gradient(800px 600px at 50% 100%, rgba(255, 138, 42, 0.06), transparent 55%),
+        linear-gradient(180deg, #050506 0%, #09090d 100%);
+    }
+
+    .wrap { max-width: 940px; margin: 0 auto; padding: 26px 18px 42px; }
+
+    h2 { margin: 0 0 8px; font-size: 22px; letter-spacing: 0.2px; }
+    p { margin: 0 0 14px; color: var(--muted); line-height: 1.45; }
+
+    .topbar {
+      display:flex; align-items:center; justify-content:space-between;
+      gap: 12px; margin-bottom: 14px;
+    }
+
+    .brand { display:flex; align-items:center; gap:10px; }
+    .logo {
+      width: 38px; height: 38px; border-radius: 12px;
+      background:
+        radial-gradient(circle at 30% 30%, rgba(255,176,102,0.55), rgba(255,138,42,0.25) 45%, rgba(0,0,0,0) 72%),
+        linear-gradient(180deg, rgba(255,138,42,0.22), rgba(255,138,42,0.06));
+      border: 1px solid rgba(255,138,42,0.30);
+      box-shadow: 0 10px 28px rgba(255,138,42,0.08);
+    }
+
+    .row { display: flex; gap: 12px; flex-wrap: wrap; align-items: center; }
+
+    .card {
+      border: 1px solid var(--line);
+      background: linear-gradient(180deg, var(--panel), var(--panel2));
+      border-radius: var(--radius);
+      padding: 16px;
+      margin-top: 14px;
+      box-shadow: var(--shadow);
+      backdrop-filter: blur(8px);
+    }
+
+    .muted { color: var(--muted); }
+
+    input[type="text"], input[type="number"]{
+      padding: 11px 12px;
+      font-size: 15px;
+      border-radius: 12px;
+      border: 1px solid rgba(255,255,255,0.12);
+      background: rgba(0,0,0,0.35);
+      color: var(--text);
+      outline: none;
+    }
+    input[type="text"] { width: 420px; max-width: 100%; }
+    input[type="number"] { width: 120px; }
+    input::placeholder { color: rgba(233,233,238,0.35); }
+
+    button {
+      padding: 10px 14px;
+      font-size: 13px;
+      cursor: pointer;
+      border-radius: 12px;
+      border: 1px solid rgba(255,255,255,0.14);
+      background: rgba(255,255,255,0.06);
+      color: var(--text);
+      transition: transform 0.04s ease, border-color .15s ease, background .15s ease;
+    }
+    button:hover { border-color: rgba(255,138,42,0.35); background: rgba(255,255,255,0.08); }
+    button:active { transform: translateY(1px); }
+    button:disabled { opacity: 0.45; cursor: not-allowed; }
+
+    .primary {
+      border-color: rgba(255,138,42,0.40);
+      background: linear-gradient(180deg, rgba(255,138,42,0.24), rgba(255,138,42,0.12));
+    }
+    .primary:hover { border-color: rgba(255,138,42,0.65); }
+
+    .pill {
+      display:inline-block;
+      padding: 4px 10px;
+      border-radius: 999px;
+      background: rgba(255,255,255,0.07);
+      border: 1px solid rgba(255,255,255,0.10);
+      color: var(--muted);
+      font-size: 12px;
+    }
+    .pill.ok   { color: var(--ok); border-color: rgba(56,211,159,0.25); background: rgba(56,211,159,0.08); }
+    .pill.warn { color: var(--warn); border-color: rgba(255,176,32,0.25); background: rgba(255,176,32,0.08); }
+    .pill.err  { color: var(--err); border-color: rgba(255,74,74,0.25); background: rgba(255,74,74,0.08); }
+
+    details { margin-top: 10px; }
+    summary { cursor: pointer; color: var(--orange2); }
+    summary:hover { color: var(--orange); }
+
+    label { display:flex; gap:10px; align-items:center; }
+    input[type="range"] { width: 240px; }
+
+    .meter {
+      height: 10px;
+      background: rgba(255,255,255,0.08);
+      border-radius: 999px;
+      overflow: hidden;
+      width: 280px;
+      border: 1px solid rgba(255,255,255,0.10);
+    }
+    .meter > div {
+      height: 10px;
+      width: 0%;
+      background: linear-gradient(90deg, rgba(255,138,42,0.55), rgba(255,176,102,0.85));
+    }
+
+    pre {
+      background: rgba(0,0,0,0.55);
+      color: #e6e6ea;
+      padding: 12px;
+      border-radius: 14px;
+      overflow: auto;
+      max-height: 300px;
+      border: 1px solid rgba(255,255,255,0.10);
+      white-space: pre-wrap;
+      word-break: break-word;
+    }
+
+    .big { font-size: 16px; }
+
+    .divider {
+      height: 1px;
+      width: 100%;
+      background: rgba(255,255,255,0.10);
+      margin: 12px 0;
+    }
+  </style>
+</head>
+
+<body>
+  <div class="wrap">
+    <div class="topbar">
+      <div class="brand">
+        <div class="logo"></div>
+        <div>
+          <h2>🎙️ microWakeWord Personal Recorder</h2>
+          <p class="muted">Enter a wake word, test TTS pronunciation, then record takes. Recording starts when you speak and stops after silence.</p>
+        </div>
+      </div>
+    </div>
+
+    <div class="card">
+      <div class="row">
+        <input id="phrase" type="text" placeholder='e.g. "tater totterson"' />
+        <button id="startSessionBtn" class="primary">Start session</button>
+        <button id="ttsBtn" disabled>🔊 Test TTS</button>
+        <span id="sessionPill" class="pill">No session</span>
+      </div>
+
+      <div class="row" style="margin-top:10px;">
+        <label class="muted">Speakers
+          <input id="speakersTotal" type="number" min="1" max="10" value="1" />
+        </label>
+        <label class="muted">Takes / speaker
+          <input id="takesPerSpeaker" type="number" min="1" max="50" value="10" />
+        </label>
+        <span id="speakerPill" class="pill">Speaker: -</span>
+      </div>
+
+      <details>
+        <summary>Advanced (if it’s too sensitive / not sensitive enough)</summary>
+        <div style="margin-top:10px;">
+          <label>
+            Start sensitivity
+            <input id="startThresh" type="range" min="0.005" max="0.08" step="0.001" value="0.02" />
+            <span id="startThreshVal" class="muted"></span>
+          </label>
+          <label>
+            Silence stop (ms)
+            <input id="silenceMs" type="range" min="300" max="2000" step="50" value="900" />
+            <span id="silenceMsVal" class="muted"></span>
+          </label>
+          <label>
+            Min take length (ms)
+            <input id="minTakeMs" type="range" min="300" max="2000" step="50" value="650" />
+            <span id="minTakeMsVal" class="muted"></span>
+          </label>
+        </div>
+      </details>
+    </div>
+
+    <div class="card">
+      <div class="row">
+        <button id="beginBtn" disabled class="primary">🎬 Begin recording</button>
+        <button id="resetBtn" disabled>🧹 Reset recordings</button>
+        <button id="trainBtn" disabled>🧠 Start training</button>
+        <span id="status" class="pill">Idle</span>
+      </div>
+
+      <div style="margin-top:12px;" class="row">
+        <div class="meter"><div id="meterFill"></div></div>
+        <span class="muted" id="meterText">Mic level</span>
+      </div>
+
+      <div class="divider"></div>
+
+      <p class="big">
+        Speaker: <b id="speakerNum">-</b> / <b id="speakerTotal">-</b>
+        <span id="speakerState" class="pill">Waiting</span>
+      </p>
+
+      <p class="big">
+        Take: <b id="takeNum">0</b> / <b id="takeTotal">10</b>
+        <span id="takeState" class="pill">Not recording</span>
+      </p>
+
+      <div id="takesList" class="muted"></div>
+
+      <h4 style="margin-top: 18px; margin-bottom: 10px;">Training log</h4>
+      <pre id="trainLog">(no training started)</pre>
+    </div>
+  </div>
+
+<script>
+  const $ = (id) => document.getElementById(id);  // shorthand: look up an element by its id
+
+  // Give `el` the base "pill" class plus an optional variant (e.g. "ok", "warn", "err") and set its label.
+  function setPill(el, text, cls) {
+    Object.assign(el, { className: `pill ${cls || ""}`, textContent: text });
+  }
+
+  async function api(path, opts) {  // fetch wrapper: resolves with the parsed body, throws when res.ok is false
+    const res = await fetch(path, opts);
+    const ct = res.headers.get("content-type") || "";
+    const data = ct.includes("application/json") ? await res.json() : await res.text();  // JSON when declared, else raw text
+    if (!res.ok) {
+      const err = (typeof data === "string") ? { error: data } : (data || {});  // normalise the payload to an object
+      const msg = err.error || err.message || JSON.stringify(err);  // first available human-readable text
+      const e = new Error(msg);
+      e.details = err;  // full payload stays available to callers
+      throw e;
+    }
+    return data;
+  }
+
+  // -------------------- log auto-scroll (sticky to bottom) --------------------
+  function isNearBottom(el, px = 40) {  // true when at most `px` of content is hidden below the visible area
+    return px >= el.scrollHeight - el.scrollTop - el.clientHeight;
+  }
+
+  function appendLogChunkAutoScroll(el, chunk) {  // append text; follow the tail only if the view was already there
+    if (!chunk) return;
+    const wasAtBottom = isNearBottom(el);  // sampled before the text grows
+    el.textContent += chunk;
+    if (wasAtBottom) el.scrollTop = el.scrollHeight;
+  }
+  // --------------------------------------------------------------------------
+
+  let session = null;       // response of the last successful /api/start_session
+  let isRunning = false;    // while true, meterLoop feeds recorderTick
+
+  let stream = null;        // MediaStream returned by getUserMedia
+  let audioCtx = null;      // AudioContext owning the nodes below
+  let analyser = null;      // AnalyserNode sampled by meterLoop
+  let source = null;        // MediaStreamAudioSourceNode created from `stream`
+
+  let capturing = false;    // true while a take is being captured
+  let startedAt = 0;        // performance.now() when the current capture began
+  let silenceStart = null;  // performance.now() when the current quiet stretch began, or null
+  let floatChunks = [];     // Float32Array blocks collected for the take in progress
+  let frameSize = 2048;     // buffer size passed to createScriptProcessor
+
+  let currentSpeaker = 1;   // counted from 1
+  let speakersTotal = 1;
+
+  let currentTake = 0;      // takes finished so far for the current speaker
+  let takesPerSpeaker = 10;
+
+  // --- incremental log streaming state ---
+  // Polls /api/train_status?offset=<N> and appends training.log_text (reads /data/recorder_training.log)
+  let trainOffset = 0;              // offset sent with the next status poll
+  let trainingPollRunning = false;  // prevents starting a second poll loop
+  let trainingPollAbort = false;    // set to ask the poll loop to exit
+
+  function startThreshold() { return Number.parseFloat($("startThresh").value); }  // slider value as a float
+  function silenceStopMs() { return Number.parseInt($("silenceMs").value, 10); }   // slider value in ms
+  function minTakeMs() { return Number.parseInt($("minTakeMs").value, 10); }       // slider value in ms
+
+  function updateAdvancedLabels() {  // mirror each slider's current value into the label beside it
+    $("startThreshVal").textContent = startThreshold().toFixed(3);
+    $("silenceMsVal").textContent = `${silenceStopMs()}ms`;
+    $("minTakeMsVal").textContent = `${minTakeMs()}ms`;
+  }
+  for (const id of ["startThresh","silenceMs","minTakeMs"]) $(id).addEventListener("input", updateAdvancedLabels);
+  updateAdvancedLabels();
+
+  // Push the speaker/take counters into the page: the four bold numbers plus the
+  // "Speaker x/y" pill next to the inputs.
+  function refreshUI() {
+    const counters = { speakerNum: currentSpeaker, speakerTotal: speakersTotal, takeNum: currentTake, takeTotal: takesPerSpeaker };
+    for (const [id, value] of Object.entries(counters)) $(id).textContent = String(value);
+    setPill($("speakerPill"), `Speaker ${currentSpeaker}/${speakersTotal}`);
+  }
+
+  // -------------------- mic lifecycle --------------------
+  async function ensureMic() {  // open the mic and build the analyser graph; no-op when a stream already exists
+    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+      throw new Error("Microphone not available here. Use https:// (or http://localhost) to record.");
+    }
+    if (stream) return;  // NOTE(review): two calls overlapping the await below would both pass this check
+    stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
+    audioCtx = new (window.AudioContext || window.webkitAudioContext)();  // prefixed constructor as fallback
+    analyser = audioCtx.createAnalyser();
+    analyser.fftSize = 2048;  // also the number of samples meterLoop reads per frame
+    source = audioCtx.createMediaStreamSource(stream);
+    source.connect(analyser);
+    requestAnimationFrame(meterLoop);  // start the meter/recorder loop for this mic session
+  }
+
+  // Tear down the capture node, mic tracks and AudioContext; safe to call when nothing is open.
+  async function stopMicNow() {
+    isRunning = false;
+    capturing = false;
+
+    const proc = window.__mw_proc;
+    if (proc) {
+      try { proc.disconnect(); } catch {}
+      try { source && source.disconnect(proc); } catch {}
+      window.__mw_proc = null;
+    }
+
+    if (stream) {
+      try { stream.getTracks().forEach(t => t.stop()); } catch {}
+      stream = null;
+    }
+
+    // Detach the shared handles BEFORE awaiting close(): ensureMic() may run during the
+    // await and install a fresh context/analyser that must not be wiped afterwards.
+    const closingCtx = audioCtx;
+    audioCtx = null;
+    analyser = null;
+    source = null;
+    $("meterFill").style.width = "0%";
+    $("meterText").textContent = "Mic stopped";
+    if (closingCtx) { try { await closingCtx.close(); } catch {} }
+  }
+
+  // Per-frame loop: draws the level meter from the analyser and drives recorderTick while recording.
+  function meterLoop() {
+    // ensureMic() schedules a new loop every time it opens the mic, so a loop that finds the
+    // analyser gone must end here; rescheduling would stack one extra loop per mic restart.
+    if (!analyser) return;
+
+    const data = new Uint8Array(analyser.fftSize);
+    analyser.getByteTimeDomainData(data);
+
+    let sumSq = 0;
+    for (let i=0;i<data.length;i++){
+      const v = (data[i] - 128) / 128;  // re-centre the byte sample around zero
+      sumSq += v*v;
+    }
+    const rms = Math.sqrt(sumSq / data.length);
+    const pct = Math.min(100, Math.max(0, rms * 600));  // meter width, clamped to 0..100 %
+    $("meterFill").style.width = pct + "%";
+    $("meterText").textContent = `Mic level (rms=${rms.toFixed(3)})`;
+
+    if (isRunning) recorderTick(rms);
+
+    requestAnimationFrame(meterLoop);
+  }
+
+  // -------------------- recording state machine --------------------
+  function recorderTick(rms) {  // one step of the voice-activated recorder; meterLoop calls it per frame while isRunning
+    const now = performance.now();
+
+    if (!capturing) {
+      if (rms >= startThreshold()) startCapture();  // loud enough: begin a take
+      return;
+    }
+
+    if (rms < startThreshold() * 0.65) {  // "quiet" uses a lower bar (65 %) than the start threshold
+      if (silenceStart === null) silenceStart = now;
+      const silentFor = now - silenceStart;
+      if (silentFor >= silenceStopMs()) {
+        const dur = now - startedAt;
+        if (dur >= minTakeMs()) stopCaptureAndUpload();
+        else silenceStart = now;  // take still too short: restart the silence timer and keep capturing
+      }
+    } else {
+      silenceStart = null;  // sound resumed, so the quiet stretch is over
+    }
+  }
+
+  async function startCapture() {  // begin a take: reset per-take state and attach a ScriptProcessor that collects samples
+    capturing = true;
+    startedAt = performance.now();
+    silenceStart = null;
+    floatChunks = [];
+
+    setPill($("takeState"), "Recording…", "warn");
+
+    const proc = audioCtx.createScriptProcessor(frameSize, 1, 1);  // 1 input channel, 1 output channel
+    source.connect(proc);
+    proc.connect(audioCtx.destination);
+
+    proc.onaudioprocess = (ev) => {
+      if (!capturing) return;
+      const chan = ev.inputBuffer.getChannelData(0);
+      floatChunks.push(new Float32Array(chan));  // copy the block before storing it
+    };
+
+    window.__mw_proc = proc;  // stashed globally so the stop/teardown functions can disconnect it
+  }
+
+  // Finish the current take: detach the capture node, encode the samples as 16 kHz WAV, upload them,
+  // then advance the take/speaker counters (pausing between speakers, auto-training after the last).
+  async function stopCaptureAndUpload() {
+    capturing = false;
+    setPill($("takeState"), "Processing…");
+
+    const proc = window.__mw_proc;
+    if (proc) {
+      try { proc.disconnect(); } catch {}
+      try { source.disconnect(proc); } catch {}
+      window.__mw_proc = null;
+    }
+
+    currentTake += 1;
+    refreshUI();
+
+    let totalLen = 0;
+    for (const c of floatChunks) totalLen += c.length;
+    const merged = new Float32Array(totalLen);
+    let off = 0;
+    for (const c of floatChunks) { merged.set(c, off); off += c.length; }
+
+    let saved = false;  // becomes true once the server has accepted this take
+    try {
+      // Encoding sits inside the try so its failures are reported instead of becoming unhandled rejections.
+      const wavBlob = await floatToWav16kMono(merged, audioCtx.sampleRate);
+      setPill($("status"), `Uploading speaker ${currentSpeaker} take ${currentTake}…`, "warn");
+      const fd = new FormData();
+      fd.append("speaker_index", String(currentSpeaker));
+      fd.append("take_index", String(currentTake));
+      fd.append("file", wavBlob, `take_${String(currentTake).padStart(2,"0")}.wav`);
+      await api("/api/upload_take", { method:"POST", body: fd });
+      saved = true;
+
+      $("takesList").textContent = `Saved ${currentTake}/${takesPerSpeaker} takes for speaker ${currentSpeaker}/${speakersTotal}`;
+      setPill($("status"), `Saved speaker ${currentSpeaker} take ${currentTake}/${takesPerSpeaker}`, "ok");
+
+      if (currentTake >= takesPerSpeaker) {
+        if (currentSpeaker >= speakersTotal) {
+          setPill($("takeState"), "Done", "ok");
+          setPill($("speakerState"), "All speakers done ✅", "ok");
+          setPill($("status"), "All takes recorded ✅", "ok");
+          await stopMicNow();
+          await autoStartTraining();
+          return;
+        }
+
+        currentSpeaker += 1;
+        currentTake = 0;
+        refreshUI();
+        setPill($("speakerState"), `Speaker ${currentSpeaker - 1} complete ✅`, "ok");
+        setPill($("takeState"), "Paused", "warn");
+        setPill($("status"), `Ready for speaker ${currentSpeaker}. Click Begin recording.`, "warn");
+
+        // Re-enable Begin only after the mic is fully closed, so a quick click cannot race the teardown.
+        isRunning = false;
+        await stopMicNow();
+        $("beginBtn").disabled = false;
+        return;
+      }
+      setPill($("speakerState"), `Speaker ${currentSpeaker}/${speakersTotal}`);
+      setPill($("takeState"), "Listening…", "ok");
+    } catch (e) {
+      console.error(e);
+      // A take that never reached the server must not count towards the total.
+      if (!saved) { currentTake = Math.max(0, currentTake - 1); refreshUI(); }
+      setPill($("status"), "Upload failed", "err");
+      setPill($("takeState"), "Error", "err");
+      isRunning = false;
+      $("beginBtn").disabled = false;
+      alert("Upload failed: " + e.message);
+    }
+  }
+
+  // -------------------- WAV encoding helpers --------------------
+  async function floatToWav16kMono(float32, srcRate) {  // resample mono samples to 16 kHz and return an audio/wav Blob
+    const buf = audioCtx.createBuffer(1, float32.length, srcRate);  // wrap the raw samples at their original rate
+    buf.copyToChannel(float32, 0);
+
+    const targetRate = 16000;
+    const targetLen = Math.max(1, Math.round(float32.length * targetRate / srcRate));  // never ask for 0 frames
+    const offline = new OfflineAudioContext(1, targetLen, targetRate);  // rendering through it performs the resample
+
+    const src = offline.createBufferSource();
+    src.buffer = buf;
+    src.connect(offline.destination);
+    src.start(0);
+
+    const rendered = await offline.startRendering();
+    const data = rendered.getChannelData(0);
+
+    const wav = encodeWavPCM16(data, targetRate);
+    return new Blob([wav], { type: "audio/wav" });
+  }
+
+  // Serialise float samples as a mono 16-bit PCM WAV file: 44-byte header followed by the sample data.
+  function encodeWavPCM16(float32, sampleRate) {
+    const numSamples = float32.length;
+    const dataBytes = numSamples * 2;
+    const view = new DataView(new ArrayBuffer(44 + dataBytes));
+
+    const tag = (at, text) => {
+      for (let k = 0; k < text.length; k++) view.setUint8(at + k, text.charCodeAt(k));
+    };
+
+    tag(0, "RIFF");
+    view.setUint32(4, 36 + dataBytes, true);   // size of everything after this field
+    tag(8, "WAVE");
+
+    tag(12, "fmt ");
+    view.setUint32(16, 16, true);              // fmt chunk length
+    view.setUint16(20, 1, true);               // audio format 1 = PCM
+    view.setUint16(22, 1, true);               // channel count
+    view.setUint32(24, sampleRate, true);
+    view.setUint32(28, sampleRate * 2, true);  // byte rate: 2 bytes per sample, one channel
+    view.setUint16(32, 2, true);               // block align
+    view.setUint16(34, 16, true);              // bits per sample
+
+    tag(36, "data");
+    view.setUint32(40, dataBytes, true);
+
+    for (let i = 0; i < numSamples; i++) {
+      const s = Math.max(-1, Math.min(1, float32[i]));
+      // asymmetric scale so that -1 maps to -32768 and +1 to 32767
+      view.setInt16(44 + i * 2, s < 0 ? s * 0x8000 : s * 0x7fff, true);
+    }
+
+    return view.buffer;
+  }
+
+  // -------------------- training (manual + auto) --------------------
+  async function startTrainingWithPrompt(auto=false) {  // POST /api/train and start log polling; rethrows on failure
+    const sess = await api("/api/session", { method: "GET" });
+    const takesReceived = sess.takes_received || 0;
+    const total = (sess.speakers_total || 1) * (sess.takes_per_speaker || 10);  // expected take count, shown in the prompt
+
+    let allowNoPersonal = false;
+
+    if (takesReceived === 0) {
+      const ok = confirm(
+        `No personal voice samples recorded (0/${total}).\n\nTrain anyway WITHOUT personal voices?`
+      );
+      if (!ok) return;  // user declined: nothing was changed
+      allowNoPersonal = true;
+    }
+
+    // lock UI immediately
+    $("trainBtn").disabled = true;
+    $("beginBtn").disabled = true;
+    $("resetBtn").disabled = true;
+
+    setPill($("status"), auto ? "Auto-starting training…" : "Preparing training environment…", "warn");
+
+    // reset streaming log state (we show recorder_training.log from the start of this run)
+    trainOffset = 0;
+    trainingPollAbort = false;
+
+    const logEl = $("trainLog");
+    logEl.textContent = "(preparing…)\n";  // placeholder; the poll loop clears it when real output arrives
+
+    try {
+      // Kick off training first
+      await api("/api/train", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ allow_no_personal: allowNoPersonal })
+      });
+
+      // Only start polling AFTER training was successfully kicked off
+      if (!trainingPollRunning) {
+        trainingPollRunning = true;
+        pollTrainingIncremental();  // not awaited: the loop runs in the background
+      }
+
+      setPill($("status"), "Training running…", "warn");
+    } catch (e) {
+      $("trainBtn").disabled = false;  // unlock the UI again before reporting the failure
+      $("resetBtn").disabled = false;
+      $("beginBtn").disabled = false;
+      trainingPollAbort = true;
+      trainingPollRunning = false;
+      throw e;  // callers decide how to show the error
+    }
+  }
+
+  // Start training without a button click; failures are logged and shown rather than rethrown.
+  async function autoStartTraining() {
+    const reportFailure = (err) => {
+      console.error(err);
+      setPill($("status"), "Auto-train failed", "err");
+      alert("Auto-start training failed: " + err.message);
+    };
+    await startTrainingWithPrompt(true).catch(reportFailure);
+  }
+
+  // Manual "Start training" button: run the shared start routine and surface any
+  // failure through an alert and the status pill.
+  $("trainBtn").addEventListener("click", async () => {
+    await startTrainingWithPrompt(false).catch((err) => {
+      alert("Train failed: " + err.message);
+      setPill($("status"), "Train failed", "err");
+    });
+  });
+
+  // Polls /api/train_status?offset=<trainOffset>
+  // Expects JSON: { ok: true, training: { running, exit_code, log_text, next_offset } }
+  async function pollTrainingIncremental() {  // loop: fetch new log text every 1.5 s until training ends or an abort is requested
+    const logEl = $("trainLog");
+
+    for (;;) {
+      if (trainingPollAbort) {  // checked once per iteration, i.e. between requests
+        trainingPollRunning = false;
+        break;
+      }
+
+      try {
+        const st = await api(`/api/train_status?offset=${trainOffset}`, { method:"GET" });
+        const tr = st.training || {};
+
+        const chunk = tr.log_text || "";
+        const next = (typeof tr.next_offset === "number") ? tr.next_offset : trainOffset;  // keep the old offset if none was sent
+
+        // If we got real output, replace the "(preparing…)" placeholder
+        if (chunk && logEl.textContent.startsWith("(preparing…)")) {
+          logEl.textContent = "";
+        }
+
+        if (chunk) appendLogChunkAutoScroll(logEl, chunk);
+
+        trainOffset = next;
+
+        // Stop polling only when training has ended and exit_code is set
+        const exitCodeIsSet = (tr.exit_code !== null && tr.exit_code !== undefined);
+
+        if (!tr.running && exitCodeIsSet) {
+          $("trainBtn").disabled = false;  // training is over: unlock the controls
+          $("resetBtn").disabled = false;
+          $("beginBtn").disabled = false;
+
+          if (tr.exit_code === 0) setPill($("status"), "Training finished ✅", "ok");
+          else setPill($("status"), `Training ended (exit=${tr.exit_code})`, "err");
+
+          trainingPollRunning = false;
+          break;
+        }
+      } catch (e) {
+        // ignore transient polling errors
+      }
+
+      await new Promise(r => setTimeout(r, 1500));  // pause between polls
+    }
+  }
+
+  // -------------------- session + UI wiring --------------------
+  $("ttsBtn").addEventListener("click", () => {  // speak the typed phrase with the browser's speech synthesis
+    const text = ($("phrase").value || "").trim();
+    if (text.length === 0) return;
+    const utterance = new SpeechSynthesisUtterance(text);
+    speechSynthesis.cancel();  // drop anything still queued or speaking
+    speechSynthesis.speak(utterance);
+  });
+
+  // Start (or restart) a session: POST the phrase and counts, then reset local state and the UI.
+  $("startSessionBtn").addEventListener("click", async () => {
+    const phrase = ($("phrase").value || "").trim();
+    if (!phrase) { alert("Enter a wake word phrase first."); return; }
+
+    const wantSpeakers = parseInt($("speakersTotal").value || "1", 10);
+    const wantTakes = parseInt($("takesPerSpeaker").value || "10", 10);
+
+    try {
+      setPill($("sessionPill"), "Starting…", "warn");
+      const data = await api("/api/start_session", {
+        method: "POST",
+        headers: {"Content-Type":"application/json"},
+        body: JSON.stringify({ phrase, speakers_total: wantSpeakers, takes_per_speaker: wantTakes })
+      });
+
+      // Commit the new counts only once the server has accepted the session.
+      session = data;
+      speakersTotal = wantSpeakers;
+      takesPerSpeaker = wantTakes;
+      currentSpeaker = 1;
+      currentTake = 0;
+
+      $("takesList").textContent = "";
+      $("trainLog").textContent = "(no training started)";
+      trainOffset = 0;
+
+      // Ask a running poll loop to stop. The flag must stay set until the loop has seen it (it only
+      // looks between requests); startTrainingWithPrompt clears it again before the next run.
+      if (trainingPollRunning) trainingPollAbort = true;
+
+      refreshUI();
+
+      await stopMicNow();
+
+      setPill($("sessionPill"), `Session: ${data.safe_word}`, "ok");
+      $("beginBtn").disabled = false;
+      $("resetBtn").disabled = false;
+      $("trainBtn").disabled = false;
+      $("ttsBtn").disabled = false;
+
+      setPill($("status"), "Ready", "ok");
+      setPill($("speakerState"), "Waiting");
+      setPill($("takeState"), "Not recording");
+    } catch (e) {
+      console.error(e);
+      setPill($("sessionPill"), "Session failed", "err");
+      alert("Start session failed: " + e.message);
+    }
+  });
+
+  // Reset button: have the server reset the recordings, then rewind the local counters and the UI.
+  $("resetBtn").addEventListener("click", async () => {
+    try {
+      await api("/api/reset_recordings", { method: "POST" });
+      [currentSpeaker, currentTake] = [1, 0];
+      $("takesList").textContent = "";
+      refreshUI();
+      setPill($("status"), "Recordings reset", "ok");
+    } catch (err) {
+      alert("Reset failed: " + err.message);
+    }
+  });
+
+  $("beginBtn").addEventListener("click", async () => {  // arm voice-activated recording for the current speaker
+    if (!session) { alert("Start a session first."); return; }
+    try {
+      await ensureMic();  // opens the mic if no stream exists yet
+    } catch (e) {
+      alert("Mic permission failed: " + e.message);
+      return;
+    }
+
+    $("takesList").textContent = "";
+    refreshUI();
+
+    isRunning = true;  // from now on meterLoop forwards levels to recorderTick
+    $("beginBtn").disabled = true;
+
+    setPill($("speakerState"), `Speaker ${currentSpeaker}/${speakersTotal}`);
+    setPill($("status"), "Listening… say the wake word now", "ok");
+    setPill($("takeState"), "Listening…", "ok");
+  });
+</script>
+</body>
+</html>
--- a/cli/train_wake_word
+++ b/cli/train_wake_word
@@ -3,9 +3,10 @@ set -e

 PROGPATH=$(realpath "$0")
 PROGDIR=$(dirname "${PROGPATH}")
+CLIDIR="${PROGDIR}/cli"

 KNOWN_ARGS=( samples batch-size training-steps data-dir cleanup-work-dir )
-source "${PROGDIR}/shell.functions"
+source "${CLIDIR}/shell.functions"
 WAKE_WORD=${POSITIONAL_ARGS[0]}

 if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
@@ -62,7 +63,7 @@ fi

 printf "%-80s\n" "=" | tr ' ' "="
 echo "===== Running '${WAKE_WORD}(${WAKE_WORD_TITLE})' generation, augmentation and training ====="
-"${PROGDIR}/cudainfo"
+"${CLIDIR}/cudainfo"
 echo
 START_TS=$EPOCHSECONDS

@@ -75,17 +76,13 @@ export TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512
 export GLOG_minloglevel=2
 export GRPC_VERBOSITY=ERROR

-
-"${PROGDIR}/wake_word_sample_generator" \
+"${CLIDIR}/wake_word_sample_generator" \
    --samples=${SAMPLES} \
    --batch-size=${BATCH_SIZE} \
    --data-dir="${DATA_DIR}" "${WAKE_WORD}"

 POST_GEN_TS=$EPOCHSECONDS

-ww="${WAKE_WORD// /_}"
-ww="${ww//./}"
-
 AUGMENT=false
 GENERATED_DIR="${DATA_DIR}/work/wake_word_samples"
 AUGMENTED_DIR="${DATA_DIR}/work/wake_word_samples_augmented"
@@ -96,7 +93,7 @@ AUGMENTED_DIR="${DATA_DIR}/work/wake_word_samples_augmented"
 if ${AUGMENT} ; then
    rm -rf "${AUGMENTED_DIR}" || :
    mkdir -p "${AUGMENTED_DIR}" || :
-    "${PROGDIR}/wake_word_sample_augmenter" --data-dir="${DATA_DIR}" || { rm -rf "${AUGMENTED_DIR}" ; exit 1 ; }
+    "${CLIDIR}/wake_word_sample_augmenter" --data-dir="${DATA_DIR}" || { rm -rf "${AUGMENTED_DIR}" ; exit 1 ; }
 else
    echo "Augmentation not required"
    echo
@@ -104,18 +101,26 @@ fi

 POST_AUGMENT_TS=$EPOCHSECONDS

-"${PROGDIR}/wake_word_sample_trainer" --samples=${SAMPLES} --training-steps=${TRAINING_STEPS} --data-dir="${DATA_DIR}" \
-        "${WAKE_WORD}" "${WAKE_WORD_TITLE}"
+"${CLIDIR}/wake_word_sample_trainer" \
+    --samples=${SAMPLES} \
+    --training-steps=${TRAINING_STEPS} \
+    --data-dir="${DATA_DIR}" \
+    "${WAKE_WORD}" "${WAKE_WORD_TITLE}"

 if ${CLEANUP_WORK_DIR} ; then
-    rm -rf "${DATA_DIR}/work/trained_models" "${DATA_DIR}/work/wake_word_samples" \
-    "${DATA_DIR}/work/wake_word_samples_augmented" "${DATA_DIR}/work/last_wake_word" || :
+    rm -rf \
+      "${DATA_DIR}/work/trained_models" \
+      "${DATA_DIR}/work/wake_word_samples" \
+      "${DATA_DIR}/work/wake_word_samples_augmented" \
+      "${DATA_DIR}/work/personal_augmented_features" \
+      "${DATA_DIR}/work/last_wake_word" || :
 fi
+
 END_TS=$EPOCHSECONDS

 python -c $'print(f"{\'=\' * 80}")'
 printf "%44s\n\n" "Training Summary"
-"${PROGDIR}/system_summary"
+"${CLIDIR}/system_summary"
 echo
 print_elapsed_time --no-separators "${START_TS}" "${POST_GEN_TS}" "Generate ${SAMPLES} samples, ${BATCH_SIZE}/batch"
 print_elapsed_time --no-separators "${POST_GEN_TS}" "${POST_AUGMENT_TS}" "Augment ${SAMPLES} samples"