mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
874f273d0b | ||
|
|
04249f414d | ||
|
|
6a0d60d569 | ||
|
|
8df17599c2 | ||
|
|
280e8f8de4 | ||
|
|
b582a6cade | ||
|
|
196ab8c0e7 | ||
|
|
134f607bef | ||
|
|
4a9e2f2cde | ||
|
|
7c246856df | ||
|
|
3705dabc09 | ||
|
|
1dcf48209f | ||
|
|
4f44bef8d5 | ||
|
|
98fa879db1 | ||
|
|
dfac549430 | ||
|
|
775a78326b | ||
|
|
429be4cc67 | ||
|
|
2e6179ec32 | ||
|
|
ee6ff6e9d5 | ||
|
|
251b0280b6 | ||
|
|
dd2bdda431 | ||
|
|
9d8e0afe1b | ||
|
|
318a4ad3b5 |
48
.github/workflows/docker-publish.yml
vendored
Normal file
48
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
name: Publish Docker Image
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: docker-publish-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
IMAGE_NAME: tatertotterson/microwakeword
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
docker:
|
||||||
|
name: Docker image
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Check out repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to GHCR
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and push image
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: dockerfile
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: true
|
||||||
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||||
|
cache-from: type=gha,scope=mww-trainer-nvidia-docker
|
||||||
|
cache-to: type=gha,mode=max,scope=mww-trainer-nvidia-docker
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
personal_samples/*
|
personal_samples/*
|
||||||
data/
|
data/
|
||||||
|
trim_history/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
219
README.md
219
README.md
@@ -1,16 +1,15 @@
|
|||||||
<div align="center">
|
<div align="center">
|
||||||
<h1>microWakeWord NVIDIA Docker Trainer UI</h1>
|
<a href="https://taterassistant.com">
|
||||||
<img width="800" alt="Screenshot 2026-04-14 at 11 02 06 PM" src="https://github.com/user-attachments/assets/694f4cb7-e4d8-4e2b-80ec-b40fb41cbfff" />
|
<img src="images/tater-repo-logo.png" alt="microWakeWord Trainer" width="460"/>
|
||||||
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
<h3 align="center">
|
||||||
|
<a href="https://taterassistant.com">taterassistant.com</a>
|
||||||
|
</h3>
|
||||||
|
|
||||||
Train custom microWakeWord models in Docker with:
|
Train custom microWakeWord models in Docker with NVIDIA/CUDA acceleration, generated Piper samples, device-captured samples, reviewed false-wake negatives, live training logs, and ESPHome firmware flashing.
|
||||||
|
|
||||||
- uploaded personal voice samples
|
Real samples come from device-captured wake audio, close misses, or manual uploads. Every saved sample is normalized to `16 kHz / mono / 16-bit PCM WAV` before training.
|
||||||
- automatically generated Piper TTS samples
|
|
||||||
- a browser-based trainer UI
|
|
||||||
- live training logs in a popup console
|
|
||||||
|
|
||||||
This project no longer records audio in the browser. The UI is now upload-first: users add their own audio files, the app validates or converts them, and training runs from the same page.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -27,41 +26,92 @@ docker pull ghcr.io/tatertotterson/microwakeword:latest
|
|||||||
```bash
|
```bash
|
||||||
docker run -d \
|
docker run -d \
|
||||||
--gpus all \
|
--gpus all \
|
||||||
-p 8888:8888 \
|
--network host \
|
||||||
|
-e REC_PORT=8789 \
|
||||||
-v $(pwd):/data \
|
-v $(pwd):/data \
|
||||||
ghcr.io/tatertotterson/microwakeword:latest
|
ghcr.io/tatertotterson/microwakeword:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
What these flags do:
|
The flags:
|
||||||
|
|
||||||
- `--gpus all` enables GPU acceleration
|
- `--gpus all` enables GPU acceleration.
|
||||||
- `-p 8888:8888` exposes the trainer UI
|
- `--network host` lets the container receive mDNS/zeroconf traffic for ESPHome auto-detect.
|
||||||
- `-v $(pwd):/data` persists models, downloaded voices, datasets, and personal samples
|
- `-e REC_PORT=8789` sets the trainer web UI and captured-audio port. Change this value if `8789` is already in use.
|
||||||
|
- `-v $(pwd):/data` persists models, downloaded voices, datasets, samples, and firmware caches.
|
||||||
|
|
||||||
Then open:
|
Host networking is recommended for the Firmware tab's mDNS device discovery. Manual IP flashing and captured-audio uploads can still work without host networking if the trainer port is reachable, but auto-detect may not see devices from Docker bridge networking.
|
||||||
|
|
||||||
|
Open:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
http://localhost:8888
|
http://localhost:8789
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you change `REC_PORT`, open that port instead and use the same port in the ESPHome `Trainer App URL`.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## What The UI Does
|
## What The UI Does
|
||||||
|
|
||||||
- Start a wake word session
|
- `Trainer` starts a wake-word session, shows positive/negative sample counts, and launches training.
|
||||||
- Test TTS pronunciation
|
- `Captured Audio` reviews clips sent by ESPHome voice satellites ("sats"), including wake hits, close misses, and false wakes.
|
||||||
- Upload one or many personal samples
|
- `Samples` plays, removes, clears, and manually imports personal or negative samples.
|
||||||
- Normalize uploads to `16 kHz / mono / 16-bit PCM WAV`
|
- `Firmware` builds the latest `microWakeWords` ESPHome YAMLs from GitHub and flashes VoicePE or Satellite1 over OTA.
|
||||||
- Train with or without personal samples
|
- Popup consoles show colorized training and firmware logs while long-running jobs are active.
|
||||||
- Show a popup console with live progress and logs
|
|
||||||
|
|
||||||
Personal samples are optional. If none are uploaded, the trainer can still proceed with TTS-only data after confirmation.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Personal Samples
|
## Captured Audio Workflow
|
||||||
|
|
||||||
Accepted upload formats include:
|
To collect samples from a sat, flash it with the Tater firmware from [TaterTotterson/microWakeWords](https://github.com/TaterTotterson/microWakeWords). The `Firmware` tab can build and flash the VoicePE or Satellite1 YAMLs directly from that repo.
|
||||||
|
|
||||||
|
After flashing, the device exposes ESPHome entities for capture setup:
|
||||||
|
|
||||||
|
- `Capture Wake Audio` toggles upload of wake-word triggers.
|
||||||
|
- `Capture Close Misses` toggles upload of near misses.
|
||||||
|
- `Trainer App URL` sets the trainer address, for example `http://<trainer-ip>:8789`.
|
||||||
|
|
||||||
|
ESPHome devices can send raw captured audio to:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/api/upload_captured_audio_raw
|
||||||
|
```
|
||||||
|
|
||||||
|
Keep the training app running and reachable at the `Trainer App URL` while capture is enabled. The sats upload clips live; if the app is stopped or the URL is wrong, captured audio will not be saved.
|
||||||
|
|
||||||
|
In the `Captured Audio` tab:
|
||||||
|
|
||||||
|
- play each clip from the inbox
|
||||||
|
- mark good wake-word clips as `This is good`
|
||||||
|
- mark bad triggers as `False wake`
|
||||||
|
- discard clips that should not be used
|
||||||
|
|
||||||
|
Approved clips move into:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/data/personal_samples/
|
||||||
|
```
|
||||||
|
|
||||||
|
False wakes move into:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/data/negative_samples/
|
||||||
|
```
|
||||||
|
|
||||||
|
Captured audio is boosted for easier playback in the UI, then kept in the correct training format.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Samples
|
||||||
|
|
||||||
|
The `Samples` tab is the sample library.
|
||||||
|
|
||||||
|
- `Personal` samples are positive examples of the wake word.
|
||||||
|
- `Negative` samples are reviewed false wakes or hard negatives.
|
||||||
|
- Both can be played back and removed one at a time.
|
||||||
|
- Manual upload is available here as an optional seed path.
|
||||||
|
|
||||||
|
Accepted manual upload formats include:
|
||||||
|
|
||||||
- WAV
|
- WAV
|
||||||
- MP3
|
- MP3
|
||||||
@@ -72,97 +122,120 @@ Accepted upload formats include:
|
|||||||
- OPUS
|
- OPUS
|
||||||
- WEBM
|
- WEBM
|
||||||
|
|
||||||
The backend validates or converts uploads with `ffmpeg` and stores the normalized files in:
|
Uploads are validated or converted with `ffmpeg` into:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
/data/personal_samples/
|
16 kHz / mono / 16-bit PCM WAV
|
||||||
```
|
```
|
||||||
|
|
||||||
Notes:
|
Starting a new session does not clear samples. Use the clear buttons in `Samples` if you want to remove saved personal or negative clips.
|
||||||
|
|
||||||
- starting a new session does not clear personal samples
|
---
|
||||||
- use the `Clear personal samples` button if you want to wipe them
|
|
||||||
- any uploaded personal samples are automatically included in training
|
## Training Flow
|
||||||
|
|
||||||
|
1. Enter the wake phrase in `Trainer`.
|
||||||
|
2. Choose the language.
|
||||||
|
3. Optionally test pronunciation with `Test TTS`.
|
||||||
|
4. Review the positive and negative sample counts.
|
||||||
|
5. Click `Start training`.
|
||||||
|
6. Watch the popup training console.
|
||||||
|
|
||||||
|
Personal samples are optional. Training can run with zero personal samples after confirmation, using generated TTS samples and the stock negative datasets.
|
||||||
|
|
||||||
|
Reviewed negative samples are converted into `/data/work/reviewed_negative_features/` and inserted into the training YAML as a hard-negative feature set when present.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Language Support
|
## Language Support
|
||||||
|
|
||||||
The language selector is dynamic.
|
The language picker is dynamic.
|
||||||
|
|
||||||
- `en` is always available
|
- `en` is always available.
|
||||||
- non-English languages are populated from Piper voice metadata
|
- English uses its own dedicated generator model path rather than the Piper ONNX voices.
|
||||||
- when you train with a non-English language, the backend downloads all Piper ONNX voices for that selected language only
|
- Non-English languages are discovered from the Piper voices catalog and any local Piper voice metadata.
|
||||||
- it does not pre-download every language
|
- When a non-English language is selected, the trainer downloads all voices for that selected language only.
|
||||||
- already-downloaded voices are reused on later runs
|
- Already-downloaded voices are reused.
|
||||||
|
- It does not download every language up front.
|
||||||
|
|
||||||
English stays on its existing dedicated generator model path. Non-English languages use the selected language's ONNX Piper voices.
|
If the upstream Piper catalog is unavailable, already-installed local voices are used when available.
|
||||||
|
|
||||||
If the Piper catalog is unavailable, already-installed local voices can still be used.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Training Behavior
|
## Dataset Behavior
|
||||||
|
|
||||||
1. Enter the wake word
|
The first training run downloads and prepares missing training assets into `/data`, including:
|
||||||
2. Optionally test pronunciation
|
|
||||||
3. Optionally upload personal samples
|
|
||||||
4. Click `Start training`
|
|
||||||
5. Watch the popup console for:
|
|
||||||
- selected-language voice downloads when needed
|
|
||||||
- sample generation progress
|
|
||||||
- dataset setup
|
|
||||||
- training progress and completion
|
|
||||||
|
|
||||||
The `Open console` button lets you reopen the log window after closing it.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## First Run Notes
|
|
||||||
|
|
||||||
The first real training run may download large training assets into `/data`, such as:
|
|
||||||
|
|
||||||
- Piper voices for the selected language
|
- Piper voices for the selected language
|
||||||
- training datasets and background data
|
- negative datasets and background data
|
||||||
- Python training environment dependencies
|
- the Python training environment
|
||||||
|
- generated samples and augmented feature caches
|
||||||
|
|
||||||
These are reused later unless you delete `/data`.
|
After those assets are prepared, later runs reuse the local copies unless the mounted `/data` contents are deleted.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Firmware Flashing
|
||||||
|
|
||||||
|
The `Firmware` tab builds and flashes Tater firmware for supported ESPHome sats.
|
||||||
|
|
||||||
|
- Downloads the latest firmware YAML templates from `TaterTotterson/microWakeWords` on GitHub.
|
||||||
|
- Lets you choose `VoicePE` or `Satellite1`.
|
||||||
|
- Auto-detects ESPHome devices with mDNS when the container is running with host networking.
|
||||||
|
- Allows manual IP or hostname entry if discovery does not find the device.
|
||||||
|
- Saves firmware form values so you do not re-enter sounds and URLs every run.
|
||||||
|
- Lists locally trained wake words from `/data/trained_wake_words/` for easy model selection.
|
||||||
|
- Builds with ESPHome and flashes OTA.
|
||||||
|
- Streams ESPHome output in a colorized firmware console.
|
||||||
|
|
||||||
|
Firmware YAMLs are intentionally pulled from GitHub each time. There is no local fallback path in the trainer UI.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Output Files
|
## Output Files
|
||||||
|
|
||||||
Successful runs produce:
|
Successful runs produce timestamped training output folders such as:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
/data/output/<wake_word>.tflite
|
/data/output/<timestamp>-<wake_word>-<samples>-<steps>/<wake_word>.tflite
|
||||||
/data/output/<wake_word>.json
|
/data/output/<timestamp>-<wake_word>-<samples>-<steps>/<wake_word>.json
|
||||||
```
|
```
|
||||||
|
|
||||||
If those files already exist, the trainer creates timestamped backups before replacing them.
|
The trainer also syncs firmware-ready artifacts into:
|
||||||
|
|
||||||
|
```text
|
||||||
|
/data/trained_wake_words/<wake_word>.tflite
|
||||||
|
/data/trained_wake_words/<wake_word>.json
|
||||||
|
```
|
||||||
|
|
||||||
|
The firmware tab uses `/data/trained_wake_words/` to populate the wake-word dropdown.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Resetting Everything
|
## Resetting Everything
|
||||||
|
|
||||||
If you want a clean slate, stop the container and remove the contents of your mounted `/data` directory.
|
If you want a clean slate, stop the container and remove the contents of the mounted `/data` directory.
|
||||||
|
|
||||||
That will remove:
|
That removes:
|
||||||
|
|
||||||
- personal samples
|
- personal samples
|
||||||
|
- negative samples
|
||||||
|
- captured inbox clips
|
||||||
- downloaded Piper voices
|
- downloaded Piper voices
|
||||||
- cached datasets
|
- cached datasets
|
||||||
- training environments
|
- training environments
|
||||||
- trained models
|
- trained models
|
||||||
|
- firmware build caches
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Notes
|
## Important Notes
|
||||||
|
|
||||||
- browser microphone recording has been removed
|
- Personal samples are optional.
|
||||||
- personal samples are optional
|
- Negative samples are optional but useful for reducing false wakes.
|
||||||
- the server module is now `trainer_server.py`
|
- The UI server is `trainer_server.py`.
|
||||||
- the launcher script is now `run.sh`
|
- The launcher is `run.sh`.
|
||||||
|
- Firmware capture settings live on the ESPHome device and can be toggled from the device entities after flashing.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ parser.add_argument("--output-dir", type=str, help="Wake word output dir. Defaul
|
|||||||
# Personal inputs/outputs (NEW)
|
# Personal inputs/outputs (NEW)
|
||||||
parser.add_argument("--personal-dir", type=str, help="Personal WAV dir. Default: <data-dir>/personal_samples", required=False)
|
parser.add_argument("--personal-dir", type=str, help="Personal WAV dir. Default: <data-dir>/personal_samples", required=False)
|
||||||
parser.add_argument("--personal-output-dir", type=str, help="Personal features output dir. Default: <data-dir>/work/personal_augmented_features", required=False)
|
parser.add_argument("--personal-output-dir", type=str, help="Personal features output dir. Default: <data-dir>/work/personal_augmented_features", required=False)
|
||||||
|
parser.add_argument("--negative-dir", type=str, help="Reviewed negative WAV dir. Default: <data-dir>/negative_samples", required=False)
|
||||||
|
parser.add_argument("--negative-output-dir", type=str, help="Reviewed negative features output dir. Default: <data-dir>/work/reviewed_negative_features", required=False)
|
||||||
|
|
||||||
# Dataset dirs
|
# Dataset dirs
|
||||||
parser.add_argument("--mit-rirs-16k-dir", type=str, help="MIT RIR input directory. Default: <data-dir>/training_datasets/mit_rirs_16k", required=False)
|
parser.add_argument("--mit-rirs-16k-dir", type=str, help="MIT RIR input directory. Default: <data-dir>/training_datasets/mit_rirs_16k", required=False)
|
||||||
@@ -57,6 +59,17 @@ if not args.personal_output_dir:
|
|||||||
else:
|
else:
|
||||||
args.personal_output_dir = os.path.realpath(args.personal_output_dir)
|
args.personal_output_dir = os.path.realpath(args.personal_output_dir)
|
||||||
|
|
||||||
|
# Reviewed negative defaults
|
||||||
|
if not args.negative_dir:
|
||||||
|
args.negative_dir = os.path.join(args.data_dir, "negative_samples")
|
||||||
|
else:
|
||||||
|
args.negative_dir = os.path.realpath(args.negative_dir)
|
||||||
|
|
||||||
|
if not args.negative_output_dir:
|
||||||
|
args.negative_output_dir = os.path.join(work_dir, "reviewed_negative_features")
|
||||||
|
else:
|
||||||
|
args.negative_output_dir = os.path.realpath(args.negative_output_dir)
|
||||||
|
|
||||||
# Dataset defaults
|
# Dataset defaults
|
||||||
if not args.mit_rirs_16k_dir:
|
if not args.mit_rirs_16k_dir:
|
||||||
args.mit_rirs_16k_dir = os.path.join(args.data_dir, "training_datasets", "mit_rirs_16k")
|
args.mit_rirs_16k_dir = os.path.join(args.data_dir, "training_datasets", "mit_rirs_16k")
|
||||||
@@ -205,7 +218,7 @@ def bind_wav_generator(clips_obj: Clips, wav_dir: str):
|
|||||||
|
|
||||||
clips_obj.audio_generator = types.MethodType(audio_generator_from_wavs, clips_obj)
|
clips_obj.audio_generator = types.MethodType(audio_generator_from_wavs, clips_obj)
|
||||||
|
|
||||||
def generate_feature_set(input_wav_dir: str, out_root_dir: str, label: str):
|
def generate_feature_set(input_wav_dir: str, out_root_dir: str, label: str, *, remove_silence: bool = True):
|
||||||
files = glob.glob(os.path.join(input_wav_dir, "*.wav"))
|
files = glob.glob(os.path.join(input_wav_dir, "*.wav"))
|
||||||
if not files:
|
if not files:
|
||||||
print(f"ℹ️ No WAVs found for {label} in: {input_wav_dir} (skipping)")
|
print(f"ℹ️ No WAVs found for {label} in: {input_wav_dir} (skipping)")
|
||||||
@@ -218,7 +231,7 @@ def generate_feature_set(input_wav_dir: str, out_root_dir: str, label: str):
|
|||||||
input_directory=input_wav_dir,
|
input_directory=input_wav_dir,
|
||||||
file_pattern="*.wav",
|
file_pattern="*.wav",
|
||||||
max_clip_duration_s=5,
|
max_clip_duration_s=5,
|
||||||
remove_silence=True,
|
remove_silence=remove_silence,
|
||||||
random_split_seed=10,
|
random_split_seed=10,
|
||||||
split_count=0.1,
|
split_count=0.1,
|
||||||
)
|
)
|
||||||
@@ -263,9 +276,12 @@ def generate_feature_set(input_wav_dir: str, out_root_dir: str, label: str):
|
|||||||
# Wake word generated/TTS features (existing behavior)
|
# Wake word generated/TTS features (existing behavior)
|
||||||
generate_feature_set(args.input_dir, args.output_dir, "generated")
|
generate_feature_set(args.input_dir, args.output_dir, "generated")
|
||||||
|
|
||||||
# Personal features (NEW)
|
# Personal features
|
||||||
generate_feature_set(args.personal_dir, args.personal_output_dir, "personal")
|
generate_feature_set(args.personal_dir, args.personal_output_dir, "personal")
|
||||||
|
|
||||||
|
# Reviewed false-positive / hard-negative features
|
||||||
|
generate_feature_set(args.negative_dir, args.negative_output_dir, "reviewed negatives", remove_silence=False)
|
||||||
|
|
||||||
END_TIME = datetime.now(timezone.utc).replace(microsecond=0)
|
END_TIME = datetime.now(timezone.utc).replace(microsecond=0)
|
||||||
et = END_TIME - START_TIME
|
et = END_TIME - START_TIME
|
||||||
print(f"\n{'=' * 80}")
|
print(f"\n{'=' * 80}")
|
||||||
|
|||||||
@@ -111,6 +111,16 @@ else
|
|||||||
echo "ℹ️ No personal features found at ${PERSONAL_FEATURES_DIR}/training (continuing without personal weighting)"
|
echo "ℹ️ No personal features found at ${PERSONAL_FEATURES_DIR}/training (continuing without personal weighting)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Reviewed false-positive features are optional hard negatives.
# Sets HAS_REVIEWED_NEGATIVE to the string "true" when
# ${WORK_DIR}/reviewed_negative_features/training exists, otherwise leaves it "false".
# Only the presence of the directory is tested, not its contents.
# NOTE(review): a training/ directory left over from an earlier run would still
# set the flag - confirm stale features are cleared when negative samples are removed.
|
||||||
|
REVIEWED_NEGATIVE_FEATURES_DIR="${WORK_DIR}/reviewed_negative_features"
|
||||||
|
HAS_REVIEWED_NEGATIVE="false"
|
||||||
|
if [ -d "${REVIEWED_NEGATIVE_FEATURES_DIR}/training" ] ; then
|
||||||
|
HAS_REVIEWED_NEGATIVE="true"
|
||||||
|
echo "✅ Found reviewed negative features: ${REVIEWED_NEGATIVE_FEATURES_DIR}/training (will weight as hard negatives)"
|
||||||
|
else
|
||||||
|
echo "ℹ️ No reviewed negative features found at ${REVIEWED_NEGATIVE_FEATURES_DIR}/training (continuing with stock negatives)"
|
||||||
|
fi
|
||||||
|
|
||||||
cd "${WORK_DIR}"
|
cd "${WORK_DIR}"
|
||||||
|
|
||||||
echo "===== Starting ${TRAINING_STEPS} training steps ====="
|
echo "===== Starting ${TRAINING_STEPS} training steps ====="
|
||||||
@@ -133,6 +143,7 @@ features:
|
|||||||
truth: true
|
truth: true
|
||||||
type: mmap
|
type: mmap
|
||||||
__PERSONAL_FEATURE_MARKER__
|
__PERSONAL_FEATURE_MARKER__
|
||||||
|
__REVIEWED_NEGATIVE_FEATURE_MARKER__
|
||||||
- features_dir: __NEG_SPEECH__
|
- features_dir: __NEG_SPEECH__
|
||||||
penalty_weight: 1.0
|
penalty_weight: 1.0
|
||||||
sampling_weight: 12.0
|
sampling_weight: 12.0
|
||||||
@@ -208,6 +219,22 @@ else
|
|||||||
sed -i -e "/__PERSONAL_FEATURE_MARKER__/d" "${YAML_PATH}"
|
sed -i -e "/__PERSONAL_FEATURE_MARKER__/d" "${YAML_PATH}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Insert/remove reviewed hard-negative block
# When HAS_REVIEWED_NEGATIVE is "true", the __REVIEWED_NEGATIVE_FEATURE_MARKER__
# placeholder in ${YAML_PATH} is replaced by a feature entry pointing at
# ${REVIEWED_NEGATIVE_FEATURES_DIR} (truth: false, penalty_weight 1.25,
# sampling_weight 8.0); otherwise the line holding the placeholder is deleted.
|
||||||
|
if [ "${HAS_REVIEWED_NEGATIVE}" = "true" ]; then
|
||||||
|
reviewed_negative_block="$(cat <<EOF
|
||||||
|
- features_dir: ${REVIEWED_NEGATIVE_FEATURES_DIR}
|
||||||
|
penalty_weight: 1.25
|
||||||
|
||||||
|
sampling_weight: 8.0
|
||||||
|
truncation_strategy: random
|
||||||
|
truth: false
|
||||||
|
type: mmap
|
||||||
|
EOF
|
||||||
|
)"
|
||||||
|
# -0777 makes perl read the file as a single record; -i edits it in place.
# The shell expands ${reviewed_negative_block} inside the double-quoted perl
# program, so the block text becomes part of the s### expression itself: a '#'
# in the features path would terminate the replacement early, and '$' or '@'
# would be interpolated by perl.
# NOTE(review): consider passing the block through the environment
# (\$ENV{...}) so the path is never parsed as perl code.
perl -0777 -i -pe "s#__REVIEWED_NEGATIVE_FEATURE_MARKER__#${reviewed_negative_block}#g" "${YAML_PATH}"
|
||||||
|
else
|
||||||
|
sed -i -e "/__REVIEWED_NEGATIVE_FEATURE_MARKER__/d" "${YAML_PATH}"
|
||||||
|
fi
|
||||||
|
|
||||||
echo " Wrote training_parameters.yaml"
|
echo " Wrote training_parameters.yaml"
|
||||||
rm -rf "${WORK_DIR}/trained_models/wakeword"
|
rm -rf "${WORK_DIR}/trained_models/wakeword"
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|||||||
# System deps
|
# System deps
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
python3.12 python3.12-venv python3.12-dev python3-pip python-is-python3 \
|
python3.12 python3.12-venv python3.12-dev python3-pip python-is-python3 \
|
||||||
git wget curl unzip ca-certificates nano less \
|
git wget curl unzip patch ninja-build ca-certificates nano less \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& mkdir -p /data
|
&& mkdir -p /data
|
||||||
|
|
||||||
|
|||||||
BIN
images/tater-repo-logo.png
Normal file
BIN
images/tater-repo-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 590 KiB |
65
run.sh
65
run.sh
@@ -6,7 +6,7 @@ ROOTDIR="$(dirname "$(realpath "$0")")"
|
|||||||
# Training convention
|
# Training convention
|
||||||
DATA_DIR="${DATA_DIR:-/data}"
|
DATA_DIR="${DATA_DIR:-/data}"
|
||||||
HOST="${REC_HOST:-0.0.0.0}"
|
HOST="${REC_HOST:-0.0.0.0}"
|
||||||
PORT="${REC_PORT:-8888}"
|
PORT="${REC_PORT:-8789}"
|
||||||
|
|
||||||
# Keep trainer UI deps separate from the training venv
|
# Keep trainer UI deps separate from the training venv
|
||||||
VENV_DIR="${DATA_DIR}/.recorder-venv"
|
VENV_DIR="${DATA_DIR}/.recorder-venv"
|
||||||
@@ -17,6 +17,7 @@ PIN_FILE="${VENV_DIR}/.pinned_installed"
|
|||||||
FASTAPI_VERSION="${REC_FASTAPI_VERSION:-0.115.6}"
|
FASTAPI_VERSION="${REC_FASTAPI_VERSION:-0.115.6}"
|
||||||
UVICORN_VERSION="${REC_UVICORN_VERSION:-0.30.6}"
|
UVICORN_VERSION="${REC_UVICORN_VERSION:-0.30.6}"
|
||||||
PY_MULTIPART_VERSION="${REC_PY_MULTIPART_VERSION:-0.0.9}"
|
PY_MULTIPART_VERSION="${REC_PY_MULTIPART_VERSION:-0.0.9}"
|
||||||
|
ESPHOME_VERSION="${REC_ESPHOME_VERSION:-2026.5.1}"
|
||||||
|
|
||||||
echo "microWakeWord Trainer UI (Docker)"
|
echo "microWakeWord Trainer UI (Docker)"
|
||||||
echo "-> ROOTDIR: ${ROOTDIR}"
|
echo "-> ROOTDIR: ${ROOTDIR}"
|
||||||
@@ -25,6 +26,16 @@ echo "-> URL: http://localhost:${PORT}/"
|
|||||||
|
|
||||||
mkdir -p "${DATA_DIR}"
|
mkdir -p "${DATA_DIR}"
|
||||||
|
|
||||||
|
# Install the trainer UI's Python dependencies with ${PIP}.
# fastapi, uvicorn[standard], python-multipart and esphome are pinned to the
# exact versions held in the *_VERSION variables; silero-vad and numpy only
# carry lower bounds (>=5.0.0 and >=1.24.0).
# Globals (read): PIP, FASTAPI_VERSION, UVICORN_VERSION, PY_MULTIPART_VERSION,
#                 ESPHOME_VERSION
# Arguments: none
# NOTE(review): ${PIP} is expanded unquoted, matching its other uses in this
# script; its definition is outside this view - confirm whether it is a single
# path before quoting it.
install_ui_deps() {
|
||||||
|
${PIP} install \
|
||||||
|
"fastapi==${FASTAPI_VERSION}" \
|
||||||
|
"uvicorn[standard]==${UVICORN_VERSION}" \
|
||||||
|
"python-multipart==${PY_MULTIPART_VERSION}" \
|
||||||
|
"esphome==${ESPHOME_VERSION}" \
|
||||||
|
"silero-vad>=5.0.0" \
|
||||||
|
"numpy>=1.24.0"
|
||||||
|
}
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Trainer UI venv (separate)
|
# Trainer UI venv (separate)
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
@@ -39,21 +50,63 @@ source "${VENV_DIR}/bin/activate"
|
|||||||
if [[ ! -f "${PIN_FILE}" ]]; then
|
if [[ ! -f "${PIN_FILE}" ]]; then
|
||||||
echo "Installing pinned trainer UI deps"
|
echo "Installing pinned trainer UI deps"
|
||||||
${PIP} install -U pip setuptools wheel
|
${PIP} install -U pip setuptools wheel
|
||||||
${PIP} install \
|
install_ui_deps
|
||||||
"fastapi==${FASTAPI_VERSION}" \
|
|
||||||
"uvicorn[standard]==${UVICORN_VERSION}" \
|
|
||||||
"python-multipart==${PY_MULTIPART_VERSION}"
|
|
||||||
touch "${PIN_FILE}"
|
touch "${PIN_FILE}"
|
||||||
else
|
else
|
||||||
echo "Reusing existing trainer UI venv (no upgrades)"
|
echo "Reusing existing trainer UI venv (no upgrades)"
|
||||||
fi
|
if ! "${PY}" - "${FASTAPI_VERSION}" "${UVICORN_VERSION}" "${PY_MULTIPART_VERSION}" "${ESPHOME_VERSION}" <<'PY' >/dev/null 2>&1
|
||||||
|
import importlib.metadata as md
|
||||||
|
import sys
|
||||||
|
|
||||||
|
fastapi_version, uvicorn_version, multipart_version, esphome_version = sys.argv[1:5]
|
||||||
|
|
||||||
|
def version_tuple(value):
|
||||||
|
parts = []
|
||||||
|
for token in str(value).replace("-", ".").split("."):
|
||||||
|
if token.isdigit():
|
||||||
|
parts.append(int(token))
|
||||||
|
else:
|
||||||
|
digits = "".join(ch for ch in token if ch.isdigit())
|
||||||
|
if digits:
|
||||||
|
parts.append(int(digits))
|
||||||
|
break
|
||||||
|
return tuple(parts)
|
||||||
|
|
||||||
|
exact = {
|
||||||
|
"fastapi": fastapi_version,
|
||||||
|
"uvicorn": uvicorn_version,
|
||||||
|
"python-multipart": multipart_version,
|
||||||
|
"esphome": esphome_version,
|
||||||
|
}
|
||||||
|
minimum = {
|
||||||
|
"silero-vad": "5.0.0",
|
||||||
|
"numpy": "1.24.0",
|
||||||
|
}
|
||||||
|
present = ("torch", "zeroconf")
|
||||||
|
|
||||||
|
for package, expected in exact.items():
|
||||||
|
if md.version(package) != expected:
|
||||||
|
raise SystemExit(1)
|
||||||
|
for package, minimum_version in minimum.items():
|
||||||
|
if version_tuple(md.version(package)) < version_tuple(minimum_version):
|
||||||
|
raise SystemExit(1)
|
||||||
|
for package in present:
|
||||||
|
md.version(package)
|
||||||
|
PY
|
||||||
|
then
|
||||||
|
echo "UI dependencies missing or stale; installing recorder dependencies"
|
||||||
|
install_ui_deps
|
||||||
|
fi
|
||||||
|
fi
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Trainer server env
|
# Trainer server env
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
export DATA_DIR="${DATA_DIR}"
|
export DATA_DIR="${DATA_DIR}"
|
||||||
export STATIC_DIR="${ROOTDIR}/static"
|
export STATIC_DIR="${ROOTDIR}/static"
|
||||||
export PERSONAL_DIR="${DATA_DIR}/personal_samples"
|
export PERSONAL_DIR="${DATA_DIR}/personal_samples"
|
||||||
|
export CAPTURED_DIR="${DATA_DIR}/captured_audio"
|
||||||
|
export NEGATIVE_DIR="${DATA_DIR}/negative_samples"
|
||||||
|
export TRAINED_WAKE_WORDS_DIR="${DATA_DIR}/trained_wake_words"
|
||||||
|
|
||||||
# IMPORTANT: leave training venv creation to /api/train inside trainer_server.py
|
# IMPORTANT: leave training venv creation to /api/train inside trainer_server.py
|
||||||
# but still set TRAIN_CMD so the server knows how to invoke training once ready
|
# but still set TRAIN_CMD so the server knows how to invoke training once ready
|
||||||
|
|||||||
3039
static/index.html
3039
static/index.html
File diff suppressed because it is too large
Load Diff
@@ -150,6 +150,7 @@ if ${CLEANUP_WORK_DIR} ; then
|
|||||||
"${DATA_DIR}/work/wake_word_samples" \
|
"${DATA_DIR}/work/wake_word_samples" \
|
||||||
"${DATA_DIR}/work/wake_word_samples_augmented" \
|
"${DATA_DIR}/work/wake_word_samples_augmented" \
|
||||||
"${DATA_DIR}/work/personal_augmented_features" \
|
"${DATA_DIR}/work/personal_augmented_features" \
|
||||||
|
"${DATA_DIR}/work/reviewed_negative_features" \
|
||||||
"${DATA_DIR}/work/last_wake_word" || :
|
"${DATA_DIR}/work/last_wake_word" || :
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
2511
trainer_server.py
2511
trainer_server.py
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user