#!/usr/bin/env python3
"""Run a sample-generator script and relay its output with progress lines.

The generator is started as a child Python process with stderr merged into
stdout. Every output line is filtered/reformatted before being printed. When
one of the generator's ``--model`` values ends in ``.onnx``, the number of
``*.wav`` files in the output directory is polled so that
"Generated N/M samples..." lines can be printed while the child runs.
"""
import argparse
import queue
import subprocess
import sys
import threading
from pathlib import Path

# Logger prefixes that are stripped from generator output before display.
_MAIN_LOG_PREFIXES = (
    "DEBUG:__main__:",
    "INFO:__main__:",
    "WARNING:__main__:",
    "ERROR:__main__:",
)

# Unique end-of-stream marker. A dedicated object (rather than None) keeps
# "the reader hit EOF" distinguishable from "the queue was idle for a while".
_END_OF_STREAM = object()

# How long to wait for a line before doing an idle progress check.
_POLL_SECONDS = 0.2

# How long to wait for the reader to reach EOF once the child has exited.
_EOF_GRACE_SECONDS = 1.0


def _model_args(generator_args):
    """Return every value passed to ``--model`` in *generator_args*.

    Both ``--model VALUE`` and ``--model=VALUE`` spellings are recognized.
    A trailing ``--model`` with no value is ignored.
    """
    values = []
    for idx, arg in enumerate(generator_args):
        if arg == "--model":
            if idx + 1 < len(generator_args):
                values.append(generator_args[idx + 1])
        elif str(arg).startswith("--model="):
            values.append(str(arg)[len("--model="):])
    return values


def _is_onnx_run(generator_args):
    """Return True when any ``--model`` value ends with ``.onnx``."""
    return any(
        str(value).endswith(".onnx") for value in _model_args(generator_args)
    )


def _format_line(line):
    """Return the display form of one generator output line.

    Lines starting with ``DEBUG:piper.voice:`` yield ``None`` (suppressed).
    Lines starting with one of the ``*:__main__:`` logger prefixes have the
    prefix removed, the remainder stripped, and a single leading space added.
    Any other line is returned unchanged.
    """
    if line.startswith("DEBUG:piper.voice:"):
        return None
    for prefix in _MAIN_LOG_PREFIXES:
        if line.startswith(prefix):
            return " " + line[len(prefix):].strip()
    return line


def _reader(stdout, sink):
    """Copy lines from *stdout* into the queue *sink*, then signal EOF.

    Trailing newlines are removed from each line. The end-of-stream marker is
    always enqueued, even if iterating *stdout* raises.
    """
    try:
        for raw in stdout:
            sink.put(raw.rstrip("\n"))
    finally:
        sink.put(_END_OF_STREAM)


def _progress_step(max_samples):
    """Return how many new samples must appear between progress lines."""
    if max_samples <= 20:
        return 1
    if max_samples <= 100:
        return 5
    return 10


def _count_wavs(output_dir):
    """Return the number of ``*.wav`` entries directly inside *output_dir*."""
    return sum(1 for _ in output_dir.glob("*.wav"))


def _should_report(current, last_reported, step, max_samples):
    """Return True when a new progress line is due.

    A line is due when the count grew since the last report and either the
    target was reached or at least *step* new samples appeared.
    """
    if current <= last_reported:
        return False
    return current >= max_samples or current - last_reported >= step


def main(argv=None):
    """Run the generator and relay its output; return the child's exit code.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The return code of the generator process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--generator", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--max-samples", required=True, type=int)
    parser.add_argument("generator_args", nargs=argparse.REMAINDER)
    args = parser.parse_args(argv)

    generator_args = list(args.generator_args)
    # REMAINDER keeps the "--" separator; the generator must not see it.
    if generator_args and generator_args[0] == "--":
        generator_args = generator_args[1:]

    output_dir = Path(args.output_dir)
    use_sample_progress = _is_onnx_run(generator_args)
    step = _progress_step(args.max_samples)
    last_reported = 0

    proc = subprocess.Popen(
        [sys.executable, args.generator, *generator_args],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    try:
        line_queue = queue.Queue()
        reader = threading.Thread(
            target=_reader, args=(proc.stdout, line_queue), daemon=True
        )
        reader.start()

        stream_open = True
        while stream_open or proc.poll() is None:
            try:
                item = line_queue.get(timeout=_POLL_SECONDS)
            except queue.Empty:
                item = None  # idle tick: nothing to print, maybe progress
                if stream_open and proc.poll() is not None:
                    # The child is gone but EOF has not been seen yet. Give
                    # the reader a moment: if it finishes, its remaining lines
                    # and the EOF marker are queued and get drained below;
                    # if not, something else still holds the pipe open and
                    # we stop waiting for it.
                    reader.join(timeout=_EOF_GRACE_SECONDS)
                    stream_open = not reader.is_alive()
                    continue

            if item is _END_OF_STREAM:
                stream_open = False
            elif item is not None:
                formatted = _format_line(item)
                # Suppressed (None) and empty lines are not printed.
                if formatted:
                    print(formatted, flush=True)

            if use_sample_progress:
                current = _count_wavs(output_dir)
                if _should_report(
                    current, last_reported, step, args.max_samples
                ):
                    print(
                        f" Generated {current}/{args.max_samples} samples...",
                        flush=True,
                    )
                    last_reported = current

        rc = proc.wait()
    finally:
        # Only reached with a live child when something above raised; do not
        # leave the generator running without its wrapper.
        if proc.poll() is None:
            proc.terminate()
            proc.wait()

    if use_sample_progress:
        final_count = _count_wavs(output_dir)
        if final_count > last_reported:
            print(
                f" Generated {final_count}/{args.max_samples} samples...",
                flush=True,
            )
    return rc


if __name__ == "__main__":
    raise SystemExit(main())
diff --git a/dockerfile b/dockerfile index 5778ead..bf3bfeb 100644 --- a/dockerfile +++ b/dockerfile @@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* \ && mkdir -p /data -# Recorder port +# Trainer UI port EXPOSE 8789 # Script root @@ -23,7 +23,7 @@ COPY --chown=root:root --chmod=0755 .bashrc /root/ COPY --chown=root:root --chmod=0755 \ train_wake_word \ run_recorder.sh \ - recorder_server.py \ + trainer_server.py \ requirements.txt \ /root/mww-scripts/ @@ -33,8 +33,8 @@ COPY --chown=root:root cli/ /root/mww-scripts/cli/ # Make all CLI scripts executable (avoids "Permission denied") RUN chmod -R a+x /root/mww-scripts/cli -# Static UI for recorder +# Static UI for trainer COPY --chown=root:root --chmod=0644 static/index.html /root/mww-scripts/static/index.html -# recorder server +# trainer server CMD ["/bin/bash", "-lc", "/root/mww-scripts/run_recorder.sh"] diff --git a/run_recorder.sh b/run_recorder.sh index 9ac94c5..75d1ec9 100644 --- a/run_recorder.sh +++ b/run_recorder.sh @@ -18,7 +18,7 @@ FASTAPI_VERSION="${REC_FASTAPI_VERSION:-0.115.6}" UVICORN_VERSION="${REC_UVICORN_VERSION:-0.30.6}" PY_MULTIPART_VERSION="${REC_PY_MULTIPART_VERSION:-0.0.9}" -echo "microWakeWord Recorder (Docker)" +echo "microWakeWord Trainer UI (Docker)" echo "-> ROOTDIR: ${ROOTDIR}" echo "-> DATA_DIR: ${DATA_DIR}" echo "-> URL: http://localhost:${PORT}/" @@ -26,10 +26,10 @@ echo "-> URL: http://localhost:${PORT}/" mkdir -p "${DATA_DIR}" # ----------------------------- -# Recorder venv (separate) +# Trainer UI venv (separate) # ----------------------------- if [[ ! -x "${PY}" ]]; then - echo "Creating recorder venv: ${VENV_DIR}" + echo "Creating trainer UI venv: ${VENV_DIR}" python3 -m venv "${VENV_DIR}" fi @@ -37,7 +37,7 @@ fi source "${VENV_DIR}/bin/activate" if [[ ! 
-f "${PIN_FILE}" ]]; then - echo "Installing pinned recorder deps" + echo "Installing pinned trainer UI deps" ${PIP} install -U pip setuptools wheel ${PIP} install \ "fastapi==${FASTAPI_VERSION}" \ @@ -45,20 +45,20 @@ if [[ ! -f "${PIN_FILE}" ]]; then "python-multipart==${PY_MULTIPART_VERSION}" touch "${PIN_FILE}" else - echo "Reusing existing recorder venv (no upgrades)" + echo "Reusing existing trainer UI venv (no upgrades)" fi # ----------------------------- -# Recorder server env +# Trainer server env # ----------------------------- export DATA_DIR="${DATA_DIR}" export STATIC_DIR="${ROOTDIR}/static" export PERSONAL_DIR="${DATA_DIR}/personal_samples" -# IMPORTANT: leave training venv creation to /api/train inside recorder_server.py +# IMPORTANT: leave training venv creation to /api/train inside trainer_server.py # but still set TRAIN_CMD so the server knows how to invoke training once ready export TRAIN_CMD="source '${DATA_DIR}/.venv/bin/activate' && train_wake_word --data-dir='${DATA_DIR}'" echo "Launching uvicorn on ${HOST}:${PORT}" cd "${ROOTDIR}" -exec "${VENV_DIR}/bin/uvicorn" recorder_server:app --host "${HOST}" --port "${PORT}" \ No newline at end of file +exec "${VENV_DIR}/bin/uvicorn" trainer_server:app --host "${HOST}" --port "${PORT}" diff --git a/static/index.html b/static/index.html index 4b57012..d0a668a 100644 --- a/static/index.html +++ b/static/index.html @@ -3,71 +3,71 @@
-Enter a wake word, test TTS pronunciation, then record takes. Recording starts when you speak and stops after silence.
+Start a session, upload your own recorded voice samples, and the app will validate or convert them into the training format used by the existing pipeline.
Personal samples are optional. You can train with TTS only, or upload your own audio here and it will be saved into personal_samples/ as 16 kHz mono 16-bit PCM WAV.
WAV, MP3, M4A, FLAC, OGG, AAC, OPUS, and WEBM are all fine when ffmpeg is available. Files already in the correct format are kept as-is.
+- Speaker: - / - - Waiting -
+personal_samples/.- Take: 0 / 10 - Not recording -
+(no training started)+