mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Merge pull request #38 from joopdo/main
Add --language flag for non-English TTS voice support (Dutch)
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
|||||||
personal_samples/*
|
personal_samples/*
|
||||||
|
data/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
@@ -157,6 +157,10 @@ pip_install -e "${PSG}"
|
|||||||
git -C tools/piper-sample-generator clean -fd &>/dev/null
|
git -C tools/piper-sample-generator clean -fd &>/dev/null
|
||||||
|
|
||||||
MODELS_DIR="${PSG}/models"
|
MODELS_DIR="${PSG}/models"
|
||||||
|
VOICES_DIR="${PSG}/voices"
|
||||||
|
mkdir -p "${VOICES_DIR}"
|
||||||
|
|
||||||
|
# --- English generator model (multi-speaker, used with --language=en) ---
|
||||||
MODEL_NAME="en_US-libritts_r-medium.pt"
|
MODEL_NAME="en_US-libritts_r-medium.pt"
|
||||||
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
|
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
|
||||||
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"
|
MODEL_URL="https://github.com/rhasspy/piper-sample-generator/releases/download/v2.0.0/${MODEL_NAME}"
|
||||||
@@ -170,6 +174,30 @@ if [ ! -f "${MODEL_FILE}.json" ] ; then
|
|||||||
curl -sfL "${MODEL_URL}.json" -o "${MODEL_FILE}.json"
|
curl -sfL "${MODEL_URL}.json" -o "${MODEL_FILE}.json"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# --- Dutch ONNX voices (single-speaker, used with --language=nl) ---
|
||||||
|
# Working Dutch voices: pim, ronnie (nl_NL) and nathalie (nl_BE).
|
||||||
|
# nl_NL-mls-medium is intentionally excluded (known Piper issue: outputs gibberish).
|
||||||
|
HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main"
|
||||||
|
declare -a NL_VOICES=(
|
||||||
|
"nl/nl_NL/pim/medium/nl_NL-pim-medium"
|
||||||
|
"nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium"
|
||||||
|
"nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium"
|
||||||
|
)
|
||||||
|
echo " ===== Checking Dutch Piper voices ====="
|
||||||
|
for voice_path in "${NL_VOICES[@]}" ; do
|
||||||
|
voice_name="$(basename "${voice_path}")"
|
||||||
|
onnx_file="${VOICES_DIR}/${voice_name}.onnx"
|
||||||
|
json_file="${VOICES_DIR}/${voice_name}.onnx.json"
|
||||||
|
if [ ! -f "${onnx_file}" ] ; then
|
||||||
|
echo " Downloading ${voice_name}.onnx"
|
||||||
|
curl -sfL "${HF_VOICES}/${voice_path}.onnx?download=true" -o "${onnx_file}"
|
||||||
|
fi
|
||||||
|
if [ ! -f "${json_file}" ] ; then
|
||||||
|
echo " Downloading ${voice_name}.onnx.json"
|
||||||
|
curl -sfL "${HF_VOICES}/${voice_path}.onnx.json?download=true" -o "${json_file}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
${GPU} && onnxgpu='-gpu[cuda]' || onnxgpu=""
|
${GPU} && onnxgpu='-gpu[cuda]' || onnxgpu=""
|
||||||
echo " ===== Installing onnxruntime${onnxgpu} ====="
|
echo " ===== Installing onnxruntime${onnxgpu} ====="
|
||||||
pip_install "onnxruntime${onnxgpu}>=1.16.0"
|
pip_install "onnxruntime${onnxgpu}>=1.16.0"
|
||||||
|
|||||||
@@ -11,12 +11,14 @@ fi
|
|||||||
DEFAULT_SAMPLES=50000
|
DEFAULT_SAMPLES=50000
|
||||||
DEFAULT_BATCH_SIZE=100
|
DEFAULT_BATCH_SIZE=100
|
||||||
DEFAULT_TRAINING_STEPS=40000
|
DEFAULT_TRAINING_STEPS=40000
|
||||||
|
DEFAULT_LANGUAGE=en
|
||||||
|
|
||||||
[ -f "${DATA_DIR}/.defaults.env" ] && source "${DATA_DIR}/.defaults.env" || :
|
[ -f "${DATA_DIR}/.defaults.env" ] && source "${DATA_DIR}/.defaults.env" || :
|
||||||
|
|
||||||
: "${SAMPLES:=${DEFAULT_SAMPLES}}"
|
: "${SAMPLES:=${DEFAULT_SAMPLES}}"
|
||||||
: "${BATCH_SIZE:=${DEFAULT_BATCH_SIZE}}"
|
: "${BATCH_SIZE:=${DEFAULT_BATCH_SIZE}}"
|
||||||
: "${TRAINING_STEPS:=${DEFAULT_TRAINING_STEPS}}"
|
: "${TRAINING_STEPS:=${DEFAULT_TRAINING_STEPS}}"
|
||||||
|
: "${LANGUAGE:=${DEFAULT_LANGUAGE}}"
|
||||||
: "${CLEANUP_WORK_DIR:=false}"
|
: "${CLEANUP_WORK_DIR:=false}"
|
||||||
: "${CLEANUP_ARCHIVES:=false}"
|
: "${CLEANUP_ARCHIVES:=false}"
|
||||||
: "${CLEANUP_INTERMEDIATE_FILES:=false}"
|
: "${CLEANUP_INTERMEDIATE_FILES:=false}"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ set -e
|
|||||||
PROGPATH=$(realpath "$0")
|
PROGPATH=$(realpath "$0")
|
||||||
PROGDIR=$(dirname "${PROGPATH}")
|
PROGDIR=$(dirname "${PROGPATH}")
|
||||||
|
|
||||||
KNOWN_ARGS=( samples batch-size data-dir )
|
KNOWN_ARGS=( samples batch-size data-dir language )
|
||||||
source "${PROGDIR}/shell.functions"
|
source "${PROGDIR}/shell.functions"
|
||||||
WAKE_WORD="${POSITIONAL_ARGS[0]}"
|
WAKE_WORD="${POSITIONAL_ARGS[0]}"
|
||||||
|
|
||||||
@@ -15,7 +15,8 @@ fi
|
|||||||
|
|
||||||
if [ "${HELP}" == "true" ] || [ -z "${WAKE_WORD}" ] ; then
|
if [ "${HELP}" == "true" ] || [ -z "${WAKE_WORD}" ] ; then
|
||||||
cat <<EOF >&2
|
cat <<EOF >&2
|
||||||
Usage: $0 [ --samples=<samples> ] [ --batch-size=<batch_size> ] <wake_word>
|
Usage: $0 [ --samples=<samples> ] [ --batch-size=<batch_size> ]
|
||||||
|
[ --language=<lang> ] <wake_word>
|
||||||
|
|
||||||
--samples: The number of samples to generate for the wake word.
|
--samples: The number of samples to generate for the wake word.
|
||||||
Default: ${DEFAULT_SAMPLES}
|
Default: ${DEFAULT_SAMPLES}
|
||||||
@@ -24,6 +25,12 @@ Usage: $0 [ --samples=<samples> ] [ --batch-size=<batch_size> ] <wake_word>
|
|||||||
samples, the more memory is needed.
|
samples, the more memory is needed.
|
||||||
Default: ${DEFAULT_BATCH_SIZE}
|
Default: ${DEFAULT_BATCH_SIZE}
|
||||||
|
|
||||||
|
--language: Language for TTS voice selection.
|
||||||
|
"en" uses the multi-speaker LibriTTS-R generator.
|
||||||
|
Other languages (e.g. "nl") use single-speaker ONNX
|
||||||
|
voices and cycle between them for variety.
|
||||||
|
Default: ${DEFAULT_LANGUAGE}
|
||||||
|
|
||||||
<wake_word> The word to generate samples for.
|
<wake_word> The word to generate samples for.
|
||||||
Required.
|
Required.
|
||||||
|
|
||||||
@@ -40,21 +47,60 @@ cd "${WORK_DIR}"
|
|||||||
|
|
||||||
PSG="${DATA_DIR}/tools/piper-sample-generator"
|
PSG="${DATA_DIR}/tools/piper-sample-generator"
|
||||||
MODELS_DIR="${PSG}/models"
|
MODELS_DIR="${PSG}/models"
|
||||||
MODEL_NAME=en_US-libritts_r-medium.pt
|
VOICES_DIR="${PSG}/voices"
|
||||||
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
|
|
||||||
SAMPLES_DIR="${WORK_DIR}/wake_word_samples"
|
SAMPLES_DIR="${WORK_DIR}/wake_word_samples"
|
||||||
|
|
||||||
mkdir -p "${SAMPLES_DIR}" || :
|
mkdir -p "${SAMPLES_DIR}" || :
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Build the --model argument(s) based on language
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
declare -a MODEL_ARGS=()
|
||||||
|
MODEL_TAG=""
|
||||||
|
|
||||||
|
if [ "${LANGUAGE}" == "en" ] ; then
|
||||||
|
# English: use the multi-speaker LibriTTS-R generator (.pt)
|
||||||
|
MODEL_NAME="en_US-libritts_r-medium.pt"
|
||||||
|
MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
|
||||||
|
if [ ! -f "${MODEL_FILE}" ] ; then
|
||||||
|
echo "ERROR: English model ${MODEL_FILE} not found. Run setup_python_venv first." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
MODEL_ARGS=( --model "${MODEL_FILE}" )
|
||||||
|
MODEL_TAG="${MODEL_NAME}"
|
||||||
|
else
|
||||||
|
# Non-English: find all ONNX voices matching the language prefix
|
||||||
|
# e.g. LANGUAGE=nl matches nl_NL-pim-medium.onnx, nl_BE-nathalie-medium.onnx, etc.
|
||||||
|
shopt -s nullglob
|
||||||
|
voice_files=( "${VOICES_DIR}/${LANGUAGE}"_*.onnx )
|
||||||
|
shopt -u nullglob
|
||||||
|
|
||||||
|
if [ ${#voice_files[@]} -eq 0 ] ; then
|
||||||
|
echo "ERROR: No ONNX voice files found for language '${LANGUAGE}' in ${VOICES_DIR}/" >&2
|
||||||
|
echo " Expected files matching: ${LANGUAGE}_*.onnx" >&2
|
||||||
|
echo " Run setup_python_venv to download voice models." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo " Using ${#voice_files[@]} voice(s) for language '${LANGUAGE}':"
|
||||||
|
MODEL_TAG="${LANGUAGE}"
|
||||||
|
for vf in "${voice_files[@]}" ; do
|
||||||
|
vname="$(basename "${vf}")"
|
||||||
|
echo " - ${vname}"
|
||||||
|
MODEL_ARGS+=( --model "${vf}" )
|
||||||
|
MODEL_TAG="${MODEL_TAG}+${vname}"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
REGENERATE=false
|
REGENERATE=false
|
||||||
|
|
||||||
if [ "${SAMPLES}" -eq 1 ] ; then
|
if [ "${SAMPLES}" -eq 1 ] ; then
|
||||||
echo "===== Generating ${SAMPLES} sample of '${WAKE_WORD}' ====="
|
echo "===== Generating ${SAMPLES} sample of '${WAKE_WORD}' (language=${LANGUAGE}) ====="
|
||||||
wake_word_filename="${WAKE_WORD//[ \`~\!\$&*\(\)\{\}\[\]\|\;\'\"<>.?\/]/_}"
|
wake_word_filename="${WAKE_WORD//[ \`~\!@#\$%^&*\(\)\{\}\[\]\|\;\'\"<>.?\/]/_}"
|
||||||
|
|
||||||
mkdir -p "${WORK_DIR}/test_sample" || :
|
mkdir -p "${WORK_DIR}/test_sample" || :
|
||||||
"${PSG}/generate_samples.py" "${WAKE_WORD}" \
|
"${PSG}/generate_samples.py" "${WAKE_WORD}" \
|
||||||
--model "${MODEL_FILE}" \
|
"${MODEL_ARGS[@]}" \
|
||||||
--max-samples ${SAMPLES} \
|
--max-samples ${SAMPLES} \
|
||||||
--batch-size ${BATCH_SIZE} \
|
--batch-size ${BATCH_SIZE} \
|
||||||
--output-dir "${WORK_DIR}/test_sample" \
|
--output-dir "${WORK_DIR}/test_sample" \
|
||||||
@@ -65,9 +111,9 @@ if [ "${SAMPLES}" -eq 1 ] ; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
grep -q "${WAKE_WORD}:${SAMPLES}:${MODEL_NAME}" "${WORK_DIR}/last_wake_word" &>/dev/null || REGENERATE=true
|
grep -q "${WAKE_WORD}:${SAMPLES}:${MODEL_TAG}" "${WORK_DIR}/last_wake_word" &>/dev/null || REGENERATE=true
|
||||||
|
|
||||||
# Double check that the number of existing samples matches SAMPLES"
|
# Double check that the number of existing samples matches SAMPLES
|
||||||
existing_samples=$(find "${SAMPLES_DIR}" -name '*.wav' | wc -l)
|
existing_samples=$(find "${SAMPLES_DIR}" -name '*.wav' | wc -l)
|
||||||
[ "${existing_samples}" -eq "${SAMPLES}" ] || REGENERATE=true
|
[ "${existing_samples}" -eq "${SAMPLES}" ] || REGENERATE=true
|
||||||
|
|
||||||
@@ -79,7 +125,7 @@ if ! ${REGENERATE} ; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo -e "\n===== Generating ${SAMPLES} wake word samples in batches of ${BATCH_SIZE} ====="
|
echo -e "\n===== Generating ${SAMPLES} wake word samples in batches of ${BATCH_SIZE} (language=${LANGUAGE}) ====="
|
||||||
export TF_CPP_MIN_LOG_LEVEL=9
|
export TF_CPP_MIN_LOG_LEVEL=9
|
||||||
export TF_FORCE_GPU_ALLOW_GROWTH=true
|
export TF_FORCE_GPU_ALLOW_GROWTH=true
|
||||||
export TF_GPU_ALLOCATOR=cuda_malloc_async
|
export TF_GPU_ALLOCATOR=cuda_malloc_async
|
||||||
@@ -93,7 +139,7 @@ echo " Generating samples"
|
|||||||
rm -rf "${SAMPLES_DIR}" || :
|
rm -rf "${SAMPLES_DIR}" || :
|
||||||
mkdir -p "${SAMPLES_DIR}" || :
|
mkdir -p "${SAMPLES_DIR}" || :
|
||||||
"${PSG}/generate_samples.py" "${WAKE_WORD}" \
|
"${PSG}/generate_samples.py" "${WAKE_WORD}" \
|
||||||
--model "${MODEL_FILE}" \
|
"${MODEL_ARGS[@]}" \
|
||||||
--max-samples ${SAMPLES} \
|
--max-samples ${SAMPLES} \
|
||||||
--batch-size ${BATCH_SIZE} \
|
--batch-size ${BATCH_SIZE} \
|
||||||
--output-dir "${SAMPLES_DIR}" 2>&1 | sed -r -e "s/(DEBUG|INFO):__main__:/ /g"
|
--output-dir "${SAMPLES_DIR}" 2>&1 | sed -r -e "s/(DEBUG|INFO):__main__:/ /g"
|
||||||
@@ -103,8 +149,7 @@ if [ "${generated_files}" -ne "${SAMPLES}" ] ; then
|
|||||||
echo "ERROR: only generated ${generated_files} files" >&2
|
echo "ERROR: only generated ${generated_files} files" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
END_TS=$(date +%s.%N)
|
echo "${WAKE_WORD}:${SAMPLES}:${MODEL_TAG}" > "${WORK_DIR}/last_wake_word"
|
||||||
echo "${WAKE_WORD}:${SAMPLES}:${MODEL_NAME}" > "${WORK_DIR}/last_wake_word"
|
|
||||||
echo
|
echo
|
||||||
END_TS=$EPOCHSECONDS
|
END_TS=$EPOCHSECONDS
|
||||||
print_elapsed_time "${START_TS}" "${END_TS}" "Generated ${SAMPLES} wake word samples."
|
print_elapsed_time "${START_TS}" "${END_TS}" "Generated ${SAMPLES} wake word samples."
|
||||||
|
|||||||
@@ -35,6 +35,8 @@ TRAIN_CMD = os.environ.get(
|
|||||||
f"source '{DATA_DIR}/.venv/bin/activate' && train_wake_word --data-dir '{DATA_DIR}'"
|
f"source '{DATA_DIR}/.venv/bin/activate' && train_wake_word --data-dir '{DATA_DIR}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
DEFAULT_LANGUAGE = os.environ.get("MWW_LANGUAGE", "en")
|
||||||
|
|
||||||
TAKES_PER_SPEAKER_DEFAULT = int(os.environ.get("REC_TAKES_PER_SPEAKER", "10"))
|
TAKES_PER_SPEAKER_DEFAULT = int(os.environ.get("REC_TAKES_PER_SPEAKER", "10"))
|
||||||
SPEAKERS_TOTAL_DEFAULT = int(os.environ.get("REC_SPEAKERS_TOTAL", "1"))
|
SPEAKERS_TOTAL_DEFAULT = int(os.environ.get("REC_SPEAKERS_TOTAL", "1"))
|
||||||
|
|
||||||
@@ -60,6 +62,7 @@ def safe_name(raw: str) -> str:
|
|||||||
STATE: Dict[str, Any] = {
|
STATE: Dict[str, Any] = {
|
||||||
"raw_phrase": None,
|
"raw_phrase": None,
|
||||||
"safe_word": None,
|
"safe_word": None,
|
||||||
|
"language": DEFAULT_LANGUAGE,
|
||||||
|
|
||||||
"speakers_total": SPEAKERS_TOTAL_DEFAULT,
|
"speakers_total": SPEAKERS_TOTAL_DEFAULT,
|
||||||
"takes_per_speaker": TAKES_PER_SPEAKER_DEFAULT,
|
"takes_per_speaker": TAKES_PER_SPEAKER_DEFAULT,
|
||||||
@@ -372,6 +375,7 @@ def _normalize_output_artifacts(safe_word: str, log_path: Path) -> None:
|
|||||||
def _run_training_background(safe_word: str, allow_no_personal: bool):
|
def _run_training_background(safe_word: str, allow_no_personal: bool):
|
||||||
with STATE_LOCK:
|
with STATE_LOCK:
|
||||||
raw_phrase = STATE.get("raw_phrase") or ""
|
raw_phrase = STATE.get("raw_phrase") or ""
|
||||||
|
language = STATE.get("language") or DEFAULT_LANGUAGE
|
||||||
|
|
||||||
wake_word_title = _title_from_phrase(raw_phrase)
|
wake_word_title = _title_from_phrase(raw_phrase)
|
||||||
|
|
||||||
@@ -403,9 +407,9 @@ def _run_training_background(safe_word: str, allow_no_personal: bool):
|
|||||||
_ensure_training_datasets(log_path)
|
_ensure_training_datasets(log_path)
|
||||||
|
|
||||||
if wake_word_title:
|
if wake_word_title:
|
||||||
cmd_str = f"{TRAIN_CMD} '{safe_word}' '{wake_word_title}'"
|
cmd_str = f"{TRAIN_CMD} --language='{language}' '{safe_word}' '{wake_word_title}'"
|
||||||
else:
|
else:
|
||||||
cmd_str = f"{TRAIN_CMD} '{safe_word}'"
|
cmd_str = f"{TRAIN_CMD} --language='{language}' '{safe_word}'"
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env["MWW_ALLOW_NO_PERSONAL"] = "true" if allow_no_personal else "false"
|
env["MWW_ALLOW_NO_PERSONAL"] = "true" if allow_no_personal else "false"
|
||||||
@@ -470,6 +474,7 @@ def start_session(payload: Dict[str, Any]):
|
|||||||
|
|
||||||
speakers_total = int(payload.get("speakers_total") or SPEAKERS_TOTAL_DEFAULT)
|
speakers_total = int(payload.get("speakers_total") or SPEAKERS_TOTAL_DEFAULT)
|
||||||
takes_per_speaker = int(payload.get("takes_per_speaker") or TAKES_PER_SPEAKER_DEFAULT)
|
takes_per_speaker = int(payload.get("takes_per_speaker") or TAKES_PER_SPEAKER_DEFAULT)
|
||||||
|
language = (payload.get("language") or DEFAULT_LANGUAGE).strip().lower()
|
||||||
|
|
||||||
speakers_total = max(1, min(10, speakers_total))
|
speakers_total = max(1, min(10, speakers_total))
|
||||||
takes_per_speaker = max(1, min(50, takes_per_speaker))
|
takes_per_speaker = max(1, min(50, takes_per_speaker))
|
||||||
@@ -477,6 +482,7 @@ def start_session(payload: Dict[str, Any]):
|
|||||||
with STATE_LOCK:
|
with STATE_LOCK:
|
||||||
STATE["raw_phrase"] = raw
|
STATE["raw_phrase"] = raw
|
||||||
STATE["safe_word"] = safe
|
STATE["safe_word"] = safe
|
||||||
|
STATE["language"] = language
|
||||||
STATE["speakers_total"] = speakers_total
|
STATE["speakers_total"] = speakers_total
|
||||||
STATE["takes_per_speaker"] = takes_per_speaker
|
STATE["takes_per_speaker"] = takes_per_speaker
|
||||||
STATE["takes_received"] = 0
|
STATE["takes_received"] = 0
|
||||||
@@ -491,6 +497,7 @@ def start_session(payload: Dict[str, Any]):
|
|||||||
"ok": True,
|
"ok": True,
|
||||||
"raw_phrase": raw,
|
"raw_phrase": raw,
|
||||||
"safe_word": safe,
|
"safe_word": safe,
|
||||||
|
"language": language,
|
||||||
"speakers_total": speakers_total,
|
"speakers_total": speakers_total,
|
||||||
"takes_per_speaker": takes_per_speaker,
|
"takes_per_speaker": takes_per_speaker,
|
||||||
"takes_total": speakers_total * takes_per_speaker,
|
"takes_total": speakers_total * takes_per_speaker,
|
||||||
@@ -506,6 +513,7 @@ def get_session():
|
|||||||
"ok": True,
|
"ok": True,
|
||||||
"raw_phrase": STATE["raw_phrase"],
|
"raw_phrase": STATE["raw_phrase"],
|
||||||
"safe_word": STATE["safe_word"],
|
"safe_word": STATE["safe_word"],
|
||||||
|
"language": STATE["language"],
|
||||||
"speakers_total": STATE["speakers_total"],
|
"speakers_total": STATE["speakers_total"],
|
||||||
"takes_per_speaker": STATE["takes_per_speaker"],
|
"takes_per_speaker": STATE["takes_per_speaker"],
|
||||||
"takes_received": STATE["takes_received"],
|
"takes_received": STATE["takes_received"],
|
||||||
|
|||||||
@@ -178,6 +178,12 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="row" style="margin-top:10px;">
|
<div class="row" style="margin-top:10px;">
|
||||||
|
<label class="muted">Language
|
||||||
|
<select id="language" style="padding:10px 12px;font-size:15px;border-radius:12px;border:1px solid rgba(255,255,255,0.12);background:rgba(0,0,0,0.35);color:var(--text);outline:none;">
|
||||||
|
<option value="en" selected>English (en)</option>
|
||||||
|
<option value="nl">Dutch (nl)</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
<label class="muted">Speakers
|
<label class="muted">Speakers
|
||||||
<input id="speakersTotal" type="number" min="1" max="10" value="1" />
|
<input id="speakersTotal" type="number" min="1" max="10" value="1" />
|
||||||
</label>
|
</label>
|
||||||
@@ -727,13 +733,14 @@
|
|||||||
|
|
||||||
speakersTotal = parseInt($("speakersTotal").value || "1", 10);
|
speakersTotal = parseInt($("speakersTotal").value || "1", 10);
|
||||||
takesPerSpeaker = parseInt($("takesPerSpeaker").value || "10", 10);
|
takesPerSpeaker = parseInt($("takesPerSpeaker").value || "10", 10);
|
||||||
|
const language = $("language").value || "en";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
setPill($("sessionPill"), "Starting…", "warn");
|
setPill($("sessionPill"), "Starting…", "warn");
|
||||||
const data = await api("/api/start_session", {
|
const data = await api("/api/start_session", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {"Content-Type":"application/json"},
|
headers: {"Content-Type":"application/json"},
|
||||||
body: JSON.stringify({ phrase, speakers_total: speakersTotal, takes_per_speaker: takesPerSpeaker })
|
body: JSON.stringify({ phrase, speakers_total: speakersTotal, takes_per_speaker: takesPerSpeaker, language })
|
||||||
});
|
});
|
||||||
|
|
||||||
session = data;
|
session = data;
|
||||||
@@ -755,7 +762,7 @@
|
|||||||
|
|
||||||
await stopMicNow();
|
await stopMicNow();
|
||||||
|
|
||||||
setPill($("sessionPill"), `Session: ${data.safe_word}`, "ok");
|
setPill($("sessionPill"), `Session: ${data.safe_word} (${data.language || "en"})`, "ok");
|
||||||
$("beginBtn").disabled = false;
|
$("beginBtn").disabled = false;
|
||||||
$("resetBtn").disabled = false;
|
$("resetBtn").disabled = false;
|
||||||
$("trainBtn").disabled = false;
|
$("trainBtn").disabled = false;
|
||||||
|
|||||||
7
train_wake_word
Normal file → Executable file
7
train_wake_word
Normal file → Executable file
@@ -5,7 +5,7 @@ PROGPATH=$(realpath "$0")
|
|||||||
PROGDIR=$(dirname "${PROGPATH}")
|
PROGDIR=$(dirname "${PROGPATH}")
|
||||||
CLIDIR="${PROGDIR}/cli"
|
CLIDIR="${PROGDIR}/cli"
|
||||||
|
|
||||||
KNOWN_ARGS=( samples batch-size training-steps data-dir cleanup-work-dir )
|
KNOWN_ARGS=( samples batch-size training-steps data-dir cleanup-work-dir language )
|
||||||
source "${CLIDIR}/shell.functions"
|
source "${CLIDIR}/shell.functions"
|
||||||
WAKE_WORD=${POSITIONAL_ARGS[0]}
|
WAKE_WORD=${POSITIONAL_ARGS[0]}
|
||||||
|
|
||||||
@@ -18,6 +18,7 @@ if [ "${HELP}" == "true" ] || [ -z "${WAKE_WORD}" ] ; then
|
|||||||
cat <<EOF >&2
|
cat <<EOF >&2
|
||||||
Usage: train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
|
Usage: train_wake_word [ --samples=<samples> ] [ --batch-size=<batch_size> ]
|
||||||
[ --training-steps=<steps> ] [ --cleanup-work-dir ]
|
[ --training-steps=<steps> ] [ --cleanup-work-dir ]
|
||||||
|
[ --language=<lang> ]
|
||||||
<wake_word> [ <wake_word_title> ]
|
<wake_word> [ <wake_word_title> ]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
@@ -35,6 +36,9 @@ Options:
|
|||||||
--cleanup-work-dir: Delete the /data/work directory after successful training.
|
--cleanup-work-dir: Delete the /data/work directory after successful training.
|
||||||
Default: false
|
Default: false
|
||||||
|
|
||||||
|
--language: Language for TTS voice selection (e.g. "en", "nl").
|
||||||
|
Default: ${DEFAULT_LANGUAGE}
|
||||||
|
|
||||||
<wake_word> The word to train spelled phonetically.
|
<wake_word> The word to train spelled phonetically.
|
||||||
Required.
|
Required.
|
||||||
|
|
||||||
@@ -88,6 +92,7 @@ export GRPC_VERBOSITY=ERROR
|
|||||||
"${CLIDIR}/wake_word_sample_generator" \
|
"${CLIDIR}/wake_word_sample_generator" \
|
||||||
--samples=${SAMPLES} \
|
--samples=${SAMPLES} \
|
||||||
--batch-size=${BATCH_SIZE} \
|
--batch-size=${BATCH_SIZE} \
|
||||||
|
--language="${LANGUAGE}" \
|
||||||
--data-dir="${DATA_DIR}" "${WAKE_WORD}"
|
--data-dir="${DATA_DIR}" "${WAKE_WORD}"
|
||||||
|
|
||||||
POST_GEN_TS=$EPOCHSECONDS
|
POST_GEN_TS=$EPOCHSECONDS
|
||||||
|
|||||||
Reference in New Issue
Block a user