#!/bin/bash
#
# Generate text-to-speech samples of a wake word with piper-sample-generator.
#
# With --samples=1 a single test sample is written to
# ${DATA_DIR}/work/test_sample/ and the script exits.  Otherwise the full
# sample set is (re)generated into ${DATA_DIR}/work/wake_word_samples/, unless
# a previous run already produced the same number of samples for the same
# wake word and model set.
#
# NOTE(review): SAMPLES, BATCH_SIZE, DATA_DIR, LANGUAGE, HELP, POSITIONAL_ARGS,
# UNKNOWN_ARGS, the DEFAULT_* values and print_elapsed_time are not defined in
# this file; presumably they come from shell.functions, driven by KNOWN_ARGS.
# Confirm against that file.

set -e

PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")

KNOWN_ARGS=( samples batch-size data-dir language )
source "${PROGDIR}/shell.functions"

WAKE_WORD="${POSITIONAL_ARGS[0]}"

if [ ${#UNKNOWN_ARGS[@]} -gt 0 ] ; then
  echo "Unknown argument(s): ${UNKNOWN_ARGS[*]}" >&2
  HELP=true
fi

# Show usage when help was requested, an unknown argument was given, or the
# required wake word is missing.
if [ "${HELP}" == "true" ] || [ -z "${WAKE_WORD}" ] ; then
  cat <<EOF >&2
Usage: $0 [ --samples=<samples> ] [ --batch-size=<batch_size> ] [ --language=<language> ] <wake_word>

--samples:     The number of samples to generate for the wake word.
               Default: ${DEFAULT_SAMPLES}

--batch-size:  How many samples should be generated at a time.
               The more samples, the more memory is needed.
               Default: ${DEFAULT_BATCH_SIZE}

--language:    Language for TTS voice selection.
               "en" uses the multi-speaker LibriTTS-R generator.
               Other languages (e.g. "nl") use single-speaker ONNX voices
               and cycle between them for variety.
               Default: ${DEFAULT_LANGUAGE}

<wake_word>:   The word to generate samples for.  Required.
EOF
  exit 1
fi

# shellcheck source=/dev/null
source "${DATA_DIR}/.venv/bin/activate"

WORK_DIR="${DATA_DIR}/work"
mkdir -p "${WORK_DIR}" || :
cd "${WORK_DIR}"

PSG="${DATA_DIR}/tools/piper-sample-generator"
MODELS_DIR="${PSG}/models"
VOICES_DIR="${PSG}/voices"
SAMPLES_DIR="${WORK_DIR}/wake_word_samples"
mkdir -p "${SAMPLES_DIR}" || :

# ---------------------------------------------------------------------------
# Build the --model argument(s) based on language
# ---------------------------------------------------------------------------
# MODEL_ARGS holds the "--model <file>" pairs handed to the generator.
# MODEL_TAG identifies the model set and is stored in last_wake_word so that a
# change of models triggers regeneration.
declare -a MODEL_ARGS=()
MODEL_TAG=""

if [ "${LANGUAGE}" == "en" ] ; then
  # English: use the multi-speaker LibriTTS-R generator (.pt)
  MODEL_NAME="en_US-libritts_r-medium.pt"
  MODEL_FILE="${MODELS_DIR}/${MODEL_NAME}"
  if [ ! -f "${MODEL_FILE}" ] ; then
    echo "ERROR: English model ${MODEL_FILE} not found. Run setup_python_venv first." >&2
    exit 1
  fi
  MODEL_ARGS=( --model "${MODEL_FILE}" )
  MODEL_TAG="${MODEL_NAME}"
else
  # Non-English: find all ONNX voices matching the language prefix
  # e.g. LANGUAGE=nl matches nl_NL-pim-medium.onnx, nl_BE-nathalie-medium.onnx, etc.
  # nullglob makes an unmatched pattern expand to an empty array instead of
  # the literal pattern, so the count check below is reliable.
  shopt -s nullglob
  voice_files=( "${VOICES_DIR}/${LANGUAGE}"_*.onnx )
  shopt -u nullglob

  if [ ${#voice_files[@]} -eq 0 ] ; then
    echo "ERROR: No ONNX voice files found for language '${LANGUAGE}' in ${VOICES_DIR}/" >&2
    echo " Expected files matching: ${LANGUAGE}_*.onnx" >&2
    echo " Run setup_python_venv to download voice models." >&2
    exit 1
  fi

  echo " Using ${#voice_files[@]} voice(s) for language '${LANGUAGE}':"
  MODEL_TAG="${LANGUAGE}"
  for vf in "${voice_files[@]}" ; do
    vname="$(basename "${vf}")"
    echo " - ${vname}"
    MODEL_ARGS+=( --model "${vf}" )
    MODEL_TAG="${MODEL_TAG}+${vname}"
  done
fi

REGENERATE=false

# ---------------------------------------------------------------------------
# Single-sample mode: produce one test sample and exit
# ---------------------------------------------------------------------------
if [ "${SAMPLES}" -eq 1 ] ; then
  echo "===== Generating ${SAMPLES} sample of '${WAKE_WORD}' (language=${LANGUAGE}) ====="
  # Replace spaces and shell/filesystem-unfriendly punctuation with '_' so the
  # wake word can be used as a file name.
  wake_word_filename="${WAKE_WORD//[ \`~\!@#\$%^&*\(\)\{\}\[\]\|\;\'\"<>.?\/]/_}"
  mkdir -p "${WORK_DIR}/test_sample" || :
  "${PSG}/generate_samples.py" "${WAKE_WORD}" \
    "${MODEL_ARGS[@]}" \
    --max-samples "${SAMPLES}" \
    --batch-size "${BATCH_SIZE}" \
    --output-dir "${WORK_DIR}/test_sample" \
    --max-speakers 100 2>&1 \
    | sed -r -e "s/(DEBUG|INFO):__main__:/ /g"
  # The pipeline's status is sed's, so a generator failure would otherwise
  # only surface as a confusing "mv" error; check the generator explicitly.
  generator_status=${PIPESTATUS[0]}
  if [ "${generator_status}" -ne 0 ] ; then
    echo "ERROR: sample generation failed (exit status ${generator_status})" >&2
    exit "${generator_status}"
  fi
  mv "${WORK_DIR}/test_sample/0.wav" "${WORK_DIR}/test_sample/${wake_word_filename}.wav"
  echo "Sample available at ${WORK_DIR}/test_sample/${wake_word_filename}.wav"
  echo "Play it from your host."
  exit 0
fi

# ---------------------------------------------------------------------------
# Decide whether the existing sample set can be reused
# ---------------------------------------------------------------------------
# Compare against the recorded signature as a fixed string matching the whole
# line: MODEL_TAG contains '.' (a regex wildcard), and a substring match would
# accept a signature that is merely a prefix/suffix of the stored one (e.g.
# after a voice file was removed).
run_signature="${WAKE_WORD}:${SAMPLES}:${MODEL_TAG}"
grep -qxF -- "${run_signature}" "${WORK_DIR}/last_wake_word" &>/dev/null || REGENERATE=true

# Double check that the number of existing samples matches SAMPLES
existing_samples=$(find "${SAMPLES_DIR}" -name '*.wav' | wc -l)
[ "${existing_samples}" -eq "${SAMPLES}" ] || REGENERATE=true

START_TS=$EPOCHSECONDS

if [ "${REGENERATE}" != "true" ] ; then
  echo "Sample generation not required"
  echo
  exit 0
fi

# ---------------------------------------------------------------------------
# Full generation
# ---------------------------------------------------------------------------
echo -e "\n===== Generating ${SAMPLES} wake word samples in batches of ${BATCH_SIZE} (language=${LANGUAGE}) ====="

# Environment settings exported for the child generator processes.
export TF_CPP_MIN_LOG_LEVEL=9
export TF_FORCE_GPU_ALLOW_GROWTH=true
export TF_GPU_ALLOCATOR=cuda_malloc_async
export TF_XLA_FLAGS="--tf_xla_auto_jit=0"
export NVIDIA_TF32_OVERRIDE=1
export TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512
export GLOG_minloglevel=2
export GRPC_VERBOSITY=ERROR

echo " Generating samples"
# Start from an empty directory so stale samples never count toward the total.
rm -rf "${SAMPLES_DIR}" || :
mkdir -p "${SAMPLES_DIR}" || :

# Arguments before "--" are for the progress wrapper; those after it are
# passed on for the generator itself.
python "${PROGDIR}/run_generator_with_progress.py" \
  --generator "${PSG}/generate_samples.py" \
  --output-dir "${SAMPLES_DIR}" \
  --max-samples "${SAMPLES}" \
  -- \
  "${WAKE_WORD}" \
  "${MODEL_ARGS[@]}" \
  --max-samples "${SAMPLES}" \
  --batch-size "${BATCH_SIZE}" \
  --output-dir "${SAMPLES_DIR}"

generated_files=$(find "${SAMPLES_DIR}" -name '*.wav' | wc -l)
if [ "${generated_files}" -ne "${SAMPLES}" ] ; then
  echo "ERROR: only generated ${generated_files} files" >&2
  exit 1
fi

# Record what was generated so the next run can skip regeneration.
echo "${run_signature}" > "${WORK_DIR}/last_wake_word"
echo

END_TS=$EPOCHSECONDS
print_elapsed_time "${START_TS}" "${END_TS}" "Generated ${SAMPLES} wake word samples."

exit 0