piper speaking speeds

2026-06-12 20:10:19 -06:00 · 2025-12-22 19:44:09 -06:00
parent 99eaaa39cd
commit 1c954eb253
1 changed files with 74 additions and 55 deletions
--- a/microWakeWord_training_notebook.ipynb
+++ b/microWakeWord_training_notebook.ipynb
@@ -232,7 +232,7 @@
   },
   "outputs": [],
   "source": [
-    "# Generate a large number of wake word samples for training\n",
+    "# Generate a large number of wake word samples for training (with length-scale sweep)\n",
    "import sys, subprocess\n",
    "from pathlib import Path\n",
    "\n",
@@ -240,14 +240,21 @@
    "MODELS_DIR = REPO_DIR / \"models\"\n",
    "MODEL_NAME = \"en_US-libritts_r-medium.pt\"\n",
    "\n",
+    "MAX_SAMPLES = 50000\n",
+    "BATCH_SIZE = 100\n",
+    "\n",
+    "# Piper \"speed\" control via piper-sample-generator is length_scale(s)\n",
+    "LENGTH_SCALES = [\"0.85\", \"0.95\", \"1.00\", \"1.05\", \"1.15\"]\n",
+    "\n",
    "cmd = [\n",
    "    sys.executable,\n",
    "    str(REPO_DIR / \"generate_samples.py\"),\n",
    "    TARGET_WORD,\n",
    "    \"--model\", str(MODELS_DIR / MODEL_NAME),\n",
-    "    \"--max-samples\", \"50000\",\n",
-    "    \"--batch-size\", \"100\",\n",
+    "    \"--max-samples\", str(MAX_SAMPLES),\n",
+    "    \"--batch-size\", str(BATCH_SIZE),\n",
    "    \"--output-dir\", \"generated_samples\",\n",
+    "    \"--length-scales\", *LENGTH_SCALES,\n",
    "]\n",
    "\n",
    "print(\"→\", \" \".join(cmd))\n",
@@ -742,9 +749,36 @@
   },
   "outputs": [],
   "source": [
+    "# GPU memory config (set env BEFORE importing TF)\n",
+    "import os, sys, gc\n",
+    "\n",
+    "if \"tensorflow\" not in sys.modules:\n",
+    "    os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"              # grow as needed\n",
+    "    os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"          # modern CUDA allocator\n",
+    "    os.environ[\"XLA_FLAGS\"] = \"--xla_gpu_cuda_data_dir=/usr/local/cuda\"\n",
+    "    os.environ[\"TF_XLA_FLAGS\"] = \"--tf_xla_auto_jit=0\"            # disable XLA JIT (more stable mem)\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "# Per-device memory growth (belt + suspenders)\n",
+    "for g in tf.config.list_physical_devices(\"GPU\"):\n",
+    "    try:\n",
+    "        tf.config.experimental.set_memory_growth(g, True)\n",
+    "    except Exception:\n",
+    "        pass\n",
+    "print(\"GPUs:\", tf.config.list_physical_devices(\"GPU\"))\n",
+    "gc.collect()\n",
+    "\n",
+    "# Optional but recommended: mixed precision halves activation memory\n",
+    "try:\n",
+    "    from tensorflow.keras import mixed_precision\n",
+    "    mixed_precision.set_global_policy(\"mixed_float16\")\n",
+    "    print(\"Mixed precision policy:\", mixed_precision.global_policy())\n",
+    "except Exception as e:\n",
+    "    print(\"Mixed precision not enabled:\", e)\n",
+    "\n",
    "# --- Save a yaml config that controls the training process ---\n",
    "\n",
-    "import os, sys, yaml\n",
+    "import yaml\n",
    "\n",
    "config = {}\n",
    "\n",
@@ -782,7 +816,7 @@
    "with open(\"training_parameters.yaml\", \"w\") as f:\n",
    "    yaml.dump(config, f)\n",
    "\n",
-    "print(\"✅ Wrote training_parameters.yaml (batch_size=16)\")"
+    "print(\"✅ Wrote training_parameters.yaml (batch_size=16) with allow_growth, cuda_malloc_async, XLA JIT OFF, mixed precision ON.\")"
   ]
  },
  {
@@ -795,59 +829,44 @@
   "source": [
    "# Train + export (GPU-friendly env + stable flags)\n",
    "\n",
-    "import os, sys, gc, runpy\n",
+    "import os, sys\n",
+    "\n",
+    "# --- Runtime env (inherited by the subprocess we're about to launch) ---\n",
+    "os.environ.setdefault(\"LD_LIBRARY_PATH\",\n",
+    "    \"/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/lib/x86_64-linux-gnu:\" +\n",
+    "    os.environ.get(\"LD_LIBRARY_PATH\",\"\")\n",
+    ")\n",
+    "os.environ.setdefault(\"TF_CPP_MIN_LOG_LEVEL\", \"2\")            # quieter logs\n",
+    "os.environ.setdefault(\"TF_FORCE_GPU_ALLOW_GROWTH\", \"true\")    # grow VRAM as needed\n",
+    "os.environ.setdefault(\"TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")# modern allocator\n",
+    "os.environ.setdefault(\"XLA_FLAGS\", \"--xla_gpu_cuda_data_dir=/usr/local/cuda\")\n",
+    "os.environ.setdefault(\"TF_XLA_FLAGS\", \"--tf_xla_auto_jit=0\")  # disable XLA JIT (more stable)\n",
+    "os.environ.setdefault(\"NVIDIA_TF32_OVERRIDE\", \"1\")            # allow TF32 (perf/VRAM win on Ampere+)\n",
    "\n",
-    "if \"tensorflow\" not in sys.modules:\n",
-    "    os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"              # grow as needed\n",
-    "    os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"          # modern CUDA allocator\n",
-    "    os.environ[\"TF_XLA_FLAGS\"] = \"--tf_xla_auto_jit=0\"            # disable XLA JIT (more stable mem)\n",
-    "    os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"                      # quieter logs\n",
-    "    os.environ[\"NVIDIA_TF32_OVERRIDE\"] = \"1\"                      # allow TF32 (perf/VRAM win on Ampere+)\n",
    "# If you still hit GPU memory errors, uncomment to force a smaller workspace:\n",
    "# os.environ[\"TF_CUDNN_WORKSPACE_LIMIT_IN_MB\"] = \"256\"\n",
    "\n",
-    "import tensorflow as tf\n",
-    "\n",
-    "allow_growth = \"\"\n",
-    "# Per-device memory growth (belt + suspenders)\n",
-    "for g in tf.config.list_physical_devices(\"GPU\"):\n",
-    "    try:\n",
-    "        tf.config.experimental.set_memory_growth(g, True)\n",
-    "        allow_growth = \"gpu_allow_growth, \"\n",
-    "    except Exception:\n",
-    "        pass\n",
-    "print(\"GPUs:\", tf.config.list_physical_devices(\"GPU\"))\n",
-    "gc.collect()\n",
-    "\n",
-    "print(f\"✅ Set environment with {allow_growth}cuda_malloc_async, xla_auto_jit=0, min_log_level=2, nvidia_tf2_override\")\n",
-    "print(\"   Starting training...\")\n",
-    "\n",
-    "original_argv = list(sys.argv)\n",
-    "try:\n",
-    "    sys.argv = [\n",
-    "        'model_train_eval.py',\n",
-    "        '--training_config', 'training_parameters.yaml',\n",
-    "        '--train', '1',\n",
-    "        '--restore_checkpoint', '1',\n",
-    "        '--test_tf_nonstreaming', '0',\n",
-    "        '--test_tflite_nonstreaming', '0',\n",
-    "        '--test_tflite_nonstreaming_quantized', '0',\n",
-    "        '--test_tflite_streaming', '0',\n",
-    "        '--test_tflite_streaming_quantized', '1',\n",
-    "        '--use_weights', 'best_weights',\n",
-    "        'mixednet',\n",
-    "        '--pointwise_filters', '64,64,64,64',\n",
-    "        '--repeat_in_block', '1,1,1,1',\n",
-    "        '--mixconv_kernel_sizes', '[5], [7,11], [9,15], [23]',\n",
-    "        '--residual_connection', '0,0,0,0',\n",
-    "        '--first_conv_filters', '32',\n",
-    "        '--first_conv_kernel_size', '5',\n",
-    "        '--stride', '2'\n",
-    "    ]\n",
-    "    runpy.run_module(\"microwakeword.model_train_eval\", run_name=\"__main__\", alter_sys=True)\n",
-    "finally:\n",
-    "    sys.argv = original_argv\n",
-    "print(\"✅ Training and testing complete.\")\n"
+    "# --- Kick off training ---\n",
+    "cmd = f'''\"{sys.executable}\" -m microwakeword.model_train_eval \\\n",
+    "  --training_config=\"training_parameters.yaml\" \\\n",
+    "  --train 1 \\\n",
+    "  --restore_checkpoint 1 \\\n",
+    "  --test_tf_nonstreaming 0 \\\n",
+    "  --test_tflite_nonstreaming 0 \\\n",
+    "  --test_tflite_nonstreaming_quantized 0 \\\n",
+    "  --test_tflite_streaming 0 \\\n",
+    "  --test_tflite_streaming_quantized 1 \\\n",
+    "  --use_weights \"best_weights\" \\\n",
+    "  mixednet \\\n",
+    "  --pointwise_filters \"64,64,64,64\" \\\n",
+    "  --repeat_in_block \"1,1,1,1\" \\\n",
+    "  --mixconv_kernel_sizes \"[5], [7,11], [9,15], [23]\" \\\n",
+    "  --residual_connection \"0,0,0,0\" \\\n",
+    "  --first_conv_filters 32 \\\n",
+    "  --first_conv_kernel_size 5 \\\n",
+    "  --stride 2'''\n",
+    "print(\"Running:\\n\", cmd)\n",
+    "!$cmd"
   ]
  },
  {