Update advanced_training_notebook.ipynb

2026-06-12 20:10:19 -06:00 · 2025-01-05 09:04:58 -06:00
parent 6dc57f201b
commit ece7219f4f
1 changed file with 6 additions and 7 deletions
--- a/advanced_training_notebook.ipynb
+++ b/advanced_training_notebook.ipynb
@@ -156,6 +156,7 @@
    "from datasets import load_dataset\n",
    "\n",
    "# Function to download and process RIR dataset\n",
+    "# Function to download and process RIR dataset\n",
    "def download_rir_dataset(dataset_name, output_dir, split=\"train\"):\n",
    "    output_dir = Path(output_dir)\n",
    "    if not output_dir.exists():\n",
@@ -165,11 +166,9 @@
    "            print(f\"Downloading {dataset_name} to {output_dir}...\")\n",
    "            for row in tqdm(rir_dataset):\n",
    "                name = Path(row['audio']['path']).name\n",
-    "                scipy.io.wavfile.write(\n",
-    "                    output_dir / name,\n",
-    "                    16000,\n",
-    "                    (row['audio']['array'] * 32767).astype(np.int16)\n",
-    "                )\n",
+    "                # Save the original audio file\n",
+    "                with open(output_dir / name, \"wb\") as audio_file:\n",
+    "                    audio_file.write(row[\"audio\"][\"bytes\"])\n",
    "            print(f\"Finished downloading {dataset_name} to {output_dir}.\\n\")\n",
    "        except Exception as e:\n",
    "            print(f\"Error downloading {dataset_name}: {e}\")\n",
@@ -713,7 +712,7 @@
    "# Define the JSON metadata for the model\n",
    "json_data = {\n",
    "    \"type\": \"micro\",\n",
-    "    \"wake_word\": \"hey_norman\",  # Adjust based on your target wake word\n",
+    "    \"wake_word\": \"khum_puter\",  # Adjust based on your target wake word\n",
    "    \"author\": \"master phooey\",\n",
    "    \"website\": \"https://github.com/MasterPhooey/MicroWakeWord-Trainer-Docker\",\n",
    "    \"model\": \"stream_state_internal_quant.tflite\",\n",