diff --git a/advanced_training_notebook.ipynb b/advanced_training_notebook.ipynb index 3b1a5f7..a138bf6 100644 --- a/advanced_training_notebook.ipynb +++ b/advanced_training_notebook.ipynb @@ -156,6 +156,7 @@ "from datasets import load_dataset\n", "\n", "# Function to download and process RIR dataset\n", + "# Function to download and process RIR dataset\n", "def download_rir_dataset(dataset_name, output_dir, split=\"train\"):\n", " output_dir = Path(output_dir)\n", " if not output_dir.exists():\n", @@ -165,17 +166,15 @@ " print(f\"Downloading {dataset_name} to {output_dir}...\")\n", " for row in tqdm(rir_dataset):\n", " name = Path(row['audio']['path']).name\n", - " scipy.io.wavfile.write(\n", - " output_dir / name,\n", - " 16000,\n", - " (row['audio']['array'] * 32767).astype(np.int16)\n", - " )\n", + " # Save the original audio file\n", + " with open(output_dir / name, \"wb\") as audio_file:\n", + " audio_file.write(row[\"audio\"][\"bytes\"])\n", " print(f\"Finished downloading {dataset_name} to {output_dir}.\\n\")\n", " except Exception as e:\n", " print(f\"Error downloading {dataset_name}: {e}\")\n", " else:\n", " print(f\"{output_dir} already exists. Skipping download.\\n\")\n", - "\n", + " \n", "# Download MIT RIRs\n", "download_rir_dataset(\n", " \"davidscripka/MIT_environmental_impulse_responses\",\n", @@ -713,7 +712,7 @@ "# Define the JSON metadata for the model\n", "json_data = {\n", " \"type\": \"micro\",\n", - " \"wake_word\": \"hey_norman\", # Adjust based on your target wake word\n", + " \"wake_word\": \"khum_puter\", # Adjust based on your target wake word\n", " \"author\": \"master phooey\",\n", " \"website\": \"https://github.com/MasterPhooey/MicroWakeWord-Trainer-Docker\",\n", " \"model\": \"stream_state_internal_quant.tflite\",\n",