Update advanced_training_notebook.ipynb

This commit is contained in:
MasterPhooey
2025-01-05 12:42:53 -06:00
committed by GitHub
parent 65adcdbc8f
commit 9cead7813a

View File

@@ -136,7 +136,7 @@
"id": "YJRG4Qvo9nXG"
},
"outputs": [],
"source": [
"source": [
"# Downloads audio data for augmentation. This can be slow!\n",
"# Borrowed from openWakeWord's automatic_model_training.ipynb, accessed March 4, 2024\n",
"#\n",
@@ -205,8 +205,8 @@
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
"\n",
" corrupted_files = []\n",
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
"\n",
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
" for row in tqdm(audioset_dataset, desc=\"Processing Audioset files\"):\n",
" try:\n",
" # Define output file name and path\n",
@@ -215,6 +215,8 @@
" \n",
" # Read and convert audio\n",
" data = row[\"audio\"][\"array\"]\n",
" if data is None or len(data) == 0:\n",
" raise ValueError(f\"Empty audio data in file: {row['audio']['path']}\")\n",
" scipy.io.wavfile.write(output_path, 16000, (data * 32767).astype(np.int16))\n",
" \n",
" except Exception as e:\n",