mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Update advanced_training_notebook.ipynb
This commit is contained in:
@@ -205,8 +205,8 @@
|
|||||||
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" corrupted_files = []\n",
|
" corrupted_files = []\n",
|
||||||
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
|
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
|
||||||
" for row in tqdm(audioset_dataset, desc=\"Processing Audioset files\"):\n",
|
" for row in tqdm(audioset_dataset, desc=\"Processing Audioset files\"):\n",
|
||||||
" try:\n",
|
" try:\n",
|
||||||
" # Define output file name and path\n",
|
" # Define output file name and path\n",
|
||||||
@@ -215,6 +215,8 @@
|
|||||||
" \n",
|
" \n",
|
||||||
" # Read and convert audio\n",
|
" # Read and convert audio\n",
|
||||||
" data = row[\"audio\"][\"array\"]\n",
|
" data = row[\"audio\"][\"array\"]\n",
|
||||||
|
" if data is None or len(data) == 0:\n",
|
||||||
|
" raise ValueError(f\"Empty audio data in file: {row['audio']['path']}\")\n",
|
||||||
" scipy.io.wavfile.write(output_path, 16000, (data * 32767).astype(np.int16))\n",
|
" scipy.io.wavfile.write(output_path, 16000, (data * 32767).astype(np.int16))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user