mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Update advanced_training_notebook.ipynb
This commit is contained in:
@@ -202,31 +202,28 @@
|
||||
"print(f\"Number of FLAC files found: {len(audioset_files)}\")\n",
|
||||
"\n",
|
||||
"if audioset_files:\n",
|
||||
" audioset_dataset = Dataset.from_dict({\"audio\": [str(file) for file in audioset_files]})\n",
|
||||
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
||||
"\n",
|
||||
" corrupted_files = []\n",
|
||||
"\n",
|
||||
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
|
||||
" for row in tqdm(audioset_dataset, desc=\"Processing Audioset files\"):\n",
|
||||
" for file_path in tqdm(audioset_files, desc=\"Processing Audioset files\"):\n",
|
||||
" try:\n",
|
||||
" # Define output file name and path\n",
|
||||
" name = Path(row[\"audio\"][\"path\"]).stem + \".wav\"\n",
|
||||
" output_path = Path(output_dir) / name\n",
|
||||
"\n",
|
||||
" # Check if audio data is valid before writing\n",
|
||||
" if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n",
|
||||
" raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n",
|
||||
" # Attempt to load the file and handle any errors\n",
|
||||
" audio, sampling_rate = sf.read(file_path)\n",
|
||||
" \n",
|
||||
" if audio is None or len(audio) == 0:\n",
|
||||
" raise ValueError(f\"Empty or invalid audio data in file: {file_path}\")\n",
|
||||
"\n",
|
||||
" # Resample audio to 16kHz\n",
|
||||
" output_path = Path(output_dir) / (file_path.stem + \".wav\")\n",
|
||||
" scipy.io.wavfile.write(\n",
|
||||
" output_path,\n",
|
||||
" 16000,\n",
|
||||
" (row[\"audio\"][\"array\"] * 32767).astype(np.int16),\n",
|
||||
" (audio * 32767).astype(np.int16),\n",
|
||||
" )\n",
|
||||
" except (sf.LibsndfileError, ValueError, Exception) as e:\n",
|
||||
" # Log the error and skip the file\n",
|
||||
" print(f\"Error converting {row['audio']['path']}: {e}\")\n",
|
||||
" corrupted_files.append(row[\"audio\"][\"path\"])\n",
|
||||
" print(f\"Error converting {file_path}: {e}\")\n",
|
||||
" corrupted_files.append(str(file_path))\n",
|
||||
"\n",
|
||||
" # Log corrupted files\n",
|
||||
" if corrupted_files:\n",
|
||||
|
||||
Reference in New Issue
Block a user