mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Update advanced_training_notebook.ipynb
This commit is contained in:
@@ -150,6 +150,7 @@
|
||||
"from datasets import Dataset, Audio, load_dataset\n",
|
||||
"from pathlib import Path\n",
|
||||
"from tqdm import tqdm\n",
|
||||
+"import soundfile as sf\n",
|
||||
"\n",
|
||||
"# -----------------------------\n",
|
||||
"# Download and Process MIT RIR\n",
|
||||
@@ -173,11 +174,6 @@
|
||||
"# -----------------------------\n",
|
||||
"# Download and Process Audioset\n",
|
||||
"# -----------------------------\n",
|
||||
-"from datasets import Dataset, Audio\n",
|
||||
-"import os\n",
|
||||
-"from pathlib import Path\n",
|
||||
-"import scipy.io.wavfile\n",
|
||||
-"from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"# Directory setup\n",
|
||||
"audioset_dir = \"./audioset\"\n",
|
||||
@@ -223,12 +219,17 @@
|
||||
" if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n",
|
||||
" raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n",
|
||||
"\n",
|
||||
+" # Ensure array data is valid before writing\n",
|
||||
+" array_data = row[\"audio\"][\"array\"]\n",
|
||||
+" if not isinstance(array_data, np.ndarray) or len(array_data.shape) == 0:\n",
|
||||
+" raise ValueError(f\"Invalid array data in file: {row['audio']['path']}\")\n",
|
||||
+"\n",
|
||||
" scipy.io.wavfile.write(\n",
|
||||
" output_path,\n",
|
||||
" 16000,\n",
|
||||
-" (row[\"audio\"][\"array\"] * 32767).astype(np.int16),\n",
|
||||
+" (array_data * 32767).astype(np.int16),\n",
|
||||
" )\n",
|
||||
-" except Exception as e:\n",
|
||||
+" except (sf.LibsndfileError, ValueError, Exception) as e:\n",
|
||||
" # Log and skip problematic files\n",
|
||||
" print(f\"Error converting {row['audio']['path']}: {e}\")\n",
|
||||
" corrupted_files.append(row[\"audio\"][\"path\"])\n",
|
||||
|
||||
Reference in New Issue
Block a user