Update advanced_training_notebook.ipynb

This commit is contained in:
MasterPhooey
2025-01-05 13:50:19 -06:00
committed by GitHub
parent 23ca2da7f4
commit 16f8e78846

View File

@@ -150,6 +150,7 @@
"from datasets import Dataset, Audio, load_dataset\n", "from datasets import Dataset, Audio, load_dataset\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"from tqdm import tqdm\n", "from tqdm import tqdm\n",
"import soundfile as sf\n",
"\n", "\n",
"# -----------------------------\n", "# -----------------------------\n",
"# Download and Process MIT RIR\n", "# Download and Process MIT RIR\n",
@@ -173,11 +174,6 @@
"# -----------------------------\n", "# -----------------------------\n",
"# Download and Process Audioset\n", "# Download and Process Audioset\n",
"# -----------------------------\n", "# -----------------------------\n",
"from datasets import Dataset, Audio\n",
"import os\n",
"from pathlib import Path\n",
"import scipy.io.wavfile\n",
"from tqdm import tqdm\n",
"\n", "\n",
"# Directory setup\n", "# Directory setup\n",
"audioset_dir = \"./audioset\"\n", "audioset_dir = \"./audioset\"\n",
@@ -223,12 +219,17 @@
" if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n", " if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n",
" raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n", " raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n",
"\n", "\n",
" # Ensure array data is valid before writing\n",
" array_data = row[\"audio\"][\"array\"]\n",
" if not isinstance(array_data, np.ndarray) or len(array_data.shape) == 0:\n",
" raise ValueError(f\"Invalid array data in file: {row['audio']['path']}\")\n",
"\n",
" scipy.io.wavfile.write(\n", " scipy.io.wavfile.write(\n",
" output_path,\n", " output_path,\n",
" 16000,\n", " 16000,\n",
" (row[\"audio\"][\"array\"] * 32767).astype(np.int16),\n", " (array_data * 32767).astype(np.int16),\n",
" )\n", " )\n",
" except Exception as e:\n", " except (sf.LibsndfileError, ValueError, Exception) as e:\n",
" # Log and skip problematic files\n", " # Log and skip problematic files\n",
" print(f\"Error converting {row['audio']['path']}: {e}\")\n", " print(f\"Error converting {row['audio']['path']}: {e}\")\n",
" corrupted_files.append(row[\"audio\"][\"path\"])\n", " corrupted_files.append(row[\"audio\"][\"path\"])\n",