From 2137c503204fa7108492ea426fab804d411c3dcc Mon Sep 17 00:00:00 2001 From: MasterPhooey <106418429+MasterPhooey@users.noreply.github.com> Date: Sun, 5 Jan 2025 14:22:44 -0600 Subject: [PATCH] Update advanced_training_notebook.ipynb --- advanced_training_notebook.ipynb | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/advanced_training_notebook.ipynb b/advanced_training_notebook.ipynb index fb479e4..fbf49c3 100644 --- a/advanced_training_notebook.ipynb +++ b/advanced_training_notebook.ipynb @@ -177,9 +177,8 @@ "\n", "# Directory setup\n", "audioset_dir = \"./audioset\"\n", - "os.makedirs(audioset_dir, exist_ok=True)\n", - "\n", "output_dir = \"./audioset_16k\"\n", + "os.makedirs(audioset_dir, exist_ok=True)\n", "os.makedirs(output_dir, exist_ok=True)\n", "\n", "# Full-scale dataset download links\n", @@ -215,22 +214,17 @@ " name = Path(row[\"audio\"][\"path\"]).stem + \".wav\"\n", " output_path = Path(output_dir) / name\n", "\n", - " # Read and convert audio\n", + " # Check if audio data is valid before writing\n", " if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n", " raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n", "\n", - " # Ensure array data is valid before writing\n", - " array_data = row[\"audio\"][\"array\"]\n", - " if not isinstance(array_data, np.ndarray) or len(array_data.shape) == 0:\n", - " raise ValueError(f\"Invalid array data in file: {row['audio']['path']}\")\n", - "\n", " scipy.io.wavfile.write(\n", " output_path,\n", " 16000,\n", - " (array_data * 32767).astype(np.int16),\n", + " (row[\"audio\"][\"array\"] * 32767).astype(np.int16),\n", " )\n", " except (sf.LibsndfileError, ValueError, Exception) as e:\n", - " # Log and skip problematic files\n", + " # Log the error and skip the file\n", " print(f\"Error converting {row['audio']['path']}: {e}\")\n", " corrupted_files.append(row[\"audio\"][\"path\"])\n", "\n", @@ -245,6 +239,7 @@ "\n", "print(\"Audioset processing complete!\")\n", "\n", + "\n", "# -----------------------------\n", "# Download and Process FMA\n", "# -----------------------------\n",