diff --git a/advanced_training_notebook.ipynb b/advanced_training_notebook.ipynb index 51c3046..7c8a779 100644 --- a/advanced_training_notebook.ipynb +++ b/advanced_training_notebook.ipynb @@ -173,9 +173,19 @@ "# -----------------------------\n", "# Download and Process Audioset\n", "# -----------------------------\n", + "from datasets import Dataset, Audio\n", + "import os\n", + "from pathlib import Path\n", + "import scipy.io.wavfile\n", + "from tqdm import tqdm\n", + "\n", + "# Directory setup\n", "audioset_dir = \"./audioset\"\n", "os.makedirs(audioset_dir, exist_ok=True)\n", "\n", + "output_dir = \"./audioset_16k\"\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "\n", "# Full-scale dataset download links\n", "dataset_links = [\n", " f\"https://huggingface.co/datasets/agkphysics/AudioSet/resolve/main/data/bal_train0{i}.tar\"\n", @@ -192,10 +202,6 @@ " print(f\"Extracting {file_name}...\")\n", " os.system(f\"tar -xf {out_path} -C {audioset_dir}\")\n", "\n", - "# Directory for 16kHz WAV files\n", - "output_dir = \"./audioset_16k\"\n", - "os.makedirs(output_dir, exist_ok=True)\n", - "\n", "# Collect all FLAC files for processing\n", "audioset_files = list(Path(audioset_dir).glob(\"**/*.flac\"))\n", "print(f\"Number of FLAC files found: {len(audioset_files)}\")\n", @@ -212,14 +218,18 @@ " # Define output file name and path\n", " name = Path(row[\"audio\"][\"path\"]).stem + \".wav\"\n", " output_path = Path(output_dir) / name\n", - " \n", + "\n", " # Read and convert audio\n", - " data = row[\"audio\"][\"array\"]\n", - " if data is None or len(data) == 0:\n", - " raise ValueError(f\"Empty audio data in file: {row['audio']['path']}\")\n", - " scipy.io.wavfile.write(output_path, 16000, (data * 32767).astype(np.int16))\n", - " \n", + " if row[\"audio\"][\"array\"] is None or len(row[\"audio\"][\"array\"]) == 0:\n", + " raise ValueError(f\"Empty or invalid audio data in file: {row['audio']['path']}\")\n", + "\n", + " scipy.io.wavfile.write(\n", + " output_path,\n", + " 16000,\n", + " (row[\"audio\"][\"array\"] * 32767).astype(np.int16),\n", + " )\n", " except Exception as e:\n", + " # Log and skip problematic files\n", " print(f\"Error converting {row['audio']['path']}: {e}\")\n", " corrupted_files.append(row[\"audio\"][\"path\"])\n", "\n",