mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Update advanced_training_notebook.ipynb
This commit is contained in:
@@ -174,8 +174,7 @@
|
|||||||
"# Download and Process Audioset\n",
|
"# Download and Process Audioset\n",
|
||||||
"# -----------------------------\n",
|
"# -----------------------------\n",
|
||||||
"audioset_dir = \"./audioset\"\n",
|
"audioset_dir = \"./audioset\"\n",
|
||||||
"if not os.path.exists(audioset_dir):\n",
|
"os.makedirs(audioset_dir, exist_ok=True)\n",
|
||||||
" os.mkdir(audioset_dir)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Full-scale dataset download links\n",
|
"# Full-scale dataset download links\n",
|
||||||
"dataset_links = [\n",
|
"dataset_links = [\n",
|
||||||
@@ -183,46 +182,56 @@
|
|||||||
" for i in range(10)\n",
|
" for i in range(10)\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Download and extract each dataset part\n",
|
||||||
"for link in dataset_links:\n",
|
"for link in dataset_links:\n",
|
||||||
" file_name = link.split(\"/\")[-1]\n",
|
" file_name = link.split(\"/\")[-1]\n",
|
||||||
" out_dir = os.path.join(audioset_dir, file_name)\n",
|
" out_path = os.path.join(audioset_dir, file_name)\n",
|
||||||
" if not os.path.exists(out_dir):\n",
|
" if not os.path.exists(out_path):\n",
|
||||||
" print(f\"Downloading {file_name}...\")\n",
|
" print(f\"Downloading {file_name}...\")\n",
|
||||||
" os.system(f\"wget --quiet -O {out_dir} {link}\")\n",
|
" os.system(f\"wget --quiet -O {out_path} {link}\")\n",
|
||||||
" print(f\"Extracting {file_name}...\")\n",
|
" print(f\"Extracting {file_name}...\")\n",
|
||||||
" os.system(f\"cd {audioset_dir} && tar -xf {file_name}\")\n",
|
" os.system(f\"tar -xf {out_path} -C {audioset_dir}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Directory for 16kHz WAV files\n",
|
||||||
"output_dir = \"./audioset_16k\"\n",
|
"output_dir = \"./audioset_16k\"\n",
|
||||||
"if not os.path.exists(output_dir):\n",
|
"os.makedirs(output_dir, exist_ok=True)\n",
|
||||||
" os.mkdir(output_dir)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Save clips to 16-bit PCM wav files\n",
|
"# Collect all FLAC files for processing\n",
|
||||||
"audioset_files = list(Path(\"audioset/audio\").glob(\"**/*.flac\"))\n",
|
"audioset_files = list(Path(audioset_dir).glob(\"**/*.flac\"))\n",
|
||||||
"print(f\"Number of FLAC files found: {len(audioset_files)}\")\n",
|
"print(f\"Number of FLAC files found: {len(audioset_files)}\")\n",
|
||||||
|
"\n",
|
||||||
"if audioset_files:\n",
|
"if audioset_files:\n",
|
||||||
" audioset_dataset = Dataset.from_dict({\"audio\": [str(file) for file in audioset_files]})\n",
|
" audioset_dataset = Dataset.from_dict({\"audio\": [str(file) for file in audioset_files]})\n",
|
||||||
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
" audioset_dataset = audioset_dataset.cast_column(\"audio\", Audio(sampling_rate=16000))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" corrupted_files = []\n",
|
" corrupted_files = []\n",
|
||||||
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
|
" print(\"Converting Audioset files to 16kHz WAV...\")\n",
|
||||||
" for row in tqdm(audioset_dataset):\n",
|
"\n",
|
||||||
|
" for row in tqdm(audioset_dataset, desc=\"Processing Audioset files\"):\n",
|
||||||
" try:\n",
|
" try:\n",
|
||||||
" name = row[\"audio\"][\"path\"].split(\"/\")[-1].replace(\".flac\", \".wav\")\n",
|
" # Define output file name and path\n",
|
||||||
" scipy.io.wavfile.write(\n",
|
" name = Path(row[\"audio\"][\"path\"]).stem + \".wav\"\n",
|
||||||
" os.path.join(output_dir, name), \n",
|
" output_path = Path(output_dir) / name\n",
|
||||||
" 16000, \n",
|
" \n",
|
||||||
" (row[\"audio\"][\"array\"] * 32767).astype(np.int16)\n",
|
" # Read and convert audio\n",
|
||||||
" )\n",
|
" data = row[\"audio\"][\"array\"]\n",
|
||||||
|
" scipy.io.wavfile.write(output_path, 16000, (data * 32767).astype(np.int16))\n",
|
||||||
|
" \n",
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" print(f\"Error converting {row['audio']['path']}: {e}\")\n",
|
" print(f\"Error converting {row['audio']['path']}: {e}\")\n",
|
||||||
" corrupted_files.append(row[\"audio\"][\"path\"])\n",
|
" corrupted_files.append(row[\"audio\"][\"path\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" # Log corrupted files\n",
|
||||||
" if corrupted_files:\n",
|
" if corrupted_files:\n",
|
||||||
" with open(\"audioset_corrupted_files.log\", \"w\") as log_file:\n",
|
" log_path = Path(output_dir) / \"audioset_corrupted_files.log\"\n",
|
||||||
|
" with open(log_path, \"w\") as log_file:\n",
|
||||||
" log_file.writelines(f\"{file}\\n\" for file in corrupted_files)\n",
|
" log_file.writelines(f\"{file}\\n\" for file in corrupted_files)\n",
|
||||||
|
" print(f\"Logged corrupted files to {log_path}\")\n",
|
||||||
"else:\n",
|
"else:\n",
|
||||||
" print(\"No FLAC files found in Audioset.\")\n",
|
" print(\"No FLAC files found in Audioset.\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"print(\"Audioset processing complete!\")\n",
|
||||||
|
"\n",
|
||||||
"# -----------------------------\n",
|
"# -----------------------------\n",
|
||||||
"# Download and Process FMA\n",
|
"# Download and Process FMA\n",
|
||||||
"# -----------------------------\n",
|
"# -----------------------------\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user