mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Automatic Calibration
This commit is contained in:
@@ -130,6 +130,73 @@ print(f" AudioSet complete ({ok} ok, {skipped} skipped, {len(audioset_bad)} fa
|
||||
EOF
|
||||
}
|
||||
|
||||
converter_from_dataset_api() {
	# Fallback converter used when no AudioSet FLAC tarballs can be located:
	# streams the "balanced" train split of agkphysics/AudioSet through the
	# Hugging Face `datasets` API and writes each clip as a 16 kHz mono
	# 16-bit WAV into ${AUDIO16K_DIR}. Per-clip failures are collected in
	# ${AUDIO16K_DIR}/audioset_corrupted_files.log and do not abort the run.

	# shellcheck source=/dev/null
	source "${DATA_DIR}/.venv/bin/activate"

	# NOTE: `<<-` strips leading TAB characters only, so the heredoc body and
	# its terminator must stay tab-indented; Python's own indentation uses
	# spaces after the tab. The quoted 'EOF' disables shell expansion.
	python - "${AUDIO16K_DIR}" <<-'EOF'
	import sys
	from pathlib import Path

	import librosa
	import numpy as np
	import scipy.io.wavfile
	from datasets import load_dataset

	TARGET_SR = 16000


	def write_wav(dst: Path, data: np.ndarray, sr: int) -> None:
	    """Clip *data* to [-1, 1] and write it to *dst* as 16-bit PCM WAV."""
	    dst.parent.mkdir(parents=True, exist_ok=True)
	    x = np.clip(data, -1.0, 1.0)
	    scipy.io.wavfile.write(dst, sr, (x * 32767).astype(np.int16))


	audioset_out = Path(sys.argv[1])
	# Create the output directory up front so the failure log can be written
	# even when no clip was converted successfully.
	audioset_out.mkdir(parents=True, exist_ok=True)

	print(" AudioSet FLAC tarballs are unavailable; using Hugging Face datasets API instead.")
	dataset = load_dataset(
	    "agkphysics/AudioSet",
	    "balanced",
	    split="train",
	    streaming=True,
	)

	audioset_bad = []
	ok = 0
	skipped = 0
	heartbeat_every = 250

	for idx, sample in enumerate(dataset, start=1):
	    try:
	        # Fall back to a synthetic, index-based name when the sample has
	        # no video id.
	        video_id = str(sample.get("video_id") or f"audioset_{idx:06d}")
	        outfile = audioset_out / f"{video_id}.wav"
	        if outfile.exists():
	            # Already converted on an earlier run.
	            skipped += 1
	            continue

	        audio = sample.get("audio") or {}
	        raw = audio.get("array")
	        sr = int(audio.get("sampling_rate") or 0)
	        # Check for None explicitly: np.asarray(None) is a 0-d array whose
	        # size is 1, so a size check alone would not detect a missing array.
	        if raw is None or sr <= 0:
	            raise ValueError("missing decoded audio")
	        y = np.asarray(raw)
	        if y.size == 0:
	            raise ValueError("missing decoded audio")

	        # Down-mix to mono. The channel axis is taken to be the shortest
	        # one, which covers both (channels, samples) and
	        # (samples, channels) layouts.
	        while y.ndim > 1:
	            y = y.mean(axis=int(np.argmin(y.shape)))

	        if sr != TARGET_SR:
	            y = librosa.resample(y.astype(np.float32), orig_sr=sr, target_sr=TARGET_SR)
	        if y.size == 0:
	            raise ValueError("empty audio")

	        write_wav(outfile, y, TARGET_SR)
	        ok += 1
	    except Exception as exc:
	        # Best-effort conversion: record the failure and keep going.
	        audioset_bad.append(f"{sample.get('video_id', idx)}:{exc}")

	    if idx == 1 or (idx % heartbeat_every) == 0:
	        print(f" AudioSet API progress: {idx} clips processed (ok={ok}, skipped={skipped}, failed={len(audioset_bad)})")

	if audioset_bad:
	    (audioset_out / "audioset_corrupted_files.log").write_text("\n".join(audioset_bad))

	print(f" AudioSet complete via datasets API ({ok} ok, {skipped} skipped, {len(audioset_bad)} failed)")
	EOF
}
|
||||
|
||||
expected_filecount=$(get_total_filecount filecounts)
|
||||
actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
write_filecount=false
|
||||
@@ -139,40 +206,44 @@ if [ "${actual_filecount}" -ne 0 ] ; then
|
||||
echo " Existing ${AUDIO16K_DIR} present (${actual_filecount} wav); skipping extract/convert"
|
||||
else
|
||||
dl=$(find_rev)
|
||||
[ -n "$dl" ] || { echo " Could not locate an AudioSet revision with FLAC tarballs still present on HF." ; exit 1 ; }
|
||||
rev=${dl%%,*}
|
||||
pattern=${dl##*,}
|
||||
if [ -z "$dl" ] ; then
|
||||
rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || :
|
||||
converter_from_dataset_api
|
||||
else
|
||||
rev=${dl%%,*}
|
||||
pattern=${dl##*,}
|
||||
|
||||
echo " Checking 10 tarballs"
|
||||
for i in {0..9} ; do
|
||||
fname="downloads/bal_train0${i}.tar"
|
||||
if [ ! -f "${fname}" ] ; then
|
||||
echo " Downloading bal_train0${i}.tar"
|
||||
url="${AUDIO_URL}/${rev}/${pattern}${i}.tar"
|
||||
curl -L -s --fail "${url}" -o "${fname}" || { echo "Could not fetch ${fname} at rev ${rev}; continuing." ; continue ; }
|
||||
echo " Checking 10 tarballs"
|
||||
for i in {0..9} ; do
|
||||
fname="downloads/bal_train0${i}.tar"
|
||||
if [ ! -f "${fname}" ] ; then
|
||||
echo " Downloading bal_train0${i}.tar"
|
||||
url="${AUDIO_URL}/${rev}/${pattern}${i}.tar"
|
||||
curl -L -s --fail "${url}" -o "${fname}" || { echo "Could not fetch ${fname} at rev ${rev}; continuing." ; continue ; }
|
||||
fi
|
||||
|
||||
tarball_filecount=$(tar -tvf "${fname}" | wc -l )
|
||||
filecounts["bal_train0${i}.tar"]=${tarball_filecount}
|
||||
write_filecount=true
|
||||
|
||||
echo " Untarring bal_train0${i}.tar"
|
||||
tar -xf "${fname}" -C "${AUDIO_DIR}"
|
||||
if "${CLEANUP_ARCHIVES}" && [ -f "${fname}" ] ; then
|
||||
echo " Cleaning up bal_train0${i}.tar"
|
||||
rm -rf "${fname}"
|
||||
fi
|
||||
done
|
||||
|
||||
rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || :
|
||||
converter
|
||||
|
||||
# Recompute counts and warn (but do not fail)
|
||||
expected_filecount=$(get_total_filecount filecounts)
|
||||
actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
if [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then
|
||||
echo " Converted file count(${actual_filecount}) != expected file count(${expected_filecount})" >&2
|
||||
echo " WARNING: mismatch is expected if some AudioSet files are corrupted; continuing." >&2
|
||||
fi
|
||||
|
||||
tarball_filecount=$(tar -tvf "${fname}" | wc -l )
|
||||
filecounts["bal_train0${i}.tar"]=${tarball_filecount}
|
||||
write_filecount=true
|
||||
|
||||
echo " Untarring bal_train0${i}.tar"
|
||||
tar -xf "${fname}" -C "${AUDIO_DIR}"
|
||||
if "${CLEANUP_ARCHIVES}" && [ -f "${fname}" ] ; then
|
||||
echo " Cleaning up bal_train0${i}.tar"
|
||||
rm -rf "${fname}"
|
||||
fi
|
||||
done
|
||||
|
||||
rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || :
|
||||
converter
|
||||
|
||||
# Recompute counts and warn (but do not fail)
|
||||
expected_filecount=$(get_total_filecount filecounts)
|
||||
actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || :
|
||||
if [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then
|
||||
echo " Converted file count(${actual_filecount}) != expected file count(${expected_filecount})" >&2
|
||||
echo " WARNING: mismatch is expected if some AudioSet files are corrupted; continuing." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -196,4 +267,4 @@ if "${CLEANUP_INTERMEDIATE_FILES}" && [ -d "${AUDIO_DIR}" ] ; then
|
||||
fi
|
||||
|
||||
echo " Audioset complete"
|
||||
exit 0
|
||||
exit 0
|
||||
|
||||
Reference in New Issue
Block a user