#!/bin/bash
#
# Fetches the MIT room-impulse-response audio archive and prepares a
# 16 kHz mono copy of every WAV file under
# ${DATA_DIR}/training_datasets/mit_rirs_16k.
#
# HELP, DATA_DIR, CLEANUP_ARCHIVES, CLEANUP_INTERMEDIATE_FILES,
# get_filecounts and write_filecounts are not defined in this file.
# NOTE(review): presumably they come from shell.functions (sourced below),
# which also appears to parse the command line - confirm.

set -euo pipefail

PROGPATH=$(realpath "$0")
PROGDIR=$(dirname "${PROGPATH}")
# shellcheck disable=SC1091  # resolved relative to this script at runtime
source "${PROGDIR}/shell.functions"

if [ "${HELP}" == "true" ] ; then
  # Usage goes to stderr; the delimiter is unquoted on purpose so that $0
  # and ${DATA_DIR} are expanded in the text.
  # NOTE(review): the synopsis previously said --cleanup-input-files while
  # the description (and the CLEANUP_INTERMEDIATE_FILES variable) say
  # --cleanup-intermediate-files; confirm the spelling the parser accepts.
  cat <<EOF >&2
Usage: $0 [ --cleanup-archives ] [ --cleanup-intermediate-files ] [ --data-dir=<path> ]
  --cleanup-archives           : Automatically clean up any downloaded archives after extraction.
  --cleanup-intermediate-files : Automatically clean up the intermediate files after they've
                               : been converted to 16k.
  --data-dir=<path>            : Path to the data directory.
                               : Default: ${DATA_DIR}
EOF
  exit 1
fi

# Every relative path below is resolved against the training_datasets dir.
mkdir -p "${DATA_DIR}/training_datasets/downloads" || :
cd "${DATA_DIR}/training_datasets"

AUDIO_URL="https://mcdermottlab.mit.edu/Reverb/IRMAudio/Audio.zip"
AUDIO_ZIPFILE="MIT_RIR_Audio.zip"
AUDIO_ZIP="./downloads/${AUDIO_ZIPFILE}"

# Raw extraction target (the "intermediate" files).
AUDIO_DIR="./mit_rirs"
mkdir -p "${AUDIO_DIR}" || :

# Final 16 kHz output directory.
AUDIO16K_DIR="./mit_rirs_16k"
mkdir -p "${AUDIO16K_DIR}" || :

# File in which the file count from a previous run is persisted.
AUDIO_FILECOUNT="./downloads/mit_rir_filecount"
AUDIO_IN_GLOB="*.wav"

# Count keyed by archive name; starts at 0 and is then handed, by name, to
# get_filecounts together with the persisted-count file.
declare -A filecounts=(
  [${AUDIO_ZIPFILE}]=0
)
get_filecounts filecounts "${AUDIO_FILECOUNT}"

echo "===== Checking MIT_RIR ====="

#######################################
# Convert every WAV found (recursively) under AUDIO_DIR into a 16 kHz mono
# 16-bit WAV in AUDIO16K_DIR, skipping outputs that already exist.
# Globals:   DATA_DIR, AUDIO_DIR, AUDIO16K_DIR (read)
# Arguments: none
# Outputs:   progress messages from the embedded Python program
# Returns:   non-zero if the Python program fails
# Note:      the virtualenv is activated in the calling shell, not in a
#            subshell, so it stays active after this function returns.
#######################################
converter() {
  # shellcheck disable=SC1091  # virtualenv location is only known at runtime
  source "${DATA_DIR}/.venv/bin/activate"

  # The quoted delimiter keeps the shell from expanding anything inside the
  # Python source; the two directories are passed as argv instead.
  python - "${AUDIO_DIR}" "${AUDIO16K_DIR}" <<'PYTHON_EOF'
import sys, scipy.io.wavfile, numpy as np
from pathlib import Path
import soundfile as sf
import librosa
from tqdm import tqdm


def write_wav(dst: Path, data: np.ndarray, sr: int):
    # Clip to [-1, 1] and scale to signed 16-bit PCM before writing.
    x = np.clip(data, -1.0, 1.0)
    scipy.io.wavfile.write(dst, sr, (x * 32767).astype(np.int16))


rir_in = Path(sys.argv[1])
rir_out = Path(sys.argv[2])
waves = list(rir_in.rglob("*.wav"))

try:
    print(" MIT RIR normalizing to 16k…")
    # Normalize to 16k mono
    for p in tqdm(waves, desc=" MIT_RIR (resample 16k mono)"):
        # Outputs are flattened into rir_out by base name.
        outfile = Path(rir_out / p.name)
        if outfile.exists():
            continue
        a, sr = sf.read(p, always_2d=False)
        if a.ndim > 1:
            # Multi-channel input: keep only the first channel.
            a = a[:, 0]
        if sr != 16000:
            # Not already 16 kHz: let librosa re-read the file, resampled
            # to 16 kHz mono; this replaces the samples read above.
            a, _ = librosa.load(p, sr=16000, mono=True)
        write_wav(outfile, a, 16000)
    print(" MIT RIR normalization complete")
except Exception as e2:
    print(f" MIT RIR fallback failed: {e2}")
    raise
PYTHON_EOF
}
# ---------------------------------------------------------------------------
# Main flow: decide whether the 16 kHz directory is already complete; if not,
# (re)download and extract the archive as needed, run the converter, record
# the resulting file count, and finally perform the requested cleanups.
# ---------------------------------------------------------------------------

#######################################
# Print how many entries below a directory have a name matching a glob.
# Arguments: $1 - directory to search
#            $2 - name pattern passed to find -name
# Outputs:   the count as a plain integer on stdout (0 if the directory is
#            missing or unreadable)
#######################################
count_matching() {
  local dir=$1 pattern=$2 n
  # find exits non-zero on a missing directory and pipefail propagates that;
  # wc has still printed 0 by then, so the failure is deliberately ignored.
  n=$(find "${dir}" -name "${pattern}" 2>/dev/null | wc -l) || :
  # Arithmetic expansion drops the padding some wc implementations add.
  printf '%s\n' "$(( n ))"
}

#######################################
# Delete the downloaded archive when CLEANUP_ARCHIVES is "true" and the
# archive exists.
# Globals:   CLEANUP_ARCHIVES, AUDIO_ZIP, AUDIO_ZIPFILE (read)
#######################################
remove_archive_if_requested() {
  if [ "${CLEANUP_ARCHIVES}" == "true" ] && [ -f "${AUDIO_ZIP}" ] ; then
    echo " Cleaning up ${AUDIO_ZIPFILE}"
    rm -rf "${AUDIO_ZIP}"
  fi
}

# Count remembered for this archive; treated as 0 when absent or empty.
expected_filecount=${filecounts[${AUDIO_ZIPFILE}]:-0}
actual_filecount=$(count_matching "${AUDIO16K_DIR}" '*.wav')
write_filecount=false

if [ "${actual_filecount}" -ne 0 ] && [ "${actual_filecount}" -eq "${expected_filecount}" ] ; then
  echo " Existing ${AUDIO16K_DIR} valid"
else
  # The 16 kHz output is missing or incomplete: check whether the raw
  # extraction can be reused, otherwise re-create it from the archive.
  actual_filecount=$(count_matching "${AUDIO_DIR}" "${AUDIO_IN_GLOB}")
  if [ "${actual_filecount}" -eq 0 ] || [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then
    if [ ! -f "${AUDIO_ZIP}" ] ; then
      echo " Downloading ${AUDIO_ZIPFILE}"
      # Download to a temporary name and rename only on success, so that an
      # interrupted transfer can never be mistaken for a complete archive
      # on the next run.
      if ! curl -sfL "${AUDIO_URL}" -o "${AUDIO_ZIP}.part" ; then
        rm -f -- "${AUDIO_ZIP}.part"
        echo " Download of ${AUDIO_ZIPFILE} failed" >&2
        exit 1
      fi
      mv -f -- "${AUDIO_ZIP}.part" "${AUDIO_ZIP}"
    fi
    # :? aborts instead of running rm -rf on an empty path.
    rm -rf "${AUDIO_DIR:?}" || :
    echo " Unzipping ${AUDIO_ZIPFILE}"
    unzip -u -q -d "${AUDIO_DIR}" "${AUDIO_ZIP}"
  fi

  # The archive is no longer needed once extracted; drop it before the
  # conversion step when the caller asked for that.
  remove_archive_if_requested

  converter

  actual_filecount=$(count_matching "${AUDIO16K_DIR}" "*.wav")
  filecounts[${AUDIO_ZIPFILE}]="${actual_filecount}"
  write_filecount=true
fi

if [ "${write_filecount}" == "true" ] ; then
  write_filecounts filecounts "${AUDIO_FILECOUNT}"
fi

# Also covers the path where the 16 kHz directory was already valid but an
# archive from an earlier run is still present.
remove_archive_if_requested

if [ "${CLEANUP_INTERMEDIATE_FILES}" == "true" ] && [ -d "${AUDIO_DIR}" ]; then
  echo " Cleaning up ${AUDIO_DIR}"
  rm -rf "${AUDIO_DIR}"
fi

echo " MIT_RIR complete"
exit 0