Add files via upload

This commit is contained in:
Tater Totterson
2025-09-26 19:35:09 -05:00
committed by GitHub
parent 088761cedd
commit 7137c55482
3 changed files with 74 additions and 264 deletions

View File

@@ -1,69 +1,38 @@
# NOTE(review): this listing appears to be a rendered commit diff (hunk header
# "@@ -1,69 +1,38 @@") with removed and added lines interleaved and no +/-
# markers. Confirm which lines are current before building from it.
# Use Ubuntu 20.04 as the base image
FROM ubuntu:20.04
# CUDA + cuDNN userspace from NVIDIA (no manual repo installs needed)
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
# Set environment variables for non-interactive installations and Python buffering
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
# The same two variables declared in a single ENV instruction, plus
# PIP_NO_CACHE_DIR, which is pip's environment-variable form of --no-cache-dir.
# ENV values persist into the running container, not just the build.
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=1
# Install system dependencies
# System deps
# The apt index is refreshed and /var/lib/apt/lists is removed inside the same
# RUN, so the package lists are not kept in the resulting layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
wget curl git unzip software-properties-common build-essential \
libsndfile1 libffi-dev python3-dev g++ cmake gnupg && \
apt-get clean && rm -rf /var/lib/apt/lists/*
# NOTE(review): the package lines below have no RUN of their own in this
# listing; presumably they are an alternative continuation of the
# "apt-get install" line above (Python 3.10 taken from distro packages instead
# of a PPA) -- confirm against the actual Dockerfile.
python3.10 python3.10-venv python3.10-distutils python3-pip \
git wget curl unzip ca-certificates \
build-essential g++ cmake \
libsndfile1 libffi-dev \
&& rm -rf /var/lib/apt/lists/*
# Add deadsnakes PPA for Python 3.10
# Installs python3.10, its headers (-dev) and distutils from the PPA, then
# cleans the apt cache and package lists in the same layer.
RUN add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && apt-get install -y python3.10 python3.10-dev python3.10-distutils && \
apt-get clean && rm -rf /var/lib/apt/lists/*
# Use python3.10 everywhere
# Registers /usr/bin/python -> /usr/bin/python3.10 and /usr/bin/pip ->
# /usr/bin/pip3 as alternatives, each with priority 1.
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
&& update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
# Install pip for Python 3.10
# NOTE(review): this pipes a downloaded script straight into the interpreter
# with no checksum, and no SHELL/pipefail is set in this file, so a failed curl
# would not by itself fail the step.
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
# Python deps
# requirements.txt is taken from the build context (not downloaded), then pip
# is upgraded and the listed requirements are installed in one layer.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt
# Add NVIDIA's CUDA repository and install CUDA 12.4 Toolkit
# Steps: pin the CUDA repo, download and register the local-repo .deb, copy its
# keyring, install cuda-toolkit-12-4 and cuda-drivers, then delete the apt
# caches, /tmp and the downloaded .deb in the same layer.
# NOTE(review): the Acquire::AllowInsecureRepositories options and
# --allow-unauthenticated turn off apt signature verification for these installs.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
wget https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
dpkg -i cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
cp /var/cuda-repo-ubuntu2004-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
apt-get -y --allow-unauthenticated install cuda-toolkit-12-4 && \
apt-get -y --allow-unauthenticated install cuda-drivers && \
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
rm -f cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb
# Install CuDNN 9.3
# Same pattern as the CUDA step: local-repo .deb, keyring copy, unauthenticated
# apt install of cudnn and cudnn-cuda-12, cleanup in the same layer.
RUN wget https://developer.download.nvidia.com/compute/cudnn/9.3.0/local_installers/cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
dpkg -i cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
cp /var/cudnn-local-repo-ubuntu2004-9.3.0/cudnn-*-keyring.gpg /usr/share/keyrings/ && \
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
apt-get -y --allow-unauthenticated install cudnn && \
apt-get -y --allow-unauthenticated install cudnn-cuda-12 && \
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
rm -f cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb
# Install Python dependencies from requirements.txt
# The file is downloaded at build time from the repository's main branch
# (refs/heads/main), so its content is not pinned to a commit or checksum.
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt
# Ensure numpy is installed for Python 3.10
# Invokes pip through python3.10 explicitly and pins numpy to 1.26.4.
RUN python3.10 -m pip install --no-cache-dir numpy==1.26.4
# Create a data directory for external mapping
# Workspace + notebook fallback
RUN mkdir -p /data
WORKDIR /data
# Default notebook from the build context, kept under /root as the fallback copy.
COPY microWakeWord_training_notebook.ipynb /root/
# Copy the notebooks to a fallback location in the container
# (downloaded at build time from the repository's main branch)
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/basic_training_notebook.ipynb /root/basic_training_notebook.ipynb
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/advanced_training_notebook.ipynb /root/advanced_training_notebook.ipynb
# Add the startup script from GitHub
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/startup.sh /usr/local/bin/startup.sh
# Startup script from the build context (copies the default notebook into /data
# if it is missing; JupyterLab itself is launched by CMD, not by the script)
COPY startup.sh /usr/local/bin/startup.sh
# Make the script executable so it can be invoked directly by path.
RUN chmod +x /usr/local/bin/startup.sh
# Ensure /data is the default directory for Jupyter
WORKDIR /data
# Expose the Jupyter Notebook port
EXPOSE 8888
# Run the startup script and start Jupyter Notebook
# (classic Notebook server on all interfaces, run as root, token set to empty)
CMD ["/bin/bash", "-c", "/usr/local/bin/startup.sh && jupyter notebook --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --notebook-dir=/data"]
# Launch Lab (tokenless for local dev; set a token if you want auth)
# Runs startup.sh with no arguments in a login shell (-l) and, only if it
# succeeds (&&), replaces the bash process with JupyterLab via exec. Token and
# password are both empty strings; the server binds 0.0.0.0:8888 and is rooted
# at /data.
# NOTE(review): a Dockerfile honours only its last CMD -- confirm which of the
# two CMD instructions in this listing is the current one.
CMD ["/bin/bash", "-lc", "/usr/local/bin/startup.sh && \
exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root \
--ServerApp.token='' --ServerApp.password='' --ServerApp.root_dir=/data"]

View File

@@ -1,196 +1,33 @@
absl-py==2.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
anyio==4.7.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==3.0.0
astunparse==1.6.3
async-lru==2.0.4
async-timeout==5.0.1
attrs==24.3.0
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
audiomentations==0.38.0
audioread==3.0.1
babel==2.16.0
beautifulsoup4==4.12.3
bidict==0.23.1
bitstruct==8.19.0
bleach==6.2.0
cachetools==5.5.0
certifi==2024.12.14
cffi==1.17.1
charset-normalizer==3.4.1
comm==0.2.2
datasets==3.2.0
debugpy==1.8.11
decorator==5.1.1
defusedxml==0.7.1
dill==0.3.8
exceptiongroup==1.2.2
executing==2.1.0
fastjsonschema==2.21.1
filelock==3.16.1
flatbuffers==24.12.23
fqdn==1.5.1
frozenlist==1.5.0
fsspec==2024.9.0
gast==0.4.0
google-auth==2.37.0
google-auth-oauthlib==1.0.0
google-pasta==0.2.0
grpcio==1.68.1
h11==0.14.0
h5py==3.12.1
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.27.0
idna==3.10
ipykernel==6.29.5
ipython==8.31.0
ipywidgets==8.1.5
isoduration==20.11.0
jedi==0.19.2
Jinja2==3.1.5
joblib==1.4.2
json5==0.10.0
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
jupyter==1.1.1
jupyter-console==6.6.3
jupyter-events==0.11.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5.7.2
jupyter_server==2.15.0
jupyter_server_terminals==0.5.3
jupyterlab==4.3.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.13
keras==3.7.0
lazy_loader==0.4
libclang==18.1.1
librosa==0.10.2.post1
llvmlite==0.43.0
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mdurl==0.1.2
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword
mistune==3.0.2
ml-dtypes==0.4.1
mmap_ninja==0.7.4
more-itertools==8.14.0
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
multiprocess==0.70.16
namex==0.0.8
nbclient==0.10.2
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.4.2
notebook==7.3.2
notebook_shim==0.2.4
numba==0.60.0
numpy==1.26.4
numpy-minmax==0.3.1
numpy-rms==0.4.2
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
oauthlib==3.2.2
opt_einsum==3.4.0
optree==0.13.1
overrides==7.7.0
packaging==24.2
pandas==2.2.3
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
piper_phonemize_cross==1.2.1
platformdirs==4.3.6
pooch==1.8.2
pprintpp==0.4.0
prometheus_client==0.21.1
prompt_toolkit==3.0.48
propcache==0.2.1
protobuf==4.25.5
psutil==6.1.1
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==18.1.0
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
Pygments==2.18.0
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
python-dateutil==2.9.0.post0
python-json-logger==3.2.1
pytz==2024.2
PyYAML==6.0.2
pyzmq==26.2.0
referencing==0.35.1
requests==2.32.3
requests-oauthlib==2.0.0
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==13.9.4
rpds-py==0.22.3
rsa==4.9
scikit-learn==1.6.0
scipy==1.12.0
Send2Trash==1.8.3
six==1.17.0
sniffio==1.3.1
soundfile==0.12.1
soupsieve==2.6
soxr==0.5.0.post1
stack-data==0.6.3
sympy==1.13.1
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tensorflow==2.18.0
tensorflow-estimator==2.13.0
tensorflow-io-gcs-filesystem==0.37.1
termcolor==2.5.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.4.0
tomli==2.2.1
# --- Core training (microWakeWord) ---
torch==2.5.1
torchaudio==2.5.1
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
triton==3.1.0
types-python-dateutil==2.9.0.20241206
typing_extensions==4.12.2
tzdata==2024.2
uri-template==1.3.0
urllib3==2.3.0
wcwidth==0.2.13
webcolors==24.11.1
webencodings==0.5.1
numpy==1.26.4
scipy==1.12.0
librosa==0.10.2.post1
soundfile==0.12.1
soxr==0.5.0.post1
audiomentations==0.38.0
webrtcvad==2.0.10
websocket-client==1.8.0
Werkzeug==3.1.3
widgetsnbextension==4.0.13
wrapt==1.17.0
xxhash==3.5.0
yarl==1.18.3
tqdm==4.67.1
scikit-learn==1.6.0
numba==0.60.0
joblib==1.4.2
pandas==2.2.3
# Feature extractors and metadata helpers used by this repo
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
bitstruct==8.19.0
# --- Piper sample generation ---
onnxruntime-gpu>=1.16.0
piper-phonemize-cross==1.2.1
# --- Notebook / tooling (keep light) ---
ipykernel==6.29.5
jupyterlab==4.3.4
ipywidgets==8.1.5
matplotlib-inline==0.1.7
rich==13.9.4
# --- microWakeWord ---
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword

View File

@@ -1,19 +1,23 @@
#!/bin/bash
#!/usr/bin/env bash
# Abort on any failing command (-e), on use of an unset variable (-u), and on a
# failure anywhere in a pipeline (pipefail).
set -euo pipefail
# NOTE(review): this listing appears to interleave two versions of the script
# (rendered diff without +/- markers): it contains four "if" openers but only
# two "fi", so it is not runnable as shown. Confirm the current version.
# Check if basic training notebook exists in /data
if [ ! -f /data/basic_training_notebook.ipynb ]; then
echo "Basic training notebook not found in /data. Copying the default notebook..."
cp /root/basic_training_notebook.ipynb /data/basic_training_notebook.ipynb
else
echo "Basic training notebook already exists in /data. Skipping copy."
# Default NB_UID / NB_GID to 0 when they are unset or empty.
: "${NB_UID:=0}"
: "${NB_GID:=0}"
# Files created from here on are group-writable (only "other" write is masked).
umask 002
# Fallback notebook location and its destination under /data.
NOTEBOOK_SRC="/root/microWakeWord_training_notebook.ipynb"
NOTEBOOK_DST="/data/microWakeWord_training_notebook.ipynb"
# Make sure /data and /data/generated_samples both exist.
mkdir -p /data /data/generated_samples
# Seed /data with the default notebook only when none is present; cp -n also
# refuses to overwrite an existing destination file.
if [[ ! -f "$NOTEBOOK_DST" ]]; then
echo "No training notebook found in /data; copying default…"
cp -n "$NOTEBOOK_SRC" "$NOTEBOOK_DST"
fi
# Check if advanced training notebook exists in /data
if [ ! -f /data/advanced_training_notebook.ipynb ]; then
echo "Advanced training notebook not found in /data. Copying the default notebook..."
cp /root/advanced_training_notebook.ipynb /data/advanced_training_notebook.ipynb
else
echo "Advanced training notebook already exists in /data. Skipping copy."
# Try to align ownership for convenience (ignore errors if not permitted)
# Only runs when NB_UID or NB_GID differs from 0; "|| true" keeps a failed
# chown from aborting the script under "set -e".
if [[ "$NB_UID" != "0" || "$NB_GID" != "0" ]]; then
chown -R "$NB_UID:$NB_GID" /data || true
fi
# Hand control to whatever command was passed as arguments, replacing this shell.
exec "$@"