mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Add files via upload
This commit is contained in:
83
dockerfile
83
dockerfile
@@ -1,69 +1,38 @@
|
||||
# Use Ubuntu 20.04 as the base image
|
||||
FROM ubuntu:20.04
|
||||
# CUDA + cuDNN userspace from NVIDIA (no manual repo installs needed)
|
||||
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
|
||||
|
||||
# Set environment variables for non-interactive installations and Python buffering
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1
|
||||
|
||||
# Install system dependencies
|
||||
# System deps
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
wget curl git unzip software-properties-common build-essential \
|
||||
libsndfile1 libffi-dev python3-dev g++ cmake gnupg && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
python3.10 python3.10-venv python3.10-distutils python3-pip \
|
||||
git wget curl unzip ca-certificates \
|
||||
build-essential g++ cmake \
|
||||
libsndfile1 libffi-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Add deadsnakes PPA for Python 3.10
|
||||
RUN add-apt-repository ppa:deadsnakes/ppa && \
|
||||
apt-get update && apt-get install -y python3.10 python3.10-dev python3.10-distutils && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
# Use python3.10 everywhere
|
||||
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
|
||||
&& update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
|
||||
|
||||
# Install pip for Python 3.10
|
||||
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
|
||||
# Python deps
|
||||
COPY requirements.txt /tmp/requirements.txt
|
||||
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt
|
||||
|
||||
# Add NVIDIA's CUDA repository and install CUDA 12.4 Toolkit
|
||||
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
|
||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
||||
wget https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
|
||||
dpkg -i cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
|
||||
cp /var/cuda-repo-ubuntu2004-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
|
||||
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
|
||||
apt-get -y --allow-unauthenticated install cuda-toolkit-12-4 && \
|
||||
apt-get -y --allow-unauthenticated install cuda-drivers && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
|
||||
rm -f cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb
|
||||
|
||||
# Install CuDNN 9.3
|
||||
RUN wget https://developer.download.nvidia.com/compute/cudnn/9.3.0/local_installers/cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
|
||||
dpkg -i cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
|
||||
cp /var/cudnn-local-repo-ubuntu2004-9.3.0/cudnn-*-keyring.gpg /usr/share/keyrings/ && \
|
||||
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
|
||||
apt-get -y --allow-unauthenticated install cudnn && \
|
||||
apt-get -y --allow-unauthenticated install cudnn-cuda-12 && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
|
||||
rm -f cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb
|
||||
|
||||
# Install Python dependencies from requirements.txt
|
||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/requirements.txt /tmp/requirements.txt
|
||||
RUN pip install --no-cache-dir -r /tmp/requirements.txt
|
||||
|
||||
# Ensure numpy is installed for Python 3.10
|
||||
RUN python3.10 -m pip install --no-cache-dir numpy==1.26.4
|
||||
|
||||
# Create a data directory for external mapping
|
||||
# Workspace + notebook fallback
|
||||
RUN mkdir -p /data
|
||||
WORKDIR /data
|
||||
COPY microWakeWord_training_notebook.ipynb /root/
|
||||
|
||||
# Copy the notebooks to a fallback location in the container
|
||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/basic_training_notebook.ipynb /root/basic_training_notebook.ipynb
|
||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/advanced_training_notebook.ipynb /root/advanced_training_notebook.ipynb
|
||||
|
||||
# Add the startup script from GitHub
|
||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/startup.sh /usr/local/bin/startup.sh
|
||||
# Startup script (copies default notebook if missing, then launches JupyterLab)
|
||||
COPY startup.sh /usr/local/bin/startup.sh
|
||||
RUN chmod +x /usr/local/bin/startup.sh
|
||||
|
||||
# Ensure /data is the default directory for Jupyter
|
||||
WORKDIR /data
|
||||
|
||||
# Expose the Jupyter Notebook port
|
||||
EXPOSE 8888
|
||||
|
||||
# Run the startup script and start Jupyter Notebook
|
||||
CMD ["/bin/bash", "-c", "/usr/local/bin/startup.sh && jupyter notebook --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --notebook-dir=/data"]
|
||||
# Launch Lab (tokenless for local dev; set a token if you want auth)
|
||||
CMD ["/bin/bash", "-lc", "/usr/local/bin/startup.sh && \
|
||||
exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root \
|
||||
--ServerApp.token='' --ServerApp.password='' --ServerApp.root_dir=/data"]
|
||||
223
requirements.txt
223
requirements.txt
@@ -1,196 +1,33 @@
|
||||
absl-py==2.1.0
|
||||
aiohappyeyeballs==2.4.4
|
||||
aiohttp==3.11.11
|
||||
aiosignal==1.3.2
|
||||
anyio==4.7.0
|
||||
argon2-cffi==23.1.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
arrow==1.3.0
|
||||
asttokens==3.0.0
|
||||
astunparse==1.6.3
|
||||
async-lru==2.0.4
|
||||
async-timeout==5.0.1
|
||||
attrs==24.3.0
|
||||
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
|
||||
audiomentations==0.38.0
|
||||
audioread==3.0.1
|
||||
babel==2.16.0
|
||||
beautifulsoup4==4.12.3
|
||||
bidict==0.23.1
|
||||
bitstruct==8.19.0
|
||||
bleach==6.2.0
|
||||
cachetools==5.5.0
|
||||
certifi==2024.12.14
|
||||
cffi==1.17.1
|
||||
charset-normalizer==3.4.1
|
||||
comm==0.2.2
|
||||
datasets==3.2.0
|
||||
debugpy==1.8.11
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
dill==0.3.8
|
||||
exceptiongroup==1.2.2
|
||||
executing==2.1.0
|
||||
fastjsonschema==2.21.1
|
||||
filelock==3.16.1
|
||||
flatbuffers==24.12.23
|
||||
fqdn==1.5.1
|
||||
frozenlist==1.5.0
|
||||
fsspec==2024.9.0
|
||||
gast==0.4.0
|
||||
google-auth==2.37.0
|
||||
google-auth-oauthlib==1.0.0
|
||||
google-pasta==0.2.0
|
||||
grpcio==1.68.1
|
||||
h11==0.14.0
|
||||
h5py==3.12.1
|
||||
httpcore==1.0.7
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.27.0
|
||||
idna==3.10
|
||||
ipykernel==6.29.5
|
||||
ipython==8.31.0
|
||||
ipywidgets==8.1.5
|
||||
isoduration==20.11.0
|
||||
jedi==0.19.2
|
||||
Jinja2==3.1.5
|
||||
joblib==1.4.2
|
||||
json5==0.10.0
|
||||
jsonpointer==3.0.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
jupyter==1.1.1
|
||||
jupyter-console==6.6.3
|
||||
jupyter-events==0.11.0
|
||||
jupyter-lsp==2.2.5
|
||||
jupyter_client==8.6.3
|
||||
jupyter_core==5.7.2
|
||||
jupyter_server==2.15.0
|
||||
jupyter_server_terminals==0.5.3
|
||||
jupyterlab==4.3.4
|
||||
jupyterlab_pygments==0.3.0
|
||||
jupyterlab_server==2.27.3
|
||||
jupyterlab_widgets==3.0.13
|
||||
keras==3.7.0
|
||||
lazy_loader==0.4
|
||||
libclang==18.1.1
|
||||
librosa==0.10.2.post1
|
||||
llvmlite==0.43.0
|
||||
Markdown==3.7
|
||||
markdown-it-py==3.0.0
|
||||
MarkupSafe==3.0.2
|
||||
matplotlib-inline==0.1.7
|
||||
mdurl==0.1.2
|
||||
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword
|
||||
mistune==3.0.2
|
||||
ml-dtypes==0.4.1
|
||||
mmap_ninja==0.7.4
|
||||
more-itertools==8.14.0
|
||||
mpmath==1.3.0
|
||||
msgpack==1.1.0
|
||||
multidict==6.1.0
|
||||
multiprocess==0.70.16
|
||||
namex==0.0.8
|
||||
nbclient==0.10.2
|
||||
nbconvert==7.16.4
|
||||
nbformat==5.10.4
|
||||
nest-asyncio==1.6.0
|
||||
networkx==3.4.2
|
||||
notebook==7.3.2
|
||||
notebook_shim==0.2.4
|
||||
numba==0.60.0
|
||||
numpy==1.26.4
|
||||
numpy-minmax==0.3.1
|
||||
numpy-rms==0.4.2
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
nvidia-cuda-runtime-cu12==12.4.127
|
||||
nvidia-cudnn-cu12==9.1.0.70
|
||||
nvidia-cufft-cu12==11.2.1.3
|
||||
nvidia-curand-cu12==10.3.5.147
|
||||
nvidia-cusolver-cu12==11.6.1.9
|
||||
nvidia-cusparse-cu12==12.3.1.170
|
||||
nvidia-nccl-cu12==2.21.5
|
||||
nvidia-nvjitlink-cu12==12.4.127
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
oauthlib==3.2.2
|
||||
opt_einsum==3.4.0
|
||||
optree==0.13.1
|
||||
overrides==7.7.0
|
||||
packaging==24.2
|
||||
pandas==2.2.3
|
||||
pandocfilters==1.5.1
|
||||
parso==0.8.4
|
||||
pexpect==4.9.0
|
||||
piper_phonemize_cross==1.2.1
|
||||
platformdirs==4.3.6
|
||||
pooch==1.8.2
|
||||
pprintpp==0.4.0
|
||||
prometheus_client==0.21.1
|
||||
prompt_toolkit==3.0.48
|
||||
propcache==0.2.1
|
||||
protobuf==4.25.5
|
||||
psutil==6.1.1
|
||||
ptyprocess==0.7.0
|
||||
pure_eval==0.2.3
|
||||
pyarrow==18.1.0
|
||||
pyasn1==0.6.1
|
||||
pyasn1_modules==0.4.1
|
||||
pycparser==2.22
|
||||
Pygments==2.18.0
|
||||
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
|
||||
python-dateutil==2.9.0.post0
|
||||
python-json-logger==3.2.1
|
||||
pytz==2024.2
|
||||
PyYAML==6.0.2
|
||||
pyzmq==26.2.0
|
||||
referencing==0.35.1
|
||||
requests==2.32.3
|
||||
requests-oauthlib==2.0.0
|
||||
rfc3339-validator==0.1.4
|
||||
rfc3986-validator==0.1.1
|
||||
rich==13.9.4
|
||||
rpds-py==0.22.3
|
||||
rsa==4.9
|
||||
scikit-learn==1.6.0
|
||||
scipy==1.12.0
|
||||
Send2Trash==1.8.3
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
soundfile==0.12.1
|
||||
soupsieve==2.6
|
||||
soxr==0.5.0.post1
|
||||
stack-data==0.6.3
|
||||
sympy==1.13.1
|
||||
tensorboard==2.18.0
|
||||
tensorboard-data-server==0.7.2
|
||||
tensorflow==2.18.0
|
||||
tensorflow-estimator==2.13.0
|
||||
tensorflow-io-gcs-filesystem==0.37.1
|
||||
termcolor==2.5.0
|
||||
terminado==0.18.1
|
||||
threadpoolctl==3.5.0
|
||||
tinycss2==1.4.0
|
||||
tomli==2.2.1
|
||||
# --- Core training (Microwakeword) ---
|
||||
torch==2.5.1
|
||||
torchaudio==2.5.1
|
||||
tornado==6.4.2
|
||||
tqdm==4.67.1
|
||||
traitlets==5.14.3
|
||||
triton==3.1.0
|
||||
types-python-dateutil==2.9.0.20241206
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2024.2
|
||||
uri-template==1.3.0
|
||||
urllib3==2.3.0
|
||||
wcwidth==0.2.13
|
||||
webcolors==24.11.1
|
||||
webencodings==0.5.1
|
||||
numpy==1.26.4
|
||||
scipy==1.12.0
|
||||
librosa==0.10.2.post1
|
||||
soundfile==0.12.1
|
||||
soxr==0.5.0.post1
|
||||
audiomentations==0.38.0
|
||||
webrtcvad==2.0.10
|
||||
websocket-client==1.8.0
|
||||
Werkzeug==3.1.3
|
||||
widgetsnbextension==4.0.13
|
||||
wrapt==1.17.0
|
||||
xxhash==3.5.0
|
||||
yarl==1.18.3
|
||||
tqdm==4.67.1
|
||||
scikit-learn==1.6.0
|
||||
numba==0.60.0
|
||||
joblib==1.4.2
|
||||
pandas==2.2.3
|
||||
# feature extractors + metadata helpers your repo uses
|
||||
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
|
||||
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
|
||||
bitstruct==8.19.0
|
||||
|
||||
# --- Piper sample generation ---
|
||||
onnxruntime-gpu>=1.16.0
|
||||
piper-phonemize-cross==1.2.1
|
||||
|
||||
# --- Notebook / tooling (keep light) ---
|
||||
ipykernel==6.29.5
|
||||
jupyterlab==4.3.4
|
||||
ipywidgets==8.1.5
|
||||
matplotlib-inline==0.1.7
|
||||
rich==13.9.4
|
||||
|
||||
# --- microWakeWord ---
|
||||
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword
|
||||
30
startup.sh
30
startup.sh
@@ -1,19 +1,23 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Check if basic training notebook exists in /data
|
||||
if [ ! -f /data/basic_training_notebook.ipynb ]; then
|
||||
echo "Basic training notebook not found in /data. Copying the default notebook..."
|
||||
cp /root/basic_training_notebook.ipynb /data/basic_training_notebook.ipynb
|
||||
else
|
||||
echo "Basic training notebook already exists in /data. Skipping copy."
|
||||
: "${NB_UID:=0}"
|
||||
: "${NB_GID:=0}"
|
||||
umask 002
|
||||
|
||||
NOTEBOOK_SRC="/root/microWakeWord_training_notebook.ipynb"
|
||||
NOTEBOOK_DST="/data/microWakeWord_training_notebook.ipynb"
|
||||
|
||||
mkdir -p /data /data/generated_samples
|
||||
|
||||
if [[ ! -f "$NOTEBOOK_DST" ]]; then
|
||||
echo "No training notebook found in /data; copying default…"
|
||||
cp -n "$NOTEBOOK_SRC" "$NOTEBOOK_DST"
|
||||
fi
|
||||
|
||||
# Check if advanced training notebook exists in /data
|
||||
if [ ! -f /data/advanced_training_notebook.ipynb ]; then
|
||||
echo "Advanced training notebook not found in /data. Copying the default notebook..."
|
||||
cp /root/advanced_training_notebook.ipynb /data/advanced_training_notebook.ipynb
|
||||
else
|
||||
echo "Advanced training notebook already exists in /data. Skipping copy."
|
||||
# Try to align ownership for convenience (ignore errors if not permitted)
|
||||
if [[ "$NB_UID" != "0" || "$NB_GID" != "0" ]]; then
|
||||
chown -R "$NB_UID:$NB_GID" /data || true
|
||||
fi
|
||||
|
||||
exec "$@"
|
||||
Reference in New Issue
Block a user