mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Add files via upload
This commit is contained in:
83
dockerfile
83
dockerfile
@@ -1,69 +1,38 @@
|
|||||||
# Use Ubuntu 20.04 as the base image
|
# CUDA + cuDNN userspace from NVIDIA (no manual repo installs needed)
|
||||||
FROM ubuntu:20.04
|
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
|
||||||
|
|
||||||
# Set environment variables for non-interactive installations and Python buffering
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
PYTHONUNBUFFERED=1 \
|
||||||
ENV PYTHONUNBUFFERED=1
|
PIP_NO_CACHE_DIR=1
|
||||||
|
|
||||||
# Install system dependencies
|
# System deps
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
wget curl git unzip software-properties-common build-essential \
|
python3.10 python3.10-venv python3.10-distutils python3-pip \
|
||||||
libsndfile1 libffi-dev python3-dev g++ cmake gnupg && \
|
git wget curl unzip ca-certificates \
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
build-essential g++ cmake \
|
||||||
|
libsndfile1 libffi-dev \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Add deadsnakes PPA for Python 3.10
|
# Use python3.10 everywhere
|
||||||
RUN add-apt-repository ppa:deadsnakes/ppa && \
|
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
|
||||||
apt-get update && apt-get install -y python3.10 python3.10-dev python3.10-distutils && \
|
&& update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install pip for Python 3.10
|
# Python deps
|
||||||
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
|
COPY requirements.txt /tmp/requirements.txt
|
||||||
|
RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt
|
||||||
|
|
||||||
# Add NVIDIA's CUDA repository and install CUDA 12.4 Toolkit
|
# Workspace + notebook fallback
|
||||||
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
|
|
||||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
|
||||||
wget https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
|
|
||||||
dpkg -i cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \
|
|
||||||
cp /var/cuda-repo-ubuntu2004-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
|
|
||||||
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
|
|
||||||
apt-get -y --allow-unauthenticated install cuda-toolkit-12-4 && \
|
|
||||||
apt-get -y --allow-unauthenticated install cuda-drivers && \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
|
|
||||||
rm -f cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb
|
|
||||||
|
|
||||||
# Install CuDNN 9.3
|
|
||||||
RUN wget https://developer.download.nvidia.com/compute/cudnn/9.3.0/local_installers/cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
|
|
||||||
dpkg -i cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \
|
|
||||||
cp /var/cudnn-local-repo-ubuntu2004-9.3.0/cudnn-*-keyring.gpg /usr/share/keyrings/ && \
|
|
||||||
apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \
|
|
||||||
apt-get -y --allow-unauthenticated install cudnn && \
|
|
||||||
apt-get -y --allow-unauthenticated install cudnn-cuda-12 && \
|
|
||||||
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \
|
|
||||||
rm -f cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb
|
|
||||||
|
|
||||||
# Install Python dependencies from requirements.txt
|
|
||||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/requirements.txt /tmp/requirements.txt
|
|
||||||
RUN pip install --no-cache-dir -r /tmp/requirements.txt
|
|
||||||
|
|
||||||
# Ensure numpy is installed for Python 3.10
|
|
||||||
RUN python3.10 -m pip install --no-cache-dir numpy==1.26.4
|
|
||||||
|
|
||||||
# Create a data directory for external mapping
|
|
||||||
RUN mkdir -p /data
|
RUN mkdir -p /data
|
||||||
|
WORKDIR /data
|
||||||
|
COPY microWakeWord_training_notebook.ipynb /root/
|
||||||
|
|
||||||
# Copy the notebooks to a fallback location in the container
|
# Startup script (copies default notebook if missing, then launches JupyterLab)
|
||||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/basic_training_notebook.ipynb /root/basic_training_notebook.ipynb
|
COPY startup.sh /usr/local/bin/startup.sh
|
||||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/advanced_training_notebook.ipynb /root/advanced_training_notebook.ipynb
|
|
||||||
|
|
||||||
# Add the startup script from GitHub
|
|
||||||
ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/startup.sh /usr/local/bin/startup.sh
|
|
||||||
RUN chmod +x /usr/local/bin/startup.sh
|
RUN chmod +x /usr/local/bin/startup.sh
|
||||||
|
|
||||||
# Ensure /data is the default directory for Jupyter
|
|
||||||
WORKDIR /data
|
|
||||||
|
|
||||||
# Expose the Jupyter Notebook port
|
|
||||||
EXPOSE 8888
|
EXPOSE 8888
|
||||||
|
|
||||||
# Run the startup script and start Jupyter Notebook
|
# Launch Lab (tokenless for local dev; set a token if you want auth)
|
||||||
CMD ["/bin/bash", "-c", "/usr/local/bin/startup.sh && jupyter notebook --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --notebook-dir=/data"]
|
CMD ["/bin/bash", "-lc", "/usr/local/bin/startup.sh && \
|
||||||
|
exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root \
|
||||||
|
--ServerApp.token='' --ServerApp.password='' --ServerApp.root_dir=/data"]
|
||||||
223
requirements.txt
223
requirements.txt
@@ -1,196 +1,33 @@
|
|||||||
absl-py==2.1.0
|
# --- Core training (Microwakeword) ---
|
||||||
aiohappyeyeballs==2.4.4
|
|
||||||
aiohttp==3.11.11
|
|
||||||
aiosignal==1.3.2
|
|
||||||
anyio==4.7.0
|
|
||||||
argon2-cffi==23.1.0
|
|
||||||
argon2-cffi-bindings==21.2.0
|
|
||||||
arrow==1.3.0
|
|
||||||
asttokens==3.0.0
|
|
||||||
astunparse==1.6.3
|
|
||||||
async-lru==2.0.4
|
|
||||||
async-timeout==5.0.1
|
|
||||||
attrs==24.3.0
|
|
||||||
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
|
|
||||||
audiomentations==0.38.0
|
|
||||||
audioread==3.0.1
|
|
||||||
babel==2.16.0
|
|
||||||
beautifulsoup4==4.12.3
|
|
||||||
bidict==0.23.1
|
|
||||||
bitstruct==8.19.0
|
|
||||||
bleach==6.2.0
|
|
||||||
cachetools==5.5.0
|
|
||||||
certifi==2024.12.14
|
|
||||||
cffi==1.17.1
|
|
||||||
charset-normalizer==3.4.1
|
|
||||||
comm==0.2.2
|
|
||||||
datasets==3.2.0
|
|
||||||
debugpy==1.8.11
|
|
||||||
decorator==5.1.1
|
|
||||||
defusedxml==0.7.1
|
|
||||||
dill==0.3.8
|
|
||||||
exceptiongroup==1.2.2
|
|
||||||
executing==2.1.0
|
|
||||||
fastjsonschema==2.21.1
|
|
||||||
filelock==3.16.1
|
|
||||||
flatbuffers==24.12.23
|
|
||||||
fqdn==1.5.1
|
|
||||||
frozenlist==1.5.0
|
|
||||||
fsspec==2024.9.0
|
|
||||||
gast==0.4.0
|
|
||||||
google-auth==2.37.0
|
|
||||||
google-auth-oauthlib==1.0.0
|
|
||||||
google-pasta==0.2.0
|
|
||||||
grpcio==1.68.1
|
|
||||||
h11==0.14.0
|
|
||||||
h5py==3.12.1
|
|
||||||
httpcore==1.0.7
|
|
||||||
httpx==0.28.1
|
|
||||||
huggingface-hub==0.27.0
|
|
||||||
idna==3.10
|
|
||||||
ipykernel==6.29.5
|
|
||||||
ipython==8.31.0
|
|
||||||
ipywidgets==8.1.5
|
|
||||||
isoduration==20.11.0
|
|
||||||
jedi==0.19.2
|
|
||||||
Jinja2==3.1.5
|
|
||||||
joblib==1.4.2
|
|
||||||
json5==0.10.0
|
|
||||||
jsonpointer==3.0.0
|
|
||||||
jsonschema==4.23.0
|
|
||||||
jsonschema-specifications==2024.10.1
|
|
||||||
jupyter==1.1.1
|
|
||||||
jupyter-console==6.6.3
|
|
||||||
jupyter-events==0.11.0
|
|
||||||
jupyter-lsp==2.2.5
|
|
||||||
jupyter_client==8.6.3
|
|
||||||
jupyter_core==5.7.2
|
|
||||||
jupyter_server==2.15.0
|
|
||||||
jupyter_server_terminals==0.5.3
|
|
||||||
jupyterlab==4.3.4
|
|
||||||
jupyterlab_pygments==0.3.0
|
|
||||||
jupyterlab_server==2.27.3
|
|
||||||
jupyterlab_widgets==3.0.13
|
|
||||||
keras==3.7.0
|
|
||||||
lazy_loader==0.4
|
|
||||||
libclang==18.1.1
|
|
||||||
librosa==0.10.2.post1
|
|
||||||
llvmlite==0.43.0
|
|
||||||
Markdown==3.7
|
|
||||||
markdown-it-py==3.0.0
|
|
||||||
MarkupSafe==3.0.2
|
|
||||||
matplotlib-inline==0.1.7
|
|
||||||
mdurl==0.1.2
|
|
||||||
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword
|
|
||||||
mistune==3.0.2
|
|
||||||
ml-dtypes==0.4.1
|
|
||||||
mmap_ninja==0.7.4
|
|
||||||
more-itertools==8.14.0
|
|
||||||
mpmath==1.3.0
|
|
||||||
msgpack==1.1.0
|
|
||||||
multidict==6.1.0
|
|
||||||
multiprocess==0.70.16
|
|
||||||
namex==0.0.8
|
|
||||||
nbclient==0.10.2
|
|
||||||
nbconvert==7.16.4
|
|
||||||
nbformat==5.10.4
|
|
||||||
nest-asyncio==1.6.0
|
|
||||||
networkx==3.4.2
|
|
||||||
notebook==7.3.2
|
|
||||||
notebook_shim==0.2.4
|
|
||||||
numba==0.60.0
|
|
||||||
numpy==1.26.4
|
|
||||||
numpy-minmax==0.3.1
|
|
||||||
numpy-rms==0.4.2
|
|
||||||
nvidia-cublas-cu12==12.4.5.8
|
|
||||||
nvidia-cuda-cupti-cu12==12.4.127
|
|
||||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
|
||||||
nvidia-cuda-runtime-cu12==12.4.127
|
|
||||||
nvidia-cudnn-cu12==9.1.0.70
|
|
||||||
nvidia-cufft-cu12==11.2.1.3
|
|
||||||
nvidia-curand-cu12==10.3.5.147
|
|
||||||
nvidia-cusolver-cu12==11.6.1.9
|
|
||||||
nvidia-cusparse-cu12==12.3.1.170
|
|
||||||
nvidia-nccl-cu12==2.21.5
|
|
||||||
nvidia-nvjitlink-cu12==12.4.127
|
|
||||||
nvidia-nvtx-cu12==12.4.127
|
|
||||||
oauthlib==3.2.2
|
|
||||||
opt_einsum==3.4.0
|
|
||||||
optree==0.13.1
|
|
||||||
overrides==7.7.0
|
|
||||||
packaging==24.2
|
|
||||||
pandas==2.2.3
|
|
||||||
pandocfilters==1.5.1
|
|
||||||
parso==0.8.4
|
|
||||||
pexpect==4.9.0
|
|
||||||
piper_phonemize_cross==1.2.1
|
|
||||||
platformdirs==4.3.6
|
|
||||||
pooch==1.8.2
|
|
||||||
pprintpp==0.4.0
|
|
||||||
prometheus_client==0.21.1
|
|
||||||
prompt_toolkit==3.0.48
|
|
||||||
propcache==0.2.1
|
|
||||||
protobuf==4.25.5
|
|
||||||
psutil==6.1.1
|
|
||||||
ptyprocess==0.7.0
|
|
||||||
pure_eval==0.2.3
|
|
||||||
pyarrow==18.1.0
|
|
||||||
pyasn1==0.6.1
|
|
||||||
pyasn1_modules==0.4.1
|
|
||||||
pycparser==2.22
|
|
||||||
Pygments==2.18.0
|
|
||||||
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
|
|
||||||
python-dateutil==2.9.0.post0
|
|
||||||
python-json-logger==3.2.1
|
|
||||||
pytz==2024.2
|
|
||||||
PyYAML==6.0.2
|
|
||||||
pyzmq==26.2.0
|
|
||||||
referencing==0.35.1
|
|
||||||
requests==2.32.3
|
|
||||||
requests-oauthlib==2.0.0
|
|
||||||
rfc3339-validator==0.1.4
|
|
||||||
rfc3986-validator==0.1.1
|
|
||||||
rich==13.9.4
|
|
||||||
rpds-py==0.22.3
|
|
||||||
rsa==4.9
|
|
||||||
scikit-learn==1.6.0
|
|
||||||
scipy==1.12.0
|
|
||||||
Send2Trash==1.8.3
|
|
||||||
six==1.17.0
|
|
||||||
sniffio==1.3.1
|
|
||||||
soundfile==0.12.1
|
|
||||||
soupsieve==2.6
|
|
||||||
soxr==0.5.0.post1
|
|
||||||
stack-data==0.6.3
|
|
||||||
sympy==1.13.1
|
|
||||||
tensorboard==2.18.0
|
|
||||||
tensorboard-data-server==0.7.2
|
|
||||||
tensorflow==2.18.0
|
|
||||||
tensorflow-estimator==2.13.0
|
|
||||||
tensorflow-io-gcs-filesystem==0.37.1
|
|
||||||
termcolor==2.5.0
|
|
||||||
terminado==0.18.1
|
|
||||||
threadpoolctl==3.5.0
|
|
||||||
tinycss2==1.4.0
|
|
||||||
tomli==2.2.1
|
|
||||||
torch==2.5.1
|
torch==2.5.1
|
||||||
torchaudio==2.5.1
|
torchaudio==2.5.1
|
||||||
tornado==6.4.2
|
numpy==1.26.4
|
||||||
tqdm==4.67.1
|
scipy==1.12.0
|
||||||
traitlets==5.14.3
|
librosa==0.10.2.post1
|
||||||
triton==3.1.0
|
soundfile==0.12.1
|
||||||
types-python-dateutil==2.9.0.20241206
|
soxr==0.5.0.post1
|
||||||
typing_extensions==4.12.2
|
audiomentations==0.38.0
|
||||||
tzdata==2024.2
|
|
||||||
uri-template==1.3.0
|
|
||||||
urllib3==2.3.0
|
|
||||||
wcwidth==0.2.13
|
|
||||||
webcolors==24.11.1
|
|
||||||
webencodings==0.5.1
|
|
||||||
webrtcvad==2.0.10
|
webrtcvad==2.0.10
|
||||||
websocket-client==1.8.0
|
tqdm==4.67.1
|
||||||
Werkzeug==3.1.3
|
scikit-learn==1.6.0
|
||||||
widgetsnbextension==4.0.13
|
numba==0.60.0
|
||||||
wrapt==1.17.0
|
joblib==1.4.2
|
||||||
xxhash==3.5.0
|
pandas==2.2.3
|
||||||
yarl==1.18.3
|
# feature extractors + metadata helpers your repo uses
|
||||||
|
pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762
|
||||||
|
audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f
|
||||||
|
bitstruct==8.19.0
|
||||||
|
|
||||||
|
# --- Piper sample generation ---
|
||||||
|
onnxruntime-gpu>=1.16.0
|
||||||
|
piper-phonemize-cross==1.2.1
|
||||||
|
|
||||||
|
# --- Notebook / tooling (keep light) ---
|
||||||
|
ipykernel==6.29.5
|
||||||
|
jupyterlab==4.3.4
|
||||||
|
ipywidgets==8.1.5
|
||||||
|
matplotlib-inline==0.1.7
|
||||||
|
rich==13.9.4
|
||||||
|
|
||||||
|
# --- microWakeWord ---
|
||||||
|
-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword
|
||||||
30
startup.sh
30
startup.sh
@@ -1,19 +1,23 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
# Check if basic training notebook exists in /data
|
: "${NB_UID:=0}"
|
||||||
if [ ! -f /data/basic_training_notebook.ipynb ]; then
|
: "${NB_GID:=0}"
|
||||||
echo "Basic training notebook not found in /data. Copying the default notebook..."
|
umask 002
|
||||||
cp /root/basic_training_notebook.ipynb /data/basic_training_notebook.ipynb
|
|
||||||
else
|
NOTEBOOK_SRC="/root/microWakeWord_training_notebook.ipynb"
|
||||||
echo "Basic training notebook already exists in /data. Skipping copy."
|
NOTEBOOK_DST="/data/microWakeWord_training_notebook.ipynb"
|
||||||
|
|
||||||
|
mkdir -p /data /data/generated_samples
|
||||||
|
|
||||||
|
if [[ ! -f "$NOTEBOOK_DST" ]]; then
|
||||||
|
echo "No training notebook found in /data; copying default…"
|
||||||
|
cp -n "$NOTEBOOK_SRC" "$NOTEBOOK_DST"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if advanced training notebook exists in /data
|
# Try to align ownership for convenience (ignore errors if not permitted)
|
||||||
if [ ! -f /data/advanced_training_notebook.ipynb ]; then
|
if [[ "$NB_UID" != "0" || "$NB_GID" != "0" ]]; then
|
||||||
echo "Advanced training notebook not found in /data. Copying the default notebook..."
|
chown -R "$NB_UID:$NB_GID" /data || true
|
||||||
cp /root/advanced_training_notebook.ipynb /data/advanced_training_notebook.ipynb
|
|
||||||
else
|
|
||||||
echo "Advanced training notebook already exists in /data. Skipping copy."
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exec "$@"
|
exec "$@"
|
||||||
Reference in New Issue
Block a user