From 7137c55482bb0c1cbeb9df30d5ace7ef1f1de742 Mon Sep 17 00:00:00 2001 From: Tater Totterson Date: Fri, 26 Sep 2025 19:35:09 -0500 Subject: [PATCH] Add files via upload --- dockerfile | 83 ++++++------------ requirements.txt | 223 +++++++---------------------------------------- startup.sh | 32 ++++--- 3 files changed, 74 insertions(+), 264 deletions(-) diff --git a/dockerfile b/dockerfile index ed2e224..c02cbc1 100644 --- a/dockerfile +++ b/dockerfile @@ -1,69 +1,38 @@ -# Use Ubuntu 20.04 as the base image -FROM ubuntu:20.04 +# CUDA + cuDNN userspace from NVIDIA (no manual repo installs needed) +FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 -# Set environment variables for non-interactive installations and Python buffering -ENV DEBIAN_FRONTEND=noninteractive -ENV PYTHONUNBUFFERED=1 +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 -# Install system dependencies +# System deps RUN apt-get update && apt-get install -y --no-install-recommends \ - wget curl git unzip software-properties-common build-essential \ - libsndfile1 libffi-dev python3-dev g++ cmake gnupg && \ - apt-get clean && rm -rf /var/lib/apt/lists/* + python3.10 python3.10-venv python3.10-distutils python3-pip \ + git wget curl unzip ca-certificates \ + build-essential g++ cmake \ + libsndfile1 libffi-dev \ + && rm -rf /var/lib/apt/lists/* -# Add deadsnakes PPA for Python 3.10 -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && apt-get install -y python3.10 python3.10-dev python3.10-distutils && \ - apt-get clean && rm -rf /var/lib/apt/lists/* +# Use python3.10 everywhere +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \ + && update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 -# Install pip for Python 3.10 -RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 +# Python deps +COPY requirements.txt /tmp/requirements.txt +RUN pip install --upgrade pip && pip install -r /tmp/requirements.txt -# Add NVIDIA's CUDA repository and install CUDA 12.4 Toolkit -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - wget https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \ - dpkg -i cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb && \ - cp /var/cuda-repo-ubuntu2004-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \ - apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \ - apt-get -y --allow-unauthenticated install cuda-toolkit-12-4 && \ - apt-get -y --allow-unauthenticated install cuda-drivers && \ - apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \ - rm -f cuda-repo-ubuntu2004-12-4-local_12.4.0-550.54.14-1_amd64.deb - -# Install CuDNN 9.3 -RUN wget https://developer.download.nvidia.com/compute/cudnn/9.3.0/local_installers/cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \ - dpkg -i cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb && \ - cp /var/cudnn-local-repo-ubuntu2004-9.3.0/cudnn-*-keyring.gpg /usr/share/keyrings/ && \ - apt-get update -o Acquire::AllowInsecureRepositories=true -o Acquire::AllowDowngradeToInsecureRepositories=true && \ - apt-get -y --allow-unauthenticated install cudnn && \ - apt-get -y --allow-unauthenticated install cudnn-cuda-12 && \ - apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* && \ - rm -f cudnn-local-repo-ubuntu2004-9.3.0_1.0-1_amd64.deb - -# Install Python dependencies from requirements.txt -ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt - -# Ensure numpy is installed for Python 3.10 -RUN python3.10 -m pip install --no-cache-dir numpy==1.26.4 - -# Create a data directory for external mapping +# Workspace + notebook fallback RUN mkdir -p /data +WORKDIR /data +COPY microWakeWord_training_notebook.ipynb /root/ -# Copy the notebooks to a fallback location in the container -ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/basic_training_notebook.ipynb /root/basic_training_notebook.ipynb -ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/advanced_training_notebook.ipynb /root/advanced_training_notebook.ipynb - -# Add the startup script from GitHub -ADD https://raw.githubusercontent.com/MasterPhooey/MicroWakeWord-Trainer-Docker/refs/heads/main/startup.sh /usr/local/bin/startup.sh +# Startup script (copies default notebook if missing, then launches JupyterLab) +COPY startup.sh /usr/local/bin/startup.sh RUN chmod +x /usr/local/bin/startup.sh -# Ensure /data is the default directory for Jupyter -WORKDIR /data - -# Expose the Jupyter Notebook port EXPOSE 8888 -# Run the startup script and start Jupyter Notebook -CMD ["/bin/bash", "-c", "/usr/local/bin/startup.sh && jupyter notebook --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --notebook-dir=/data"] +# Launch Lab (tokenless for local dev; set a token if you want auth) +CMD ["/bin/bash", "-lc", "/usr/local/bin/startup.sh && \ + exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root \ + --ServerApp.token='' --ServerApp.password='' --ServerApp.root_dir=/data"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 48ae8c1..09679b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,196 +1,33 @@ -absl-py==2.1.0 -aiohappyeyeballs==2.4.4 -aiohttp==3.11.11 -aiosignal==1.3.2 -anyio==4.7.0 -argon2-cffi==23.1.0 -argon2-cffi-bindings==21.2.0 -arrow==1.3.0 -asttokens==3.0.0 -astunparse==1.6.3 -async-lru==2.0.4 -async-timeout==5.0.1 -attrs==24.3.0 -audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f -audiomentations==0.38.0 -audioread==3.0.1 -babel==2.16.0 -beautifulsoup4==4.12.3 -bidict==0.23.1 -bitstruct==8.19.0 -bleach==6.2.0 -cachetools==5.5.0 -certifi==2024.12.14 -cffi==1.17.1 -charset-normalizer==3.4.1 -comm==0.2.2 -datasets==3.2.0 -debugpy==1.8.11 -decorator==5.1.1 -defusedxml==0.7.1 -dill==0.3.8 -exceptiongroup==1.2.2 -executing==2.1.0 -fastjsonschema==2.21.1 -filelock==3.16.1 -flatbuffers==24.12.23 -fqdn==1.5.1 -frozenlist==1.5.0 -fsspec==2024.9.0 -gast==0.4.0 -google-auth==2.37.0 -google-auth-oauthlib==1.0.0 -google-pasta==0.2.0 -grpcio==1.68.1 -h11==0.14.0 -h5py==3.12.1 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.27.0 -idna==3.10 -ipykernel==6.29.5 -ipython==8.31.0 -ipywidgets==8.1.5 -isoduration==20.11.0 -jedi==0.19.2 -Jinja2==3.1.5 -joblib==1.4.2 -json5==0.10.0 -jsonpointer==3.0.0 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -jupyter==1.1.1 -jupyter-console==6.6.3 -jupyter-events==0.11.0 -jupyter-lsp==2.2.5 -jupyter_client==8.6.3 -jupyter_core==5.7.2 -jupyter_server==2.15.0 -jupyter_server_terminals==0.5.3 -jupyterlab==4.3.4 -jupyterlab_pygments==0.3.0 -jupyterlab_server==2.27.3 -jupyterlab_widgets==3.0.13 -keras==3.7.0 -lazy_loader==0.4 -libclang==18.1.1 -librosa==0.10.2.post1 -llvmlite==0.43.0 -Markdown==3.7 -markdown-it-py==3.0.0 -MarkupSafe==3.0.2 -matplotlib-inline==0.1.7 -mdurl==0.1.2 --e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword -mistune==3.0.2 -ml-dtypes==0.4.1 -mmap_ninja==0.7.4 -more-itertools==8.14.0 -mpmath==1.3.0 -msgpack==1.1.0 -multidict==6.1.0 -multiprocess==0.70.16 -namex==0.0.8 -nbclient==0.10.2 -nbconvert==7.16.4 -nbformat==5.10.4 -nest-asyncio==1.6.0 -networkx==3.4.2 -notebook==7.3.2 -notebook_shim==0.2.4 -numba==0.60.0 -numpy==1.26.4 -numpy-minmax==0.3.1 -numpy-rms==0.4.2 -nvidia-cublas-cu12==12.4.5.8 -nvidia-cuda-cupti-cu12==12.4.127 -nvidia-cuda-nvrtc-cu12==12.4.127 -nvidia-cuda-runtime-cu12==12.4.127 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu12==11.2.1.3 -nvidia-curand-cu12==10.3.5.147 -nvidia-cusolver-cu12==11.6.1.9 -nvidia-cusparse-cu12==12.3.1.170 -nvidia-nccl-cu12==2.21.5 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.4.127 -oauthlib==3.2.2 -opt_einsum==3.4.0 -optree==0.13.1 -overrides==7.7.0 -packaging==24.2 -pandas==2.2.3 -pandocfilters==1.5.1 -parso==0.8.4 -pexpect==4.9.0 -piper_phonemize_cross==1.2.1 -platformdirs==4.3.6 -pooch==1.8.2 -pprintpp==0.4.0 -prometheus_client==0.21.1 -prompt_toolkit==3.0.48 -propcache==0.2.1 -protobuf==4.25.5 -psutil==6.1.1 -ptyprocess==0.7.0 -pure_eval==0.2.3 -pyarrow==18.1.0 -pyasn1==0.6.1 -pyasn1_modules==0.4.1 -pycparser==2.22 -Pygments==2.18.0 -pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762 -python-dateutil==2.9.0.post0 -python-json-logger==3.2.1 -pytz==2024.2 -PyYAML==6.0.2 -pyzmq==26.2.0 -referencing==0.35.1 -requests==2.32.3 -requests-oauthlib==2.0.0 -rfc3339-validator==0.1.4 -rfc3986-validator==0.1.1 -rich==13.9.4 -rpds-py==0.22.3 -rsa==4.9 -scikit-learn==1.6.0 -scipy==1.12.0 -Send2Trash==1.8.3 -six==1.17.0 -sniffio==1.3.1 -soundfile==0.12.1 -soupsieve==2.6 -soxr==0.5.0.post1 -stack-data==0.6.3 -sympy==1.13.1 -tensorboard==2.18.0 -tensorboard-data-server==0.7.2 -tensorflow==2.18.0 -tensorflow-estimator==2.13.0 -tensorflow-io-gcs-filesystem==0.37.1 -termcolor==2.5.0 -terminado==0.18.1 -threadpoolctl==3.5.0 -tinycss2==1.4.0 -tomli==2.2.1 +# --- Core training (Microwakeword) --- torch==2.5.1 torchaudio==2.5.1 -tornado==6.4.2 -tqdm==4.67.1 -traitlets==5.14.3 -triton==3.1.0 -types-python-dateutil==2.9.0.20241206 -typing_extensions==4.12.2 -tzdata==2024.2 -uri-template==1.3.0 -urllib3==2.3.0 -wcwidth==0.2.13 -webcolors==24.11.1 -webencodings==0.5.1 +numpy==1.26.4 +scipy==1.12.0 +librosa==0.10.2.post1 +soundfile==0.12.1 +soxr==0.5.0.post1 +audiomentations==0.38.0 webrtcvad==2.0.10 -websocket-client==1.8.0 -Werkzeug==3.1.3 -widgetsnbextension==4.0.13 -wrapt==1.17.0 -xxhash==3.5.0 -yarl==1.18.3 +tqdm==4.67.1 +scikit-learn==1.6.0 +numba==0.60.0 +joblib==1.4.2 +pandas==2.2.3 +# feature extractors + metadata helpers your repo uses +pymicro_features @ git+https://github.com/puddly/pymicro-features@e1d3f88183e12bb8af2df9e399ea157af7393762 +audio-metadata @ git+https://github.com/whatsnowplaying/audio-metadata@d4ebb238e6a401bb1a5aaaac60c9e2b3cb30929f +bitstruct==8.19.0 + +# --- Piper sample generation --- +onnxruntime-gpu>=1.16.0 +piper-phonemize-cross==1.2.1 + +# --- Notebook / tooling (keep light) --- +ipykernel==6.29.5 +jupyterlab==4.3.4 +ipywidgets==8.1.5 +matplotlib-inline==0.1.7 +rich==13.9.4 + +# --- microWakeWord --- +-e git+https://github.com/kahrendt/microWakeWord@ac6502bf48b5e372c47ed509f5f5ca181e6d50bb#egg=microwakeword \ No newline at end of file diff --git a/startup.sh b/startup.sh index 20ed0a9..92dab09 100644 --- a/startup.sh +++ b/startup.sh @@ -1,19 +1,23 @@ -#!/bin/bash +#!/usr/bin/env bash +set -euo pipefail -# Check if basic training notebook exists in /data -if [ ! -f /data/basic_training_notebook.ipynb ]; then - echo "Basic training notebook not found in /data. Copying the default notebook..." - cp /root/basic_training_notebook.ipynb /data/basic_training_notebook.ipynb -else - echo "Basic training notebook already exists in /data. Skipping copy." +: "${NB_UID:=0}" +: "${NB_GID:=0}" +umask 002 + +NOTEBOOK_SRC="/root/microWakeWord_training_notebook.ipynb" +NOTEBOOK_DST="/data/microWakeWord_training_notebook.ipynb" + +mkdir -p /data /data/generated_samples + +if [[ ! -f "$NOTEBOOK_DST" ]]; then + echo "No training notebook found in /data; copying default…" + cp -n "$NOTEBOOK_SRC" "$NOTEBOOK_DST" fi -# Check if advanced training notebook exists in /data -if [ ! -f /data/advanced_training_notebook.ipynb ]; then - echo "Advanced training notebook not found in /data. Copying the default notebook..." - cp /root/advanced_training_notebook.ipynb /data/advanced_training_notebook.ipynb -else - echo "Advanced training notebook already exists in /data. Skipping copy." +# Try to align ownership for convenience (ignore errors if not permitted) +if [[ "$NB_UID" != "0" || "$NB_GID" != "0" ]]; then + chown -R "$NB_UID:$NB_GID" /data || true fi -exec "$@" +exec "$@" \ No newline at end of file