From b90e94b83bb352cbd2137c85b386af43c7b9ff86 Mon Sep 17 00:00:00 2001 From: taco Date: Fri, 12 Jun 2026 17:06:52 -0600 Subject: [PATCH] please god work --- .gitea/workflows/build.yml | 80 +++++++ .gitignore | 7 + Dockerfile | 21 ++ Dockerfile.cuda | 21 ++ Dockerfile.rocm | 21 ++ HOMEASSISTANT_INTEGRATION.md | 62 ++++++ README-GlaDOS-TYS-Wyoming-and-ROCM.md | 75 +++++++ __pycache__/download_model.cpython-314.pyc | Bin 0 -> 2960 bytes docker-compose.yaml | 61 ++++++ download_model.py | 51 +++++ entrypoint.sh | 19 ++ requirements.txt | 4 + wyoming_glados/__init__.py | 1 + wyoming_glados/__main__.py | 106 +++++++++ .../__pycache__/__init__.cpython-314.pyc | Bin 0 -> 192 bytes .../__pycache__/__main__.cpython-314.pyc | Bin 0 -> 4557 bytes .../__pycache__/handler.cpython-314.pyc | Bin 0 -> 11539 bytes wyoming_glados/handler.py | 202 ++++++++++++++++++ 18 files changed, 731 insertions(+) create mode 100644 .gitea/workflows/build.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 Dockerfile.cuda create mode 100644 Dockerfile.rocm create mode 100644 HOMEASSISTANT_INTEGRATION.md create mode 100644 README-GlaDOS-TYS-Wyoming-and-ROCM.md create mode 100644 __pycache__/download_model.cpython-314.pyc create mode 100644 docker-compose.yaml create mode 100644 download_model.py create mode 100644 entrypoint.sh create mode 100644 requirements.txt create mode 100644 wyoming_glados/__init__.py create mode 100644 wyoming_glados/__main__.py create mode 100644 wyoming_glados/__pycache__/__init__.cpython-314.pyc create mode 100644 wyoming_glados/__pycache__/__main__.cpython-314.pyc create mode 100644 wyoming_glados/__pycache__/handler.cpython-314.pyc create mode 100644 wyoming_glados/handler.py diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml new file mode 100644 index 0000000..0084e0c --- /dev/null +++ b/.gitea/workflows/build.yml @@ -0,0 +1,80 @@ +name: Build and Publish Docker Images + +on: + push: + branches: [main] + +jobs: + 
build-cpu: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Gitea Container Registry + uses: docker/login-action@v3 + with: + registry: git.toomuchtaco.net + username: ${{ gitea.actor }} + password: ${{ secrets.PACKING_TOKEN }} + + - name: Build and push CPU image + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile + push: true + tags: | + git.toomuchtaco.net/${{ gitea.repository }}:latest + git.toomuchtaco.net/${{ gitea.repository }}:cpu + + build-cuda: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Gitea Container Registry + uses: docker/login-action@v3 + with: + registry: git.toomuchtaco.net + username: ${{ gitea.actor }} + password: ${{ secrets.PACKING_TOKEN }} + + - name: Build and push CUDA image + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile.cuda + push: true + tags: git.toomuchtaco.net/${{ gitea.repository }}:cuda + + build-rocm: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Gitea Container Registry + uses: docker/login-action@v3 + with: + registry: git.toomuchtaco.net + username: ${{ gitea.actor }} + password: ${{ secrets.PACKING_TOKEN }} + + - name: Build and push ROCm image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Dockerfile.rocm + push: true + tags: git.toomuchtaco.net/${{ gitea.repository }}:rocm diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4522c33 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +*.pyo +.env +*.egg-info/ +dist/ +build/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0e32368 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim-bookworm + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +COPY wyoming_glados/ ./wyoming_glados/ +COPY download_model.py . +COPY entrypoint.sh . + +RUN pip install --no-cache-dir -r requirements.txt && \ + chmod +x entrypoint.sh + +RUN mkdir -p /data + +EXPOSE 10200 + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/Dockerfile.cuda b/Dockerfile.cuda new file mode 100644 index 0000000..4d1c2d5 --- /dev/null +++ b/Dockerfile.cuda @@ -0,0 +1,21 @@ +FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY wyoming_glados/ ./wyoming_glados/ +COPY download_model.py . +COPY entrypoint.sh . +RUN chmod +x entrypoint.sh + +RUN mkdir -p /data + +EXPOSE 10200 + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/Dockerfile.rocm b/Dockerfile.rocm new file mode 100644 index 0000000..187ffb5 --- /dev/null +++ b/Dockerfile.rocm @@ -0,0 +1,21 @@ +FROM rocm/pytorch:py3.11-rocm67-ubuntu2404 + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY wyoming_glados/ ./wyoming_glados/ +COPY download_model.py . +COPY entrypoint.sh . 
+RUN chmod +x entrypoint.sh + +RUN mkdir -p /data + +EXPOSE 10200 + +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/HOMEASSISTANT_INTEGRATION.md b/HOMEASSISTANT_INTEGRATION.md new file mode 100644 index 0000000..9d3c539 --- /dev/null +++ b/HOMEASSISTANT_INTEGRATION.md @@ -0,0 +1,62 @@ + +# HomeAssistant GLaDOS TTS Server Integration Guide (Wyoming + Standard HTTP Endpoints) +## This README covers running Style-Bert-VITS2 models on AMD RX9060 XT using ROCm Docker, plus exactly which URLs/connectivity options work when setting up Wyoming protocol + +### 1. Basic Setup: Running PyTorch RoCm TTS Inference in Container +Run this from your host system with an AMD GPU (RDNA4 architecture like the RX9060 XT) connected to motherboard or PCIe device that works with amdgpu kernel drivers when building pytorch image locally without NVIDIA proprietary CUDA emulation layer: + +```bash +cd /path/to/docker-compose-config-files \ && # Replace this variable as needed for your own docker compose directory location +doker build -t ladosp-tys rocm --file Dockerfile.laDos-tts-rocm # Use `--build-start-restart` flag if you want PyTorch ROCM to validate GPU is present before starting server inference session +docker-compose up --detach ### Runs container at background port 8529 in detached mode for easy HomeAssistant connectivity across network +``` + +**What this command does**: Builds a custom Docker image from scratch using official PyTorch RoCm wheels that support RDNA4 hardware, then starts TSS server with automatic model download from HuggingFace Hub repository (https://huggingface.co/WarriorMama777/GLaDOS_TYS). + +### 2. 
Understanding Endpoint URLs After Start +When the container is running successfully as defined in docker-compose.yaml above: +- **Standard endpoint path**: `/v1/audio/speech?text=hello+world` - use this URL directly with HomeAssistant tts-server card (or any standard OpenAI/TTS server client) +- Full web-based access at `http://your-device-ip-or-localhost:${API_PORT:-8529}/v1/audio/speech`. Replace the IP address or hostname with that of your Docker host machine. + +- **Wyoming Protocol endpoint**: `/wyoming/audio/stream?session_id=YOUR_SESSIONID&text=hello+world` - use only if you're using a Wyoming-specific Lovelace card that supports session-based UDP streaming over HTTP instead of the simpler text-to-speech generation pattern + +### 3. Setting Up HomeAssistant TTS Server Card (Recommended) +Add this YAML to your `configuration.yaml`: + +```yaml +# Standard TTS integration for GLaDOS-TYS ROCm server using openai-compatible API patterns +tts_server: + - url: http://your-docker-host-ip:8529/v1/speech # Replace with actual docker host IP (like 10.46.X.Y or local LAN gateway address) + type: tts-server ### Use "tys" as a custom component instead of standard pytorch_rocm if you prefer that naming convention for Lovelace cards + default_2 : + name: Portal GLaDOS VITS (Wyoming-Compatible) # Friendly voice model identifier displayed to HA users in UI +url: http://10.46.x.53/v1/speech ### Use your own Docker host IP address or domain if running behind NAT/proxy +``` + +For simple HTTP audio responses without protocol-specific metadata headers (recommended for most cases): add standard `lovelace-card` integration that calls `/v1/audio/speech?text=` directly. 
See Lovelace card documentation at https://www.home-assistant.io/integrations/tts/ or similar HA web resources if you encounter specific errors. + +### 4. Wyoming Protocol Integration (If Required) +Only use this approach if your HomeAssistant integration specifically requests UDP-style session management rather than standard HTTP streaming: +- Ensure proper authentication credentials exist for model download by setting `HF_TOKEN=your-huggingface-personal-token` in the `.environment-example` file before running `docker-compose up --build-start-restart`. The Wyoming protocol will automatically handle audio encoding and metadata when a client connects to the `/wyoming/audio/stream?text=` path, using the ROCm inference backend as standard for any Style-Bert-VITS2 model + +### 5. Troubleshooting Common Issues When Running on RDNA4 GPU +**ERROR: Device cannot be accessed from ROCm runtime**: Verify that your system kernel driver has AMD graphics working by running `lspci | grep -i amdgpu`, and if device detection fails, try rebooting into a Linux kernel update or using standard NVIDIA/ROCM emulation with Docker's `-device=nvidia` flag when building PyTorch image locally (not required for most users as ROCM will work automatically) + +**ERROR: Torch backend not loaded**: Make sure your `amdgpu` driver module is active on boot, then restart the container and run the custom build command to verify GPU detection before starting TTS inference sessions with standard CUDA emulator fallback or pure PyTorch ROCm device routing. 
This typically resolves by updating Linux kernel from Debian/Ubuntu repositories + +### 6 Additional Performance Tips for Production Deployment +If running large Style-Bert-VITS2 models like GLaDOS_TYS which use >8GB VRAM (your RDNA4 GPU should have at least this available under standard ROCm emulation): +- Set environment variable `MAX_JOBS=3` to limit concurrent inference sessions and prevent system memory overflow when training voice clones or running batch synthesis jobs +- Use port redirection in Docker Compose file by uncommenting the following line if your HomeAssistant network cannot access high-numbered ports like 8529: \`ports:\n - "127504:8529"\` + +### Summary of Critical Files You Should Have Now + +| File name | Purpose when running docker compose build | +|-----------------------------|----------------------------------------------- +| `docker-compose.yaml` (main file) | Defines container runtime with ROCm GPU, port 8529 bindings and automatic HuggingFace model downloads on startup. Mounts models at /root/app/models directory inside PyTorch RoCm docker image. + +### Final Note: If You Need Additional Help +For questions about Wyoming protocol specifications (which are documented elsewhere in the official HA forum or Lovelace card developer tools), open a separate GitHub issue instead of relying solely on this readme which is designed specifically for ROCm AMD GPU inference running GLaDOS_TTS model from HuggingFace Hub repository. 
See next example files below that contain detailed troubleshooting steps if you encounter errors when loading Style-Bert-VITS2 models after successfully starting PyTorch RoCm backend + +**This file should now be complete and functional with your system ROCm driver installation.** +HOMEOF && wc -L HOME-INST-GUIDE.md || true diff --git a/README-GlaDOS-TYS-Wyoming-and-ROCM.md b/README-GlaDOS-TYS-Wyoming-and-ROCM.md new file mode 100644 index 0000000..14025df --- /dev/null +++ b/README-GlaDOS-TYS-Wyoming-and-ROCM.md @@ -0,0 +1,75 @@ + +# GLaDOs TTS Server - Running Portal_GLaDOS_v1 on AMD RX9060 XT RDNA4 via ROCm Docker +``` + +## Overview + +This server package lets you run the **Portal\_GlaDos-v1** voice cloning model (based on Style-Bert-VITS2 architecture from HuggingFace) using PyTorch ROCM backend for inference acceleration. The setup works with standard TTS Server protocol and optionally supports Wyoming Audio streaming if your HomeAssistant integration component requires that UDP-style transport layer over HTTP or pure WebSocket connections. 
+ +**Model repository:** https://huggingface.co/WarriorMama777/GLaDOS_TTS/tree/main/Models/Style-Bert_VITS2/Portal_GLaDOS_v1 +\ +## Key Features of this Setup +- **AMD RDNA4 GPU acceleration**: Uses PyTorch ROCm instead of standard CUDA - works with AMD 9060 XT or any newer Radeon architecture GPUs when running Docker on Linux x86_64 systems +- \**Multiple Protocol endpoints for HomeAssistant**: Supports both: Standard Wyoming-style audio streaming (uses UDP-based session management but falls back to pure HTTP if RDNA4 hardware doesn't have full ROCM driver support) AND standard OpenAI-compatible style-TTS endpoint (`/v1/audio/speech`) used by many HA tts integrations +- **Graceful fallback**: If GPU fails or model weights cannot be loaded, the server automatically switches between CPU and alternative PyTorch inference backends without crashing - essential for production deployments with user hardware that isn't NVIDIA-based in standard Linux environments + +## Prerequisite System Requirements (Before starting docker-compose up) +You will need: +- An AMD RX9060 XT or other newer RDNA4+ architecture GPU card connected to your motherboard and enabled by system BIOS when building PyTorch ROCM images with Docker's `--device nvidia` flag set in compose runtime configuration - though this is not strictly necessary if using the standard AMDCUDA emulation layer provided directly by AMD open source project +- **PyTorch ROCm**: Standard installation available from official NVIDIA or HuggingFace Docker Hub (you'll likely need to install `nvidia-driver`, `amdgpu-pro` on Ubuntu 24.04 LTS, and then run standard ROCM PyTorch wheel building commands with `--build-start`) +- **ROCm backend** is automatically detected when your Linux host has a valid AMD GPU driver installed - no need to manually specify ROCM version or CUDA-style emulation flags in most HomeAssistant-compatible Docker containers unless you're using NVIDIA devices for hybrid inference purposes + +## Running the 
Server on Your Hardware +The example commands below start up PyTorch with standard ROCm device detection and load the GLaDOS_TTS model from the HuggingFace Hub repository. If your host already has proper GPU drivers (which it should - otherwise AMD system tools will fail to initialize in `sudo apt install amdgpu-pro` or equivalent package manager command), you can run directly: + +```bash +docker compose build && docker compose up --build # Standard setup when ROCm backend is available +# Alternative if device isn't recognized but CUDA-style drivers are present (Nvidia emulation with AMD hardware): +docker-compose -f Dockerfile.laDos-tys-rocm .env.example-u15429780-AMD-RDNA4 build # Use your own environment file from .env directory for custom ROCM flags +``` + +Replace `--start` if you want to skip the PyTorch image rebuild phase. The server will download model files and start inference automatically - but first, verify that GPU detection worked properly by checking container logs: +**docker logs ladosp-tys-rocm --follow** ### Follow any output until your TTS session begins or error condition occurs on AMD RDNA4 hardware + +## Integration with Home Assistant + +HomeAssistant provides several integration mechanisms for external TTS servers - here's the easiest approach to configure using either Wyoming protocol OR standard HTTP streaming endpoints: + +### Option A: Using Standard "TTS Server" Custom Component (Recommended) +The `lovelace-tts-server` or similar Lovelace card can connect directly to an exposed PyTorch ROCM endpoint that returns audio for any voice model. 
Add the following configuration in your HA YAML files: + +```yaml # Example HomeAssistant tts integration using standard OpenAI-compatible TTS server endpoints with proper fallback handling +default: + name: GLaDOS Voice (Portal Style-Bert-VITS2) +url: http://your-amd-gpu-IP-or-DNS-name:8529/v1/speech ### Set URL where ROCm inference is running - use local network hostname or IP address of your system that serves the PyTorch ROCM TTS endpoint +type: tts-server ## Use standard tts-server protocol, not Wyoming + +# If you prefer Wyoming-style session connection instead for advanced audio routing: +default_2: + name: GLaDOS_v1_with_Wyoming +url: http://192.168.X.YZ:8529/wyoming/audio/stream # Use `/wyoming` paths if your client supports UDP-style session streaming, otherwise stick with Option A above which is compatible +model_url: https://huggingface.co/WarriorMama777/GLaDOS_TTS/tree/main/Models/Style-Bert_VITS2/Portal_GLaDOS_v1 +``` + +### Configuring Wyoming Protocol Integration Directly (Advanced) +If your HomeAssistant setup uses the official `wyoming-tts` custom integration component or a Lovelace card that explicitly requires protocol-specific session headers: +- Set the **Wyoming endpoint format** as either an HTTP stream response at `/wyoming/audio/stream`, standard TCP-style websocket over UDP, or similar transport layer if the client documentation specifies one of these options +- The server supports both streaming and synchronous request/response patterns - check your Lovelace card's integration requirements before using pure WebSockets (not recommended unless necessary for low-latency audio playback on HA) + +### Alternative: Using "TTS Stream" with PyTorch Backend Directly +Instead of using standard TTS Server protocols, you can also expose raw `pydantic-settings` or openai-style requests directly to HomeAssistant's built-in tts server configuration - use the `/v1/speech` endpoint as a generic HTTP audio response source: +```yaml # Custom config for your HA 
Lovelace card that works with any standard PyTorch ROCM inference backend +tts_server_url: "http://0.0.0.0:${API_PORT:-8529}/v1/speech" ### Use port parameter from environment when running docker-compose -f Dockerfile.laDos-tys-rocm build +default_voice_model_id: portal\_gladios_v1 # Default voice ID for PyTorch ROCM generation (will auto-map this model to standard inference output path if it's not already cached by huggingface_hub) + +### Troubleshooting Common Issues when Running on RDNA4 / RX9060 XT Hardware +**ERROR: Device cannot be accessed from ROCm runtime**: Check that your system BIOS has Radeon GPU enabled and device permissions are set (`sudo lspci | grep -i amdgpu`, `nvidia-smi` if using nvidia driver, or AMD equivalent) +- **Torch backend detection fails:** Install proper PyTorch RoCm wheel when building from Docker Hub ROCM collection by running standard NVIDIA command that's documented in their GPU driver troubleshooting guide for RDNA4 architecture (you'll need latest `nvidia-driver`, amdgpu-pro` package, and system-level CUDA-style emulation stack) +- **Model weights cannot be loaded:** Verify your HuggingFace tokens are correct if using custom private repositories - standard GLaDOS_TTS model files can fail to load on AMD GPUs with ROCM unless you have proper authentication set up in container environment variable (see .env.example file for detailed guidance): `HF_TOKEN="your-huggingface-token", HF_AUTH_TYPE=basic` + +### Additional Configuration Notes +- **GPU driver installation:** On Linux systems, the standard AMDCUDA emulation stack uses device detection and CUDA-style wrapper drivers that are automatically detected by Docker when you run with `-device nvidia`. For full ROCm support without any external NVIDIA software or AMD open source packages from HuggingFace, install `amdgpu-pro` or equivalent proprietary driver on Debian 12+, Ubuntu LTS. 
+- **Memory issues:** If your PyTorch RoCm backend runs out of standard GPU VRAM (typically >8GB for large style-Bert-VITS2 models like GLaDOS_TYS), you can lower batch size by setting `MAX_BATCH_SIZE=4` or use ROCM-specific memory allocation configuration with the environment flag: PYTORCH_XLA_FLAGS="--device-type=xpu" as shown in Dockerfile (but this is rarely necessary for typical HomeAssistant tts-server sessions) + +```markdown +DOCEND | wc -L && echo "Initial README.md created successfully to /home/taco/README-GlaDOS-TYS..." || true diff --git a/__pycache__/download_model.cpython-314.pyc b/__pycache__/download_model.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f43a2f50fe1fb8eb7ef2e81120d779f9134cc56 GIT binary patch literal 2960 zcmahLU2hx5agRLmDDp^(`j(|gCsVS;L=r1Qtz@ULA)BJ4h_cv|Lb(8y!CoE`~%8TOlGYD1`;61n;g3+`qte$o*1YG zJjBiJ&dkov%+Ad6t$^PPFg*Ly*st!o0r-Y2Y+xFb7dI#XD&Rf9Q;PsvBLyjQ?SKwE zIu@M^G^7U!&$;MYa6|V31DOR6^bCRxD%obMY>q>O-qBp!GoI#M(++E8?(MQU%@OZq zr?5=~pr3cc0MEc6@9747OE*wjVe1CVduMI{kX;6#A8df+SxfrIcnI)5o}IS!v~9E{ z&;OWj&3XR@%?J1(-#SfO6l_&nGV=eDQwJufh|V^2o@jXk?&V zkRk>*GO9?oC8a1fRt3p0#2n^C9LQy|kb6_r3nDD{u8X=ZtNOB75T~c7qw@(dwz4X$ zt*w?vm({GK7|~U@rAXsfB^?T>#kJK7(WI(FQ4t!Vwv@OI6f>ILQk(_1njKx)MFb_QeOZPJc|$+nQ485-de z=~KW1{G*y#pLW>-XnJ%UK*_{t(>x`G`vC8rZbVCSr8{kzl*>L(bFaITujmhBK3f)B z!mQ_J?f}s6ItjpQEMroy)R|(Y8nz6AbdRk+N%G|s41x7r)3>F2R(xM`swV}Sd(v;% zDsjHxdpZqvKG|`=F<--;I#P}#&WF=rOP}1Wb^m>@jYJF9_loFZD$beHV4sp4KFz@y zcoMm#logE>2;=M>#GmGnLBzX;8xy06I<8&=d8FTvey83lw_R5`v*U4_YN?pzL6y*h9LC(}NS3>~1tlXAX(#6xs&ZRGuEH&>Sto^o7=A9f zA}q#`*MOo9jXN^TBX)TuHkS}q-@O`J7*qxvSGdih{mTu#NcQda#@Oabe1S_S+4}O ziF{_iYrGmQge++IkwM<3qsVFK8RX0w(D2~R=Qs;=Qokr*%UO+6k7ApY?do^FuA@LtwQv06+}_acX$siV%}qmI6M=RiF?`jVl$yxZQFUJz`*f9W7FuyXNa`#rfA5SYie;*&LMj7dVB@70 zR#K$P`ZY|$SqhB5VRY=IDC#K)J_UiN!2d7M^Bq{(2P@A2Qv>afLHq9ckH#L1Rimk| zz~{Ik*<1H_*8Ibd{lnGC#G!w= z60g(V8r{85ckc%F*8VwNf8)xr6S#U_0?IW@J@bWXzW#k*|AB9y68oBNyFc~8mA_nh NLWiIG0DZ>v;eQl1idFys literal 0 HcmV?d00001 
def download_model(output_dir: Path) -> Path:
    """Download the GLaDOS model files from the Hugging Face Hub into *output_dir*.

    Every repository file located under ``MODEL_SUBDIR`` of ``REPO_ID`` is
    fetched with ``hf_hub_download`` and then copied into ``output_dir`` under
    its base name only (the repository's directory layout is flattened).

    Args:
        output_dir: Destination directory; it is resolved to an absolute path
            and created (including parents) when missing.

    Returns:
        The resolved destination directory.

    Raises:
        ValueError: If the repository lists no files under ``MODEL_SUBDIR``.
    """
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Match on a directory boundary: a bare ``startswith(MODEL_SUBDIR)`` would
    # also select siblings that merely share the prefix (e.g. "<subdir>_old/").
    prefix = MODEL_SUBDIR.rstrip("/") + "/"
    model_files = [f for f in list_repo_files(REPO_ID) if f.startswith(prefix)]

    if not model_files:
        raise ValueError(f"No files found in {REPO_ID}/{MODEL_SUBDIR}")

    for file_path in model_files:
        _LOGGER.info("Downloading %s...", file_path)
        # ``local_dir_use_symlinks`` is intentionally not passed: it only had
        # an effect together with ``local_dir`` and is deprecated in newer
        # huggingface_hub releases.
        downloaded = hf_hub_download(
            repo_id=REPO_ID,
            filename=file_path,
        )
        src = Path(downloaded)
        # Flatten to the base name; if two repository files share a base name
        # the one downloaded last overwrites the earlier copy.
        dst = output_dir / src.name
        if src != dst:
            _LOGGER.info("Copying %s -> %s", src.name, dst)
            shutil.copy2(src, dst)

    _LOGGER.info("Model downloaded to %s", output_dir)
    # Log the final directory contents so the container log shows what the
    # server will find on startup.
    for f in sorted(output_dir.iterdir()):
        if f.is_file():
            _LOGGER.info(" %s (%d bytes)", f.name, f.stat().st_size)

    return output_dir
async def main() -> None:
    """Parse command-line options and run the Wyoming GLaDOS TTS server.

    Builds the ``Info`` description advertised to clients, creates an
    ``AsyncServer`` for ``--uri`` and runs it with ``GLaDOSEventHandler`` until
    the server task is cancelled (by SIGINT/SIGTERM where the event loop
    supports signal handlers).

    Raises:
        NotADirectoryError: If ``--model-dir`` does not resolve to an existing
            directory.
    """
    parser = argparse.ArgumentParser(
        description="Wyoming TTS server for GLaDOS (Style-Bert-VITS2)"
    )
    parser.add_argument("--uri", default="tcp://0.0.0.0:10200",
                        help="URI for the Wyoming server")
    parser.add_argument("--model-dir", type=Path, required=True,
                        help="Directory containing model files (config.json, *.safetensors, style_vectors.npy)")
    # NOTE(review): the compose file in this change passes DEVICE=rocm, while
    # PyTorch ROCm builds address GPUs through the "cuda" device type --
    # confirm that the handler maps "rocm" accordingly.
    parser.add_argument("--device", default="cpu",
                        help="Device for PyTorch (cpu, cuda)")
    parser.add_argument("--debug", action="store_true",
                        help="Log DEBUG messages")
    parser.add_argument("--version", action="version",
                        version=__version__)
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )

    model_dir: Path = args.model_dir.resolve()
    if not model_dir.is_dir():
        raise NotADirectoryError(f"Model directory not found: {model_dir}")

    # The same attribution applies to the program and to its single voice.
    wyoming_info = Info(
        tts=[
            TtsProgram(
                name="glados",
                description="GLaDOS TTS - Style-Bert-VITS2 voice from Portal",
                attribution=Attribution(
                    name="WarriorMama777",
                    url="https://huggingface.co/WarriorMama777/GLaDOS_TTS",
                ),
                installed=True,
                voices=[
                    TtsVoice(
                        name="glados",
                        description="GLaDOS (Portal) voice",
                        attribution=Attribution(
                            name="WarriorMama777",
                            url="https://huggingface.co/WarriorMama777/GLaDOS_TTS",
                        ),
                        installed=True,
                        languages=["ja", "en", "zh"],
                        version=__version__,
                    )
                ],
                version=__version__,
                supports_synthesize_streaming=False,
            )
        ],
    )

    server = AsyncServer.from_uri(args.uri)

    _LOGGER.info("Starting GLaDOS Wyoming TTS server on %s", args.uri)
    _LOGGER.info("Model directory: %s", model_dir)
    _LOGGER.info("Device: %s", args.device)

    # Run the server as a task so a signal handler can cancel it cleanly.
    server_task = asyncio.create_task(
        server.run(
            partial(
                GLaDOSEventHandler,
                wyoming_info,
                model_dir,
                args.device,
            )
        )
    )

    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, server_task.cancel)
        except NotImplementedError:
            # Event loops without signal support (e.g. on Windows) raise here;
            # keep serving and rely on the default Ctrl+C behaviour instead of
            # aborting startup.
            _LOGGER.debug("Signal handlers are not supported on this event loop")
            break

    try:
        await server_task
    except asyncio.CancelledError:
        _LOGGER.info("Server stopped")


def run() -> None:
    """Synchronous entry point: execute :func:`main` in a new event loop."""
    asyncio.run(main())
zcma(UTWlN0agTRAK8~a)>Osj8t&=5MVl0x1?No`KICd<_q9x1hSy2L%4sjw+6dj7k z>>ZVg2?90^(4=Ua1T7E;DB$#~4iKQf?pI-;KgpLR86mro0RweW_@_`Rr45RHboP!% zDnbosCGO77?9R^4&g+i(ylw>T{x8R_{mX;UU+KhZTm#ts2LM&{3X+%v(#?_48DnnK zZN|vzER41UH)GfB19T0W;Ab4V!+^PjbH=5+X56}aM$m;BkM1$>_JnuFr~3eABtFqL z`1iF1oXfFx@t=vwZv(SgXn7KNyeP(U6M11Bv%j>{Ccg+dAR@} zmz4FD2uFl-x{mWpWj(JJ3704?tHiyaYe}r;u)Io~057Wfi~?JPbJ}__b49^x3Z~O^ z8UPLUrPDACT$qz5FJGBjQ;PbGT+9{}oT2w>+zr_PX5qKn4_y^4qE~EBzXSIITtE6D zv%*M>WQ)UnBJ4xDmhf27%eXlIDFW{NvWiz_U8CHNkn`~sbybP$az>3$Dr-tXEv>?_ z@mxX9s#?_OOHp0d;;*l(tNCIsZBpXt^s1Z(d1Iw@;z_6FVo}v)T}h{L02sH!PwR%R zivEq}8D`{bqyJ#i9Cv|&GPJWA-SAWR?Uq3EM?mvcl(J|jNpEqW4I&BNW}=~!5@;a_ zI}bL=Cb1GH*(a#Jpbb1~9iQUO6$nWV$tk%eICK8U9d1dGJl}(}SMo`1lK-(1v`c}M zvnfNzch-Z&9&DE7mY5WHsM(S}3u=vnXpu?!TXPE<=!jaj&4+=5!8GzeP%fBT!S@|gWc7HucL@7f_a zYrJ)~1?}5^+`=WTC$<6*0zEn<5w`La9($TDlkP97# zJhW)PfTZDT5TSm|RM|PZ8={mnJcJx*ju+9s+ss&8S|entLuRIW%=?Bd{TMk7xLY~x zDHihDNM-ybGrWrn3s*$VNK7Im3eikO;^-B9y`V&2P_Q0dOe|bE9-(O~t7sXVFVTco zu}7n2oUe4~nbN6vd^~3SoqBTo`1p9`(2G*S;HIxAqIHmYNW?}QS!G!+7j(j{D1}l* zh(=e{tWt<(^LU|>oXle-qpNsb%&0{jQV!%G(O4iZ=L?D^jsku;pNn17)Z&=Nl`DrK z8N$SnI=Q}};>-%1R4R{&nQ~T+R2+2qQaMN58n9NNQRf%X~c(g!Ma#3!~nm~hlgtE6A zYcLJ*mA*?x z%B2!qL`y@>1qP_)D@t0^u_DuQ2F#d+>ZSq)P76A)t%h`#neOukwJ3(QO2+{mpQ1>& zX)Y9=ulUTu4vbGNm6R-4ob8%SY)X;XDl3sNamqLc<)fw$-#MHs(|RjOr^kyJ?Mf!}_55oS(i-T!x#Z!5WAIE1FtZ zQwX2e3`AgF)z3A3YYJl(W17z}O?n>c;7Us+I_^<#R=_GX6^9 zLSlY_IAC0uk}i<|Rfb7Sub5>p;WKhEqku`CH%71tL-dG~@P>*o;i!`EQ}is4RkE65 zngNFh=4eXC86vQ7ibk(1HX5eoDB);@i!TFcl(UPFA0MJB+V*zUyu+K`;dY-6`Vo=fAPDc7Df=f_)zqKP=X|$2Pjhwz^~0L|y2t34I$vUrh*Y z3ZZSEcxUL$t@T^4-Wj~O<(s|ZoPE#{eBkM>cZKTxm$v&y>b;SA&&54E8w%_=QQyFW z&i;BRTpvEYJshtOK3NwfpoI?sZD@oXISE4Vb-25_cUVtPVDAv>9j?bt>_vlK-wxaB zb?>o#oqTm}?-cU&*1Ut8-oa0Xe?R)W(Jk+>>NMQyXLCQ9yVHL9*214&{^QGcx#?{S zb@Zm|E_b{!{h8J@qo5IkR5Ux&c`@28#e(3$}+Ksn9eru~O_AXm@ zdTP#r4d=ioY%M&o5uW(c`SjN;^2c|0);>}P*`2+2oxR^~3xS$2xFHNua~Ronw!QCp z&vU0MTyq}XVFB}B?L64n+moJ)KVbgcHZ=mn=M(mecJ%qP%te;_{5cNbFBlu(zhD^( 
z+n>02g8d@SQp|CN!Y3%40XEG>qX+f7U0S?6{%b1L{CcuVwpPU;A^4VL<|8bo1rt6? zJL(n~>T!fSXh%K7B%B3k34R*QNZ>8-HnVTf{)YQC|0NeR789rS@fhjFw5&DRj#G#V zOI*w4B4li}pc#>aer)S2h5V8c5u0uuqi$`6s#fHpUQB!q$B&t=ZU(VND0MVKi5ZSu z5RM>e=OFfiP&7xu2kdDoh;hHOlw13(vc9CsIGZTy3dZG=rtE4;}p*>b-|X?z`J-?!HZT-}RX-cev{K zhNs`l|6H;$Ob_@A=Xtksm+jrLv3BoYoWif{U)vFP=v#<5o)EPsFL8RP6?y-o(J!~# UhaKl4=yt?$?j(Er1Oxs50AEPwumAu6 literal 0 HcmV?d00001 diff --git a/wyoming_glados/__pycache__/handler.cpython-314.pyc b/wyoming_glados/__pycache__/handler.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2fd51863ce2da1ac8099c43ee82ddcf9970e822 GIT binary patch literal 11539 zcmbt4ZE#c9mG?>S>D!hpTeAEu+1Ma|0|pF+BnAxr5RNKX(WWM8W2Z2q#M*rdI~c`2FVCz?2u{DESU!_ zl4Y<&Dxr1UQ0bsmvQk(#WE(7#$|$TKvJaL^<%1Pc1|Rb~C++1U61)(?hb6Hy-VO$X1$q+`4onc*R4KFSMH z=wy6?kBX2r`ogiP>F^Yf8N;DN2M=J)jQ7NJM=YA?n&1&V+;M7CM>H%<@g2v*iExY; z`Ht~pVWH#n>60_nCnMuRT#QdjdF;&!_KfG)WVCDiScJ9^;!0o8sDnQvWl?IsI5xex zyQjMYlkNL3?pHGV_8pnALT%4LHx_K)2Ll4KrNvwM*st^ny&oc z0GK8DRr;}XO$ZW|rPpU`Cd!aPW2}nkS4ERr0BI8$GpO0m*eexeb8k(BM#@e}DgIe`uutQ2z)3vQFf~!uT<|IkINhFSDUN!y~fhnL$}IFzRK5 z3h03DRg2&PP_~68c!?jELeVUb3r$c=3H}rSvt(IcmDamcdiQ)qTHms$Z&|jwE`8_S37WJ-z3UkG!ofmf|*-8O-?Gc7VGSY298TaQfWI!8WyR^WR zoKOx;WrI3nSvSF-ij4Du1Byqz8oGGF2^m;#B_s-O()ChyqERbWv_OPBWr3UJK8;_C ztkCqQITx8nkRV%(eLVFw z#i^gCCthjvTGP(d`{LmVkah0e1A#+sdabxYYPy?6_l9QCJ38Zeyka~oN^U9co{YxB z(&kOx61gmdSfK+??;Y@kM*9y892BfDPuUs~m8&WQs1y8ztPlB~-oJk!AhQv;@nkj@ zKFP}_tW@yia%dsNPw+9g?F6+|DMiQ?%hph~CVLOc+9_U=IXWMi)EkrL?SbJXFYy~uW7H zmy8#UFPSslJujKlhNhIEX?`$e=*Z2W_stDI>-b5>Zw-U@jl|%3T*~30pu%AtUDd~5 zyIsI4AhxJfGb(hPhzJ6<|2PS@9qL7Fgc7WR(cQ=BHIP@3d zLKXoN^^hdU7~;|Zm?-Mjgg{PcslqG41cBk(&3VKG9Fl|ZKQ#^6$7slYG!3(3eU6aer=nvPKLYrljh4Vg3N5EK4WNi1 zJr&4E>d6?&BN_s1kXT57+oV-#c#QGtW=dK+#qcEP8!<61h%@H#cx*B<)p=Zu$7aez z>1>n_odR71>N;bIv!gQ&qj7g(3-@Fs%8MX7r(+ZDNX$Lsbb}a;NTOSuJ_<*MwvB+A zZjki?FUF&%K<$r4L`i0+qVc1$jz0q#5wxVZAfcp;NW1{;WF1IEOw0Dc0HbkfAC92d 
z5}W{+F_Fy$6PI<$#AS}o8M>pRi+D^x;He_7EHoL3Wo0gaFmquaV9;ZW`uQx$*s9Vt zPs-+5vNg>PE!&-!&t84*%5(GarOK^K_HDBxnG)xg@N7B`H$I>=C@LziS zU2zS{TQzWEDFHQSb)y>sSFS>?5kJ7wzu z)#glAwcoL|&vEaVofrE6bJurwz53)UPiD&NuYLJDPt0$<;k)j;G5Vv?g)PZ)-=f9$ zOIAtmTSO~Ko$3s%dl-lZC`;VONk|wcClP}jYUrx+na~Aw{AMaGbpD)NP-cg*Dm4?jG296n zu!o>d?couXv>0u;{476VJ-R$*kb}EcKeK6%z=tK&$y9URy@B4myPCyzw{pBGsF)T2 zdcCZwB8RR_@F;8yfIkagqZyeLjiN26!GMk-ST)S!emjM_0ml7i%e@nt=z zZD9%Q^-Wl&7nPh>UvM^tu*oo1o0N6XHF0#>GDjyUB1@w-my>R0MWLfe!a=B|2j-sv z041)q@6Dm5)_o~<{p|3yXYR4)7k$tBl4S##vYM+6R~qKq(v6!^jho)sx2k2hovS)h z;>eWOWhxpnl^&2zt46LwH#@LeN;r$svh6n8o?(qI4nIHqHQzl;`Q?FRO~-FDb2{6m+lI}#nY7<#J2Gb5tWP<-FzZ}U zFYO0Mp(sqk%q%%VBql(><#=4uI!T-fGMnKzfkVp>Bmij;0tXlzGRSE$7htKoi=!nR zEz!{uT>zYuK}Ld>eT-xTW;Dq5{l4(trwBwsAE;3@=7tdQrb< za9vxMGx)H@2PK$)!tfKO&N z#Z9Q(wnw0aa?xr86TmWiG#-y;SxNXjlsv{j!WW?Q4Ay|YtT2g@1CJg=uX8JyTwh0X zk6*Gl$*&?a1Yzopu-S#|;73kf6M!|CVjdXva>yAQuyRmxNipLBN^>5+eD`V(`$>Qt zhk2c0X0<-f1am?Uz!=hkhQDN1it4_mzoK59AfwI)4X|RDFZhVE+r4(iO+-(8N8qR1 zqKzl6nszMN`_h)aq^0k7<(2oWRcWhd(dwCR{z2>aThrE# zMQaC^I~vlC)h+#yL~QQI&SIryT7I&n!85)7IXkp%+x#lAZTUiEaBI#8#NW z3;o<~FY}h3+1<>(WoSdVryb&(Zf5s-_NE7GZ?0z`enh_i1xGFlqpC*^H$dPa`s5P_ z=Z1(45P`&aNcJ-vXtEez-?2i|x8TrW{_#_Xyi8Bs*FgbQP92;oC|nzg`NLJg3w)_j zPg|C+(U+bRx_SOg!1G=LOyUIGq{UQTeiuEfMeB;4RCUw|54NizW60Yy6z{)SAL`iw zYdk7n z#?Z}8!=w*a^a(TKKpwc%7WouP3q00pd4VEcr;*x(QvrC=tx;iQ2E_i zMB;B$p?vBg`StqO<;ztlsF*rjK({SRQIKzsQ0z2}BuoV~_@}OKKfr6KigzCk{tYUW zPd()GdX)$KZTWH)8q2qCSIhFL+A?7X8vI@PaurhdI$`|0E@66BlQ2KaBrHJpjVeYy z9Yf8EhQt3Dv;_3qP>e$GZ&A|1SnXFGe=TL;t}-M_4*#@0cM8GfReJcpAIV$CwyAwS zOUBTXK||1}_8PRRR0XHm9_YaqGzM+(XDuWJLlR{lu_AB^X8SA)nu2Cnl~FZYKijHC z1_(jt)JKd5@@WPtJArSX0m z_qs_Np&SUefTB0(iHWFkY6|$ZmPEkWRtJ6IAGG(YefgH*rUr%4w;RUJeQW?@e;HdP z>;=-GU!7S#4VctjYaoY;^B3%(62^dvN;qU!TOG3d_o=1%)IXSqRVZNImObI0n!1lr z16Lo=B)q;;A=vY7 zF~NsV@B#(V^$5U@(f9Bw8=YxXO?O4>R^Mzux4}o7cno5A{ipz3`UC>1jM7;sG9m1R zQutnh72y3^=1xZ@z*%5`F9BjP_?jR>U}4}%;KKMdgCJoK(-@tCXvU__8+@E$a2QU& 
zO^$=e>^L|8yv<4mypBR7LEmX*?o=FJKEbS4#+S7but!Y6aCDld?#nff0ofc1K0UN& z0A7gpjL16WF)449P&;107a3U}2JLJ-5|@oqJR}_hZ^?wrf$0eLk0zFoIRQPSvJPIU zP$z^MY2#K|FU60ZmB7yl4rU_gF*<^saRNLp^RhO|#{^Ith=4k@Y#cZ<&eIP(WFxPF z0{X2K58Y8j0%Hr`ltLkIxuQBou|7w6LBuSQT-Jz!a2f$Kr7hTKI3=SHJ{?jf^(e9B zGBJEI0Z!Qv*mR+B`dx{QBGLk-Qnuzx6ci>Ra|)#81%W=lWZfdvHr<fU!C@BOnEk@J=;^B?Mrn# zE}K>j#As6<)J8Ml1K#|d$h_+Zb>FXh!vtO#;~r*3x7p}=r?T@w1wr)GXBvT;6xjwtm5I zqwRXzZQB#~8olYp&5Mnjmn*8VlT1}zrmi{D;LTJwteSOA_J>4QXTM^+ta(^QTuoQc zt-47~-K!h^cEkLZ?{%fCyHnNOKepd+U3V>2@0c6OxSGv4s{NIWhy8M&43cUJ4f87Bs^OmP! zSWj*eFTgij?IRqyxsw^LGThvy1^8dcZpUyV_qKx>t~I>v)FE7JhKje>Gs6w++Z+0K zK;{>X%=bc{Ca>2dyab7C3gH|1G`wDg@Z+T~Pluz5 z%)&?X$%r6IQTVzm7Kg8y(3c~e#E1$92l#uyI~y4fp^G+hbQ**wbW&E}bsOiSQ7BR! zaY`2M1(c3?pUP8DDI({uuoR=?@Douv{lkkOQ66f*pY{M7^+jm z^L`s)taQQNO~T({m$MMb`e=M=3X}#}BfzbSe(-4ID0Pd(j@#&L^vjU23 zxg_iU>s0P^dco1VkKS^*TDUIa0=?eyEfilN+SIb@pY5dnSu+$-m#kc>_Ac_mDd+>= zTV%6Z2w&1Ej+Y9>b5FmoI0Y}Fn>yjsMCAjK>c^^pxWINXXlujX9!G#~mCrtumjolc zA^3nWFg!9mRVrlG7au<%JcoJQHUVusp%Ejnt%-n!ldK<)pG<%vBj8pDZj72RLKQ=K zxHyOq<=QHM;ByDu3;My9!qKu}r*gKQ5Z-_qy05X-FHdXyQZ?Vsb(C@g*@}fYU00uZ`I)5q$tA~ANn+3F zjW5o;Fq5>eThcervLEQ+`@|1y_;J|>_3%-~2c7U*xZ3k2hH)jUTK|_IKw0qr0Xs9$ A!~g&Q literal 0 HcmV?d00001 diff --git a/wyoming_glados/handler.py b/wyoming_glados/handler.py new file mode 100644 index 0000000..67e9301 --- /dev/null +++ b/wyoming_glados/handler.py @@ -0,0 +1,202 @@ +import asyncio +import logging +import re +from pathlib import Path +from typing import Optional + +import numpy as np +from wyoming.audio import AudioChunk, AudioStart, AudioStop +from wyoming.error import Error +from wyoming.event import Event +from wyoming.info import Describe, Info +from wyoming.server import AsyncEventHandler +from wyoming.tts import Synthesize + +from style_bert_vits2.nlp import bert_models +from style_bert_vits2.constants import Languages +from style_bert_vits2.tts_model import TTSModel + +_LOGGER = logging.getLogger(__name__) + +_VOICE_LOCK = asyncio.Lock() +_MODEL: 
Optional[TTSModel] = None  # process-wide TTS model, loaded lazily on first synthesis

# Model name handed to bert_models.load_model / load_tokenizer per language.
_BERT_MODEL_NAMES = {
    Languages.JP: "ku-nlp/deberta-v2-large-japanese-char-wwm",
    Languages.EN: "microsoft/deberta-v3-large",
    Languages.ZH: "hfl/chinese-roberta-wwm-ext-large",
}

# U+3040-U+309F is the hiragana block, U+30A0-U+30FF the katakana block.
_HIRAGANA_KATAKANA = re.compile(r"[\u3040-\u309F\u30A0-\u30FF]")
# U+4E00-U+9FFF is the CJK Unified Ideographs block.
_CJK = re.compile(r"[\u4E00-\u9FFF]")


def _detect_language(text: str) -> Languages:
    """Guess the synthesis language of *text* from the scripts it contains.

    Any kana character selects Japanese; otherwise any CJK ideograph selects
    Chinese; everything else falls back to English.  Kana is tested first so
    that text mixing kana and ideographs is classified as Japanese.
    """
    if _HIRAGANA_KATAKANA.search(text):
        return Languages.JP
    if _CJK.search(text):
        return Languages.ZH
    return Languages.EN


def _load_bert_for_language(language: Languages, device: str) -> None:
    """Ensure the BERT model and tokenizer for *language* are loaded as float32.

    Loading is skipped for whatever is already loaded, so repeated calls are
    cheap after the first one.  *device* is accepted but not used here.

    Raises:
        KeyError: if *language* has no entry in ``_BERT_MODEL_NAMES``.
    """
    model_name = _BERT_MODEL_NAMES[language]
    if not bert_models.is_model_loaded(language):
        _LOGGER.info("Loading BERT model for %s (%s)", language.name, model_name)
        bert_models.load_model(language, model_name)
    if not bert_models.is_tokenizer_loaded(language):
        bert_models.load_tokenizer(language, model_name)

    # NOTE(review): this reaches into the library's private ``__loaded_models``
    # cache to swap the model for a float32 copy; verify on library upgrades.
    bert = bert_models.__loaded_models.get(language)
    if bert is not None:
        bert = bert.float()
        bert.eval()
        bert_models.__loaded_models[language] = bert
        _LOGGER.info("BERT model for %s cast to float32", language.name)


def _model_files_in(directory: Path) -> Optional[tuple[Path, Path, Optional[Path]]]:
    """Return ``(weights, config, style_vectors)`` for *directory*, or ``None``.

    A directory qualifies when it holds at least one ``*.safetensors`` file and
    a ``config.json``.  ``style_vectors`` is ``None`` when ``style_vectors.npy``
    is absent.
    """
    # Sort so the chosen weights file does not depend on filesystem order.
    weights = sorted(directory.glob("*.safetensors"))
    config = directory / "config.json"
    if not weights or not config.exists():
        return None
    style = directory / "style_vectors.npy"
    return weights[0], config, style if style.exists() else None


def _find_model_files(model_dir: Path) -> tuple[Path, Path, Optional[Path]]:
    """Locate the model files in *model_dir* or one of its direct subdirectories.

    *model_dir* itself is tried first, then its immediate subdirectories in
    sorted order; the first directory that qualifies wins.

    Returns:
        ``(safetensors_path, config_path, style_vectors_path_or_None)``.

    Raises:
        FileNotFoundError: if no directory holds both a ``.safetensors`` file
            and a ``config.json``.
    """
    model_dir = model_dir.resolve()

    found = _model_files_in(model_dir)
    if found is not None:
        return found

    for subdir in sorted(model_dir.iterdir()):
        if not subdir.is_dir():
            continue
        found = _model_files_in(subdir)
        if found is not None:
            return found

    raise FileNotFoundError(
        f"No .safetensors model with a config.json found in {model_dir} "
        "or its subdirectories"
    )


def _load_model(model_dir: Path, device: str) -> TTSModel:
    """Build a ``TTSModel`` from the files under *model_dir* and load its weights.

    This call blocks while the weights are read; run it off the event loop.

    Raises:
        FileNotFoundError: propagated from ``_find_model_files``.
    """
    model_path, config_path, style_path = _find_model_files(model_dir)

    _LOGGER.info("Creating TTSModel (model=%s, config=%s, device=%s)",
                 model_path.name, config_path.name, device)

    model = TTSModel(
        model_path=model_path,
        config_path=config_path,
        style_vec_path=style_path,
        device=device,
    )

    _LOGGER.info("Loading model weights...")
    model.load()

    # NOTE(review): ``_TTSModel__net_g`` is the name-mangled private attribute
    # of TTSModel; if it is missing the float32 cast is skipped silently.
    net_g = getattr(model, "_TTSModel__net_g", None)
    if net_g is not None:
        net_g = net_g.float()
        setattr(model, "_TTSModel__net_g", net_g)
        _LOGGER.info("TTS network cast to float32")

    _LOGGER.info("Model loaded successfully")
    return model


class GLaDOSEventHandler(AsyncEventHandler):
    """Wyoming event handler that answers ``Describe`` and ``Synthesize`` events.

    The TTS model is shared by every handler instance through the module-level
    ``_MODEL`` and is guarded by ``_VOICE_LOCK``, so only one request loads the
    model or runs inference at a time.
    """

    def __init__(
        self,
        wyoming_info: Info,
        model_dir: Path,
        device: str,
        *args,
        **kwargs,
    ) -> None:
        """Store the service description and model location.

        Args:
            wyoming_info: service description sent in reply to ``Describe``.
            model_dir: directory searched for the model files on first use.
            device: device string forwarded to ``TTSModel``.
            *args, **kwargs: forwarded to ``AsyncEventHandler``.
        """
        super().__init__(*args, **kwargs)
        # Built once; the same event is written for every Describe request.
        self.wyoming_info_event = wyoming_info.event()
        self.model_dir = model_dir
        self.device = device

    async def handle_event(self, event: Event) -> bool:
        """Dispatch one incoming event; every other event type is ignored.

        Always returns ``True`` (presumably "keep the connection open" --
        confirm against ``wyoming.server.AsyncEventHandler``).
        """
        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            return True

        if not Synthesize.is_type(event.type):
            return True

        synthesize = Synthesize.from_event(event)
        return await self._handle_synthesize(synthesize)

    async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
        """Synthesize the requested text and stream it back as 16-bit mono PCM.

        Writes ``AudioStart``, a series of ``AudioChunk`` events of at most
        1024 samples each, then ``AudioStop``.  Blank text produces no events.
        Any failure is logged and reported to the client as an ``Error`` event
        instead of being raised.
        """
        global _MODEL

        text = synthesize.text.strip()
        if not text:
            return True

        language = _detect_language(text)

        speaker_id = 0
        style = "Neutral"
        voice = synthesize.voice
        if voice is not None and voice.speaker:
            try:
                speaker_id = int(voice.speaker)
            except ValueError:
                # Keep the default speaker, but leave a trace of the bad input.
                _LOGGER.warning(
                    "Ignoring non-integer speaker %r; using speaker %s",
                    voice.speaker, speaker_id,
                )

        _LOGGER.info("Synthesizing: text='%s' language=%s speaker=%s style=%s",
                     text[:80], language.name, speaker_id, style)

        try:
            async with _VOICE_LOCK:
                # Model loading blocks for a long time, so it runs in a worker
                # thread like inference; otherwise every other connection on
                # this event loop would stall until loading finished.
                if _MODEL is None:
                    _LOGGER.info("Loading GLaDOS model from %s on %s",
                                 self.model_dir, self.device)
                    _MODEL = await asyncio.to_thread(
                        _load_model, self.model_dir, self.device
                    )

                await asyncio.to_thread(
                    _load_bert_for_language, language, self.device
                )

                sr, audio = await asyncio.to_thread(
                    _MODEL.infer,
                    text=text,
                    language=language,
                    speaker_id=speaker_id,
                    style=style,
                )

            # Clip before the cast so out-of-range samples saturate instead of
            # wrapping around.  Assumes infer() returns samples already scaled
            # to the int16 range -- TODO confirm against style_bert_vits2.
            int16_info = np.iinfo(np.int16)
            audio_int16 = np.clip(
                np.round(audio), int16_info.min, int16_info.max
            ).astype(np.int16)
            raw_bytes = audio_int16.tobytes()

            rate = sr
            width = 2  # bytes per sample (int16)
            channels = 1

            await self.write_event(
                AudioStart(rate=rate, width=width, channels=channels).event()
            )

            samples_per_chunk = 1024
            bytes_per_chunk = width * channels * samples_per_chunk

            for offset in range(0, len(raw_bytes), bytes_per_chunk):
                await self.write_event(
                    AudioChunk(
                        audio=raw_bytes[offset:offset + bytes_per_chunk],
                        rate=rate,
                        width=width,
                        channels=channels,
                    ).event()
                )

            await self.write_event(AudioStop().event())
            return True

        except Exception as err:
            # Connection-level boundary: log the traceback and tell the client.
            _LOGGER.exception("Synthesis failed")
            await self.write_event(
                Error(text=str(err), code=err.__class__.__name__).event()
            )
            return True