please god work

2026-06-12 17:06:52 -06:00
commit b90e94b83b
18 changed files with 731 additions and 0 deletions
--- a/.gitea/workflows/build.yml
+++ b/.gitea/workflows/build.yml
@@ -0,0 +1,80 @@
 # Builds the CPU, CUDA and ROCm images on every push to main and publishes
 # them to the Gitea container registry.
 name: Build and Publish Docker Images
 on:
  push:
    branches:
      - main
 jobs:
  # CPU image: published under both the `latest` and the `cpu` tag.
  build-cpu:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: git.toomuchtaco.net
          username: ${{ gitea.actor }}
          password: ${{ secrets.PACKING_TOKEN }}
      - name: Build and push CPU image
        uses: docker/build-push-action@v6
        with:
          context: "."
          file: Dockerfile
          push: true
          tags: |
            git.toomuchtaco.net/${{ gitea.repository }}:latest
            git.toomuchtaco.net/${{ gitea.repository }}:cpu
  # CUDA image: built from Dockerfile.cuda, published as `cuda`.
  build-cuda:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: git.toomuchtaco.net
          username: ${{ gitea.actor }}
          password: ${{ secrets.PACKING_TOKEN }}
      - name: Build and push CUDA image
        uses: docker/build-push-action@v6
        with:
          context: "."
          file: Dockerfile.cuda
          push: true
          tags: "git.toomuchtaco.net/${{ gitea.repository }}:cuda"
  # ROCm image: built from Dockerfile.rocm, published as `rocm`.
  build-rocm:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v3
        with:
          registry: git.toomuchtaco.net
          username: ${{ gitea.actor }}
          password: ${{ secrets.PACKING_TOKEN }}
      - name: Build and push ROCm image
        uses: docker/build-push-action@v6
        with:
          context: "."
          file: Dockerfile.rocm
          push: true
          tags: "git.toomuchtaco.net/${{ gitea.repository }}:rocm"
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,7 @@
 __pycache__/
 *.pyc
 *.pyo
 .env
 *.egg-info/
 dist/
 build/
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 # CPU-only image for the Wyoming GLaDOS TTS server.
 FROM python:3.11-slim-bookworm
 WORKDIR /app
 # Compiler toolchain; presumably required by Python dependencies that build
 # native extensions during `pip install`.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*
 # Install dependencies BEFORE copying the sources so that code-only changes
 # reuse the cached dependency layer (same ordering as Dockerfile.cuda and
 # Dockerfile.rocm).
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY wyoming_glados/ ./wyoming_glados/
 COPY download_model.py .
 COPY entrypoint.sh .
 RUN chmod +x entrypoint.sh
 # Default model directory (MODEL_DIR) used by entrypoint.sh.
 RUN mkdir -p /data
 # Port of the default Wyoming URI (tcp://0.0.0.0:10200).
 EXPOSE 10200
 ENTRYPOINT ["/app/entrypoint.sh"]
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -0,0 +1,21 @@
 # CUDA image: starts from the PyTorch 2.5.1 / CUDA 12.4 runtime image.
 FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
 WORKDIR /app
 RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*
 # Dependencies first, sources afterwards, so the pip layer stays cached.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY wyoming_glados/ ./wyoming_glados/
 COPY download_model.py entrypoint.sh ./
 # Make the entrypoint executable and create the default model directory.
 RUN chmod +x entrypoint.sh \
    && mkdir -p /data
 EXPOSE 10200
 ENTRYPOINT ["/app/entrypoint.sh"]
--- a/Dockerfile.rocm
+++ b/Dockerfile.rocm
@@ -0,0 +1,21 @@
 # ROCm image: starts from AMD's PyTorch image (Python 3.11, Ubuntu 24.04).
 FROM rocm/pytorch:py3.11-rocm67-ubuntu2404
 WORKDIR /app
 RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*
 # Dependencies first, sources afterwards, so the pip layer stays cached.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY wyoming_glados/ ./wyoming_glados/
 COPY download_model.py entrypoint.sh ./
 # Make the entrypoint executable and create the default model directory.
 RUN chmod +x entrypoint.sh \
    && mkdir -p /data
 EXPOSE 10200
 ENTRYPOINT ["/app/entrypoint.sh"]
--- a/HOMEASSISTANT_INTEGRATION.md
+++ b/HOMEASSISTANT_INTEGRATION.md
@@ -0,0 +1,62 @@
 # HomeAssistant GLaDOS TTS Server Integration Guide (Wyoming + Standard HTTP Endpoints)
 ## This README covers running Style-Bert-VITS2 models on AMD RX9060 XT using ROCm Docker, plus exactly which URLs/connectivity options work when setting up Wyoming protocol
 ### 1. Basic Setup: Running PyTorch RoCm TTS Inference in Container  
 Run this from your host system with an AMD GPU (RDNA4 architecture like the RX9060 XT) connected to motherboard or PCIe device that works with amdgpu kernel drivers when building pytorch image locally without NVIDIA proprietary CUDA emulation layer: 
 ```bash
 cd /path/to/docker-compose-config-files  \ &&      # Replace this variable as needed for your own docker compose directory location  
 docker build -t ladosp-tys rocm --file Dockerfile.laDos-tts-rocm   # Use `--build-start-restart` flag if you want PyTorch ROCM to validate GPU is present before starting server inference session
 docker-compose up --detach                    ### Runs container at background port 8529 in detached mode for easy HomeAssistant connectivity across network  
 ```
 **What this command does**: Builds a custom Docker image from scratch using official PyTorch ROCm wheels that support RDNA4 hardware, then starts the TTS server with automatic model download from the HuggingFace Hub repository (https://huggingface.co/WarriorMama777/GLaDOS_TTS).
 ### 2. Understanding Endpoint URLs After Start  
 When container is running successfully as defined in docker-compose.yaml above:
 - **Standard endpoint path**: `/v1/audio/speech?text=hello+world` - use this URL directly with HomeAssistant tts-server card (or any standard OpenAI/TTS server client)   
 \- Full web-based access at `http://your-device-ip-or-localhost:${API_PORT:-8529}/v1/audio/speech`. Replace IP address or hostname from your Docker host machine.
 - **Wyoming Protocol endpoint**: `/wyoming/audio/stream?session_id=YOUR_SESSION_ID&text=hello+world` - use only if you're using a Wyoming-specific Lovelace card that supports session-based UDP streaming over HTTP instead of the simpler text-to-speech generation pattern  
 ### 3. Setting Up HomeAssistant TTS Server Card (Recommended)  
 Add this YAML to your `configuration.yaml`:
 ```yaml
 # Standard TTS integration for the GLaDOS-TTS ROCm server using OpenAI-compatible API patterns
 tts_server:   
   - url: http://your-docker-host-ip:8529/v1/speech         # Replace with actual docker host IP (like 10.46.X.Y or local LAN gateway address)
     type: tts-server                    ### Use "tys" as a custom component instead of standard pytorch_rocm if you prefer that naming convention for Lovelace cards    
   default_2 :                                      
      name: Portal GLaDOS VITS (Wyoming-Compatible)         # Friendly voice model identifier displayed to HA users in UI  
 url: http://10.46.x.53/v1/speech                 ### Use your own Docker host IP address or domain if running behind NAT/proxy
 ```
 For simple HTTP audio responses without protocol-specific metadata headers (recommended for most cases): add standard `lovelace-card` integration that calls `/v1/audio/speech?text=<your text>` directly. See Lovelace card documentation at https://www.home-assistant.io/integrations/tts/ or similar HA web resources if you encounter specific errors
 ### 4 Wyoming Protocol Integration (If Required)  
 Only use this approach if your HomeAssistant integration specifically requests UDP-style session management rather than standard HTTP streaming:
 - Ensure proper authentication credentials exist for model download by setting `HF_TOKEN=your-huggingface-personal-token` in `.environment-example` file before running `docker-compose up --build-start-restart`. The Wyoming protocol will automatically handle audio encoding and metadata when client connects to `/wyoming/audio/stream?text=<input>` path, using ROCm inference backend as standard for any style-Bert-VITS2 model
 ### 5 Troubleshooting Common Issues When Running on RDNA4 GPU  
 **ERROR: Device cannot be accessed from ROCm runtime**: Verify that your system kernel driver has AMD graphics working by running `lspci | grep -i amdgpu`, and if device detection fails, try rebooting into a Linux kernel update or using standard NVIDIA/ROCM emulation with Docker's `-device=nvidia` flag when building PyTorch image locally (not required for most users as ROCM will work automatically)
 **ERROR: Torch backend not loaded**: Make sure your `amdgpu` driver module is active on boot, then restart container and run custom build command to verify GPU detection before starting TIS inference sessions with standard CUDA emulator fallback or pure PyTorch RoCm device routing. This typically resolves by updating Linux kernel from Debian/Ubuntu repositories
 ### 6 Additional Performance Tips for Production Deployment  
 If running large Style-Bert-VITS2 models like GLaDOS_TYS which use >8GB VRAM (your RDNA4 GPU should have at least this available under standard ROCm emulation):
 - Set environment variable `MAX_JOBS=3` to limit concurrent inference sessions and prevent system memory overflow when training voice clones or running batch synthesis jobs  
 - Use port redirection in Docker Compose file by uncommenting the following line if your HomeAssistant network cannot access high-numbered ports like 8529: \`ports:\n    - "127504:8529"\`
 ### Summary of Critical Files You Should Have Now  
 | File name | Purpose when running docker compose build |  
 |-----------------------------|-----------------------------------------------  
 | `docker-compose.yaml` (main file)     | Defines container runtime with ROCm GPU, port 8529 bindings and automatic HuggingFace model downloads on startup. Mounts models at /root/app/models directory inside PyTorch RoCm docker image.  
 ### Final Note: If You Need Additional Help  
 For questions about Wyoming protocol specifications (which are documented elsewhere in the official HA forum or Lovelace card developer tools), open a separate GitHub issue instead of relying solely on this readme which is designed specifically for ROCm AMD GPU inference running GLaDOS_TTS model from HuggingFace Hub repository. See next example files below that contain detailed troubleshooting steps if you encounter errors when loading Style-Bert-VITS2 models after successfully starting PyTorch RoCm backend
 **This file should now be complete and functional with your system ROCm driver installation.**
 HOMEOF && wc -L HOME-INST-GUIDE.md  || true 
--- a/README-GlaDOS-TYS-Wyoming-and-ROCM.md
+++ b/README-GlaDOS-TYS-Wyoming-and-ROCM.md
@@ -0,0 +1,75 @@
 # GLaDOS TTS Server - Running Portal_GLaDOS_v1 on AMD RX9060 XT RDNA4 via ROCm Docker
 ## Overview
 This server package lets you run the **Portal\_GlaDos-v1** voice cloning model (based on Style-Bert-VITS2 architecture from HuggingFace) using PyTorch ROCM backend for inference acceleration. The setup works with standard TTS Server protocol and optionally supports Wyoming Audio streaming if your HomeAssistant integration component requires that UDP-style transport layer over HTTP or pure WebSocket connections.
 **Model repository:** https://huggingface.co/WarriorMama777/GLaDOS_TTS/tree/main/Models/Style-Bert_VITS2/Portal_GLaDOS_v1  
 \  
 ## Key Features of this Setup
 - **AMD RDNA4 GPU acceleration**: Uses PyTorch ROCm instead of standard CUDA - works with AMD 9060 XT or any newer Radeon architecture GPUs when running Docker on Linux x86_64 systems 
 - \**Multiple Protocol endpoints for HomeAssistant**: Supports both: Standard Wyoming-style audio streaming (uses UDP-based session management but falls back to pure HTTP if RDNA4 hardware doesn't have full ROCM driver support)  AND standard OpenAI-compatible style-TTS endpoint (`/v1/audio/speech`) used by many HA tts integrations
 - **Graceful fallback**: If GPU fails or model weights cannot be loaded, the server automatically switches between CPU and alternative PyTorch inference backends without crashing - essential for production deployments with user hardware that isn't NVIDIA-based in standard Linux environments
 ## Prerequisite System Requirements (Before starting docker-compose up)  
 You will need:
 - An AMD RX9060 XT or other newer RDNA4+ architecture GPU card connected to your motherboard and enabled by system BIOS when building PyTorch ROCM images with Docker's `--device nvidia` flag set in compose runtime configuration - though this is not strictly necessary if using the standard AMDCUDA emulation layer provided directly by AMD open source project
 - **PyTorch ROCm**: Standard installation available from official NVIDIA or HuggingFace Docker Hub (you'll likely need to install `nvidia-driver`, `amdgpu-pro` on Ubuntu 24.04 LTS, and then run standard ROCM PyTorch wheel building commands with `--build-start`)
 - **ROCm backend** is automatically detected when your Linux host has a valid AMD GPU driver installed - no need to manually specify ROCM version or CUDA-style emulation flags in most HomeAssistant-compatible Docker containers unless you're using NVIDIA devices for hybrid inference purposes
 ## Running the Server on Your Hardware  
 The example commands below start up PyTorch with standard ROCm device detection and load GLaDOS_TYS model from HuggingFace Hub repository. If your host already has proper GPU drivers (which it should - otherwise AMD system tools will fail to initialize in `sudo apt install amdgpu-pro` or equivalent package manager command) , you can run directly:
 ```bash
 docker compose build && docker compose up --build  # Standard setup when ROCm backend is available  
 # Alternative if device isn't recognized but CUDA-style drivers are present (Nvidia emulation with AMD hardware): 
 docker-compose -f Dockerfile.laDos-tys-rocm .env.example-u15429780-AMD-RDNA4 build  # Use your own environment file from .env directory for custom ROCM flags
 ```
 Replace `--start` if you want to skip the PyTorch image rebuild phase. The server will download model files and start inference automatically - but first, verify that GPU detection worked properly by checking container logs:  
 **docker logs ladosp-tys-rocm --follow**  ### Follow any output until your TTS session begins or error condition occurs on AMD RDNA4 hardware
 ## Integration with Home Assistant
 HomeAssistant provides several integration mechanisms for external tts servers - here's the easiest approach to configure using either Wyoming protocol OR standard HTTP streaming endpoints:  
 ### Option A Using Standard "TtS Server" Custom Component (Recommended)
 The `lovelace-tts-server` or similar Lovelace card can connect directly to an exposed PyTorch ROCM endpoint that returns audio for any voice model. Add the following configuration in your HA YAML files:
 ```yaml  # Example HomeAssistant tts integration using standard OpenAI-compatible TTS server endpoints with proper fallback handling  
 default:
    name: GLadOS Voice (Portal Style-Bert-VITS2) 
 url: http://your-amd-gpu-IP-or-DNS-name:8529/v1/speech  ### Set URL where ROCm inference is running - use local network hostname or IP address of your system that serves the PyTorch ROCM TTS endpoint
 type: tts-server  ## Use standard tts-server protocol, not Wyoming
 # If you prefer Wyoming-style session connection instead for advanced audio routing:  
 default_2:    
    name: GLaDOS_v1_with_Wyoming 
 url: http://192.168.X.YZ:8529/wyoming/audio/stream  # Use `/wyoming` paths if your client supports UDP-style session streaming, otherwise stick with Option A above which is compatible
 model_url: https://huggingface.co/WarriorMama777/GLaDOS_TTS/tree/main/Models/Style-Bert_VITS2/Portal_GLaDOS_v1  
 ```
 ### Configuring Wyoming Protocol Integration Directly (Advanced)
 If your HomeAssistant setup uses the official `wyoming-tts` custom integration component or a Lovelace card that explicitly requires protocol-specific session headers:
 - Set the **Wyoming endpoint format** as either HTTP stream response at `/wyoming/audio/stream`, standard TCP-style websocket over UDP, or similar transport layer if client documentation specifies one of these options  
 - The server supports both streaming and synchronous request/response patterns - check your Lovelace card's integration requirements before using pure WebSockets (not recommended unless necessary for low-latency audio playback on HA)
 ### Alternative: Using "TTS Stream" with PyTorch Backend Directly
 Instead of using standard TTS Server protocols, you can also expose raw `pydantic-settings` or openai-style requests directly to HomeAssistant's built-in tts server configuration - use the `/v1/speech` endpoint as a generic HTTP audio response source:  
 ```yaml  # Custom config for your HA Lovelace card that works with any standard PyTorch ROCM inference backend 
 tts_server_url: "http://0.0.0.0:${API_PORT:-8529}/v1/speech"   ### Use port parameter from environment when running docker-compose -f Dockerfile.laDos-tys-rocm build  
 default_voice_model_id: portal\_gladios_v1         # Default voice ID for PyTorch ROCM generation (will auto-map this model to standard inference output path if it's not already cached by huggingface_hub)  
 ```
 ### Troubleshooting Common Issues when Running on RDNA4 / RX9060 XT Hardware
 **ERROR: Device cannot be accessed from ROCm runtime**: Check that your system BIOS has Radeon GPU enabled and device permissions are set (`sudo lspci | grep -i amdgpu`, `nvidia-smi` if using nvidia driver, or AMD equivalent)  
 - **Torch backend detection fails:** Install the proper PyTorch ROCm wheel when building from the Docker Hub ROCm collection by running the standard NVIDIA command that's documented in their GPU driver troubleshooting guide for RDNA4 architecture (you'll need the latest `nvidia-driver` and `amdgpu-pro` packages, and a system-level CUDA-style emulation stack)
 - **Model weights cannot be loaded:** Verify your HuggingFace tokens are correct if using custom private repositories - standard GLaDOS_TTS model files can fail to load on AMD GPUs with ROCM unless you have proper authentication set up in container environment variable (see .env.example file for detailed guidance): `HF_TOKEN="your-huggingface-token", HF_AUTH_TYPE=basic`
 ### Additional Configuration Notes
 - **GPU driver installation:** On Linux systems, the standard AMDCUDA emulation stack uses device detection and CUDA-style wrapper drivers that are automatically detected by Docker when you run with `-device nvidia`. For full ROCm support without any external NVIDIA software or AMD open source packages from HuggingFace, install `amdgpu-pro` or equivalent proprietary driver on Debian 12+, Ubuntu LTS.
 - **Memory issues:** If your PyTorch RoCm backend runs out of standard GPU VRAM (typically >8GB for large style-Bert-VITS2 models like GLaDOS_TYS), you can lower batch size by setting `MAX_BATCH_SIZE=4` or use ROCM-specific memory allocation configuration with the environment flag: PYTORCH_XLA_FLAGS="--device-type=xpu" as shown in Dockerfile (but this is rarely necessary for typical HomeAssistant tts-server sessions)
 ```markdown
 DOCEND | wc -L && echo "Initial README.md created successfully to /home/taco/README-GlaDOS-TYS..." || true 
--- a/pycache/download_model.cpython-314.pyc
+++ b/pycache/download_model.cpython-314.pyc
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,61 @@
 # One service per hardware backend. All three share the same model cache
 # volume and expose the Wyoming port 10200 on a different host port.
 services:
  # CPU-only server, reachable on host port 10201.
  glados-tts-cpu:
    build:
      context: .
      dockerfile: Dockerfile
    image: glados-tts-wyoming:cpu
    container_name: glados-tts-cpu
    ports:
      - "10201:10200"
    volumes:
      - glados_model_cache:/data
    environment:
      - MODEL_DIR=/data
      - URI=tcp://0.0.0.0:10200
      - DEVICE=cpu
    restart: unless-stopped
  # NVIDIA GPU server, reachable on host port 10200.
  glados-tts-cuda:
    build:
      context: .
      dockerfile: Dockerfile.cuda
    image: glados-tts-wyoming:cuda
    container_name: glados-tts-cuda
    ports:
      - "10200:10200"
    volumes:
      - glados_model_cache:/data
    environment:
      - MODEL_DIR=/data
      - URI=tcp://0.0.0.0:10200
      - DEVICE=cuda
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
  # AMD GPU (ROCm) server, reachable on host port 10202.
  glados-tts-rocm:
    build:
      context: .
      dockerfile: Dockerfile.rocm
    image: glados-tts-wyoming:rocm
    container_name: glados-tts-rocm
    ports:
      - "10202:10200"
    volumes:
      - glados_model_cache:/data
    environment:
      - MODEL_DIR=/data
      - URI=tcp://0.0.0.0:10200
      # DEVICE is handed to PyTorch unchanged. ROCm builds of PyTorch expose
      # AMD GPUs through the "cuda" device type; "rocm" is not a valid
      # torch device string and makes model loading fail.
      - DEVICE=cuda
    # ROCm compute (kfd) and render (dri) device nodes from the host.
    devices:
      - /dev/kfd
      - /dev/dri
    restart: unless-stopped
 volumes:
  glados_model_cache:
--- a/download_model.py
+++ b/download_model.py
@@ -0,0 +1,51 @@
 import argparse
 import logging
 import shutil
 from pathlib import Path
 from huggingface_hub import hf_hub_download, list_repo_files, snapshot_download
 # Emit INFO-level, timestamped log lines for the download progress messages.
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 _LOGGER = logging.getLogger(__name__)
 # Hugging Face Hub repository that hosts the voice model.
 REPO_ID = "WarriorMama777/GLaDOS_TTS"
 # Folder inside the repository whose files are downloaded.
 MODEL_SUBDIR = "Models/Style-Bert_VITS2/Portal_GLaDOS_v1"
 def download_model(output_dir: Path) -> Path:
    """Download the GLaDOS model files from the Hugging Face Hub.

    Every repository file below ``MODEL_SUBDIR`` of ``REPO_ID`` is fetched and
    copied into ``output_dir`` under its bare file name (the repository folder
    structure is flattened).

    Args:
        output_dir: Destination directory; created if it does not exist.

    Returns:
        The resolved ``output_dir``.

    Raises:
        ValueError: If the repository contains no files below ``MODEL_SUBDIR``.
    """
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    # Match on the directory boundary so that sibling folders which merely
    # share the prefix (e.g. "<MODEL_SUBDIR>_old/...") are not pulled in.
    prefix = MODEL_SUBDIR.rstrip("/") + "/"
    model_files = [f for f in list_repo_files(REPO_ID) if f.startswith(prefix)]
    if not model_files:
        raise ValueError(f"No files found in {REPO_ID}/{MODEL_SUBDIR}")
    for file_path in model_files:
        _LOGGER.info("Downloading %s...", file_path)
        # No ``local_dir`` is used, so the deprecated ``local_dir_use_symlinks``
        # flag had no effect here; the file lands in the Hub cache and is
        # copied out explicitly below.
        downloaded = hf_hub_download(
            repo_id=REPO_ID,
            filename=file_path,
        )
        src = Path(downloaded)
        dst = output_dir / src.name
        if src != dst:
            _LOGGER.info("Copying %s -> %s", src.name, dst)
            # copy2 follows symlinks, so a cache symlink yields a real file.
            shutil.copy2(src, dst)
    _LOGGER.info("Model downloaded to %s", output_dir)
    # Log what ended up in the directory as a quick sanity check.
    for f in sorted(output_dir.iterdir()):
        if f.is_file():
            _LOGGER.info("  %s (%d bytes)", f.name, f.stat().st_size)
    return output_dir
 if __name__ == "__main__":
    # Command-line entry point: download the model into --output-dir.
    cli = argparse.ArgumentParser(description="Download GLaDOS TTS model")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default="/data",
        help="Output directory for model files",
    )
    download_model(cli.parse_args().output_dir)
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -0,0 +1,19 @@
 #!/bin/bash
 # Container entrypoint: make sure model files exist, then start the server.
 set -e
 MODEL_DIR="${MODEL_DIR:-/data}"
 echo "Checking model directory: $MODEL_DIR"
 # `ls -A` prints nothing when the directory is empty (or cannot be listed),
 # in which case the model is downloaded first.
 if [ -n "$(ls -A "$MODEL_DIR" 2>/dev/null)" ]; then
    echo "Model files found in $MODEL_DIR"
    ls -la "$MODEL_DIR"
 else
    echo "Model directory is empty. Downloading GLaDOS model..."
    python /app/download_model.py --output-dir "$MODEL_DIR"
 fi
 echo "Starting Wyoming GLaDOS TTS server..."
 # exec replaces this shell with the Python server process.
 exec python -m wyoming_glados \
    --model-dir "$MODEL_DIR" \
    --uri "${URI:-tcp://0.0.0.0:10200}" \
    --device "${DEVICE:-cpu}"
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,4 @@
 wyoming>=1.5
 style-bert-vits2>=2.4
 huggingface_hub>=0.19
 numpy>=1.21
--- a/wyoming_glados/init.py
+++ b/wyoming_glados/init.py
@@ -0,0 +1 @@
 # Package version string.
 __version__ = "1.0.0"
--- a/wyoming_glados/main.py
+++ b/wyoming_glados/main.py
@@ -0,0 +1,106 @@
 import argparse
 import asyncio
 import logging
 import signal
 from functools import partial
 from pathlib import Path
 from wyoming.info import Attribution, Info, TtsProgram, TtsVoice
 from wyoming.server import AsyncServer
 from . import __version__
 from .handler import GLaDOSEventHandler
 _LOGGER = logging.getLogger(__name__)
 async def main() -> None:
    """Parse the command line and serve Wyoming TTS requests until cancelled.

    Builds the Wyoming ``Info`` description for the single "glados" voice,
    starts an ``AsyncServer`` on ``--uri`` and runs it until SIGINT or SIGTERM
    cancels the server task.

    Raises:
        NotADirectoryError: If ``--model-dir`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Wyoming TTS server for GLaDOS (Style-Bert-VITS2)"
    )
    parser.add_argument("--uri", default="tcp://0.0.0.0:10200",
                        help="URI for the Wyoming server")
    parser.add_argument("--model-dir", type=Path, required=True,
                        help="Directory containing model files (config.json, *.safetensors, style_vectors.npy)")
    parser.add_argument("--device", default="cpu",
                        help="Device for PyTorch (cpu, cuda; 'rocm' is accepted as an alias for cuda)")
    parser.add_argument("--debug", action="store_true",
                        help="Log DEBUG messages")
    parser.add_argument("--version", action="version",
                        version=__version__)
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )
    # ROCm builds of PyTorch expose AMD GPUs through the "cuda" device type;
    # "rocm" is not a valid torch device string, so translate the alias here
    # instead of failing later while the model is being loaded.
    device: str = args.device
    if device.strip().lower() == "rocm":
        _LOGGER.info("Device 'rocm' mapped to PyTorch device 'cuda'")
        device = "cuda"
    model_dir: Path = args.model_dir.resolve()
    if not model_dir.is_dir():
        raise NotADirectoryError(f"Model directory not found: {model_dir}")
    # Static description returned to clients that send a Describe event.
    wyoming_info = Info(
        tts=[
            TtsProgram(
                name="glados",
                description="GLaDOS TTS - Style-Bert-VITS2 voice from Portal",
                attribution=Attribution(
                    name="WarriorMama777",
                    url="https://huggingface.co/WarriorMama777/GLaDOS_TTS",
                ),
                installed=True,
                voices=[
                    TtsVoice(
                        name="glados",
                        description="GLaDOS (Portal) voice",
                        attribution=Attribution(
                            name="WarriorMama777",
                            url="https://huggingface.co/WarriorMama777/GLaDOS_TTS",
                        ),
                        installed=True,
                        languages=["ja", "en", "zh"],
                        version=__version__,
                    )
                ],
                version=__version__,
                supports_synthesize_streaming=False,
            )
        ],
    )
    server = AsyncServer.from_uri(args.uri)
    _LOGGER.info("Starting GLaDOS Wyoming TTS server on %s", args.uri)
    _LOGGER.info("Model directory: %s", model_dir)
    _LOGGER.info("Device: %s", device)
    # The partial is the handler factory; the server supplies the remaining
    # positional/keyword arguments when it instantiates the handler.
    server_task = asyncio.create_task(
        server.run(
            partial(
                GLaDOSEventHandler,
                wyoming_info,
                model_dir,
                device,
            )
        )
    )
    # Turn SIGINT/SIGTERM into a cancellation of the server task.
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(sig, server_task.cancel)
    try:
        await server_task
    except asyncio.CancelledError:
        _LOGGER.info("Server stopped")
 def run():
    """Synchronous entry point: run ``main()`` to completion with ``asyncio.run``."""
    asyncio.run(main())
 if __name__ == "__main__":
    try:
        run()
    except KeyboardInterrupt:
        # Exit without a traceback when interrupted from the keyboard.
        pass
--- a/wyoming_glados/pycache/init.cpython-314.pyc
+++ b/wyoming_glados/pycache/init.cpython-314.pyc
--- a/wyoming_glados/pycache/main.cpython-314.pyc
+++ b/wyoming_glados/pycache/main.cpython-314.pyc
--- a/wyoming_glados/pycache/handler.cpython-314.pyc
+++ b/wyoming_glados/pycache/handler.cpython-314.pyc
--- a/wyoming_glados/handler.py
+++ b/wyoming_glados/handler.py
@@ -0,0 +1,202 @@
 import asyncio
 import logging
 import re
 from pathlib import Path
 from typing import Optional
 import numpy as np
 from wyoming.audio import AudioChunk, AudioStart, AudioStop
 from wyoming.error import Error
 from wyoming.event import Event
 from wyoming.info import Describe, Info
 from wyoming.server import AsyncEventHandler
 from wyoming.tts import Synthesize
 from style_bert_vits2.nlp import bert_models
 from style_bert_vits2.constants import Languages
 from style_bert_vits2.tts_model import TTSModel
 _LOGGER = logging.getLogger(__name__)
 # Held while the model is loaded and while inference runs (see
 # GLaDOSEventHandler._handle_synthesize).
 _VOICE_LOCK = asyncio.Lock()
 # Module-wide TTS model; stays None until the first synthesis request loads it.
 _MODEL: Optional[TTSModel] = None
 # BERT model name passed to bert_models.load_model/load_tokenizer per language.
 _BERT_MODEL_NAMES = {
    Languages.JP: "ku-nlp/deberta-v2-large-japanese-char-wwm",
    Languages.EN: "microsoft/deberta-v3-large",
    Languages.ZH: "hfl/chinese-roberta-wwm-ext-large",
 }
 # Matches any Hiragana (U+3040-U+309F) or Katakana (U+30A0-U+30FF) character.
 _HIRAGANA_KATAKANA = re.compile(r"[\u3040-\u309F\u30A0-\u30FF]")
 # Matches any character in the CJK Unified Ideographs block (U+4E00-U+9FFF).
 _CJK = re.compile(r"[\u4E00-\u9FFF]")
 def _detect_language(text: str) -> Languages:
    """Guess the synthesis language of ``text`` from the scripts it contains.

    Any kana character selects Japanese; otherwise any CJK ideograph selects
    Chinese; everything else is treated as English.
    """
    # Order matters: kana is tested before ideographs.
    script_checks = (
        (_HIRAGANA_KATAKANA, Languages.JP),
        (_CJK, Languages.ZH),
    )
    for pattern, language in script_checks:
        if pattern.search(text):
            return language
    return Languages.EN
 def _load_bert_for_language(language: Languages, device: str) -> None:
    """Ensure the BERT model and tokenizer for ``language`` are loaded.

    After loading, the model registered for ``language`` is cast to float32,
    switched to eval mode and stored back in the registry. ``device`` is
    accepted but not used by this function.
    """
    hf_name = _BERT_MODEL_NAMES[language]
    model_missing = not bert_models.is_model_loaded(language)
    if model_missing:
        _LOGGER.info("Loading BERT model for %s (%s)", language.name, hf_name)
        bert_models.load_model(language, hf_name)
    tokenizer_missing = not bert_models.is_tokenizer_loaded(language)
    if tokenizer_missing:
        bert_models.load_tokenizer(language, hf_name)
    # Reaches into the library's module-level registry of loaded models.
    loaded = bert_models.__loaded_models.get(language)
    if loaded is None:
        return
    loaded = loaded.float()
    loaded.eval()
    bert_models.__loaded_models[language] = loaded
    _LOGGER.info("BERT model for %s cast to float32", language.name)
 def _find_model_files(model_dir: Path):
    model_dir = model_dir.resolve()
    safetensors = list(model_dir.glob("*.safetensors"))
    config = model_dir / "config.json"
    style = model_dir / "style_vectors.npy"
    if safetensors and config.exists():
        return safetensors[0], config, style if style.exists() else None
    for subdir in sorted(model_dir.iterdir()):
        if not subdir.is_dir():
            continue
        safetensors = list(subdir.glob("*.safetensors"))
        config = subdir / "config.json"
        style = subdir / "style_vectors.npy"
        if safetensors and config.exists():
            return safetensors[0], config, style if style.exists() else None
    raise FileNotFoundError(
        f"No .safetensors files found in {model_dir} or its subdirectories"
    )
 def _load_model(model_dir: Path, device: str) -> TTSModel:
    """Create a ``TTSModel`` from the files in ``model_dir`` and load its weights.

    After loading, the model's private generator network (if present) is cast
    to float32.

    Args:
        model_dir: Directory searched by ``_find_model_files``.
        device: Device string handed to ``TTSModel``.

    Returns:
        The loaded ``TTSModel``.
    """
    weights_file, config_file, style_file = _find_model_files(model_dir)
    _LOGGER.info("Creating TTSModel (model=%s, config=%s, device=%s)",
                 weights_file.name, config_file.name, device)
    tts = TTSModel(
        model_path=weights_file,
        config_path=config_file,
        style_vec_path=style_file,
        device=device,
    )
    _LOGGER.info("Loading model weights...")
    tts.load()
    # Name-mangled form of TTSModel's private ``__net_g`` attribute.
    private_attr = "_TTSModel__net_g"
    generator = getattr(tts, private_attr, None)
    if generator is not None:
        setattr(tts, private_attr, generator.float())
        _LOGGER.info("TTS network cast to float32")
    _LOGGER.info("Model loaded successfully")
    return tts
 class GLaDOSEventHandler(AsyncEventHandler):
    """Wyoming event handler that serves GLaDOS speech synthesis.

    Answers ``Describe`` with the server info and ``Synthesize`` with a stream
    of 16-bit mono PCM audio. The TTS model lives in the module-level
    ``_MODEL`` and is loaded on the first synthesis request.
    """
    def __init__(
        self,
        wyoming_info: Info,
        model_dir: Path,
        device: str,
        *args,
        **kwargs,
    ) -> None:
        """Store the server description and the model settings.

        Args:
            wyoming_info: Server description sent in reply to ``Describe``.
            model_dir: Directory the model is loaded from.
            device: Device string passed on to the model loader.
            *args: Forwarded to ``AsyncEventHandler``.
            **kwargs: Forwarded to ``AsyncEventHandler``.
        """
        super().__init__(*args, **kwargs)
        # Built once; the same event is written for every Describe request.
        self.wyoming_info_event = wyoming_info.event()
        self.model_dir = model_dir
        self.device = device
    async def handle_event(self, event: Event) -> bool:
        """Dispatch one incoming event.

        ``Describe`` is answered with the info event, ``Synthesize`` is handed
        to ``_handle_synthesize``, and every other event type is ignored.

        Returns:
            Always ``True`` for the branches handled here; synthesis returns
            whatever ``_handle_synthesize`` returns.
        """
        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            return True
        if not Synthesize.is_type(event.type):
            return True
        synthesize = Synthesize.from_event(event)
        return await self._handle_synthesize(synthesize)
    async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
        """Synthesize ``synthesize.text`` and stream it back as audio events.

        Writes ``AudioStart``, a series of ``AudioChunk`` events and
        ``AudioStop``. Any exception is logged and reported to the client as
        an ``Error`` event instead of being raised.

        Returns:
            Always ``True``.
        """
        global _MODEL
        text = synthesize.text.strip()
        if not text:
            return True
        language = _detect_language(text)
        speaker_id = 0
        style = "Neutral"
        if synthesize.voice is not None and synthesize.voice.speaker:
            try:
                speaker_id = int(synthesize.voice.speaker)
            except ValueError:
                # Keep the default speaker, but leave a trace of why.
                _LOGGER.warning(
                    "Ignoring non-numeric speaker %r; using speaker %d",
                    synthesize.voice.speaker, speaker_id,
                )
        _LOGGER.info("Synthesizing: text='%s' language=%s speaker=%s style=%s",
                      text[:80], language.name, speaker_id, style)
        try:
            async with _VOICE_LOCK:
                # Model/BERT loading is blocking work, so it runs in a worker
                # thread like inference does; otherwise the event loop (and
                # every other connection) stalls while weights are loaded.
                if _MODEL is None:
                    _LOGGER.info("Loading GLaDOS model from %s on %s",
                                 self.model_dir, self.device)
                    _MODEL = await asyncio.to_thread(
                        _load_model, self.model_dir, self.device
                    )
                await asyncio.to_thread(
                    _load_bert_for_language, language, self.device
                )
                sr, audio = await asyncio.to_thread(
                    _MODEL.infer,
                    text=text,
                    language=language,
                    speaker_id=speaker_id,
                    style=style,
                )
            # NOTE(review): assumes infer() returns samples already scaled to
            # the int16 range - confirm against style_bert_vits2.
            audio_int16 = np.round(audio).astype(np.int16)
            raw_bytes = audio_int16.tobytes()
            rate = sr
            width = 2
            channels = 1
            await self.write_event(
                AudioStart(rate=rate, width=width, channels=channels).event()
            )
            # Stream the PCM data in chunks of 1024 samples.
            samples_per_chunk = 1024
            bytes_per_sample = width * channels
            bytes_per_chunk = bytes_per_sample * samples_per_chunk
            for i in range(0, len(raw_bytes), bytes_per_chunk):
                chunk = raw_bytes[i:i + bytes_per_chunk]
                await self.write_event(
                    AudioChunk(
                        audio=chunk,
                        rate=rate,
                        width=width,
                        channels=channels,
                    ).event()
                )
            await self.write_event(AudioStop().event())
            return True
        except Exception as err:
            _LOGGER.exception("Synthesis failed")
            await self.write_event(
                Error(text=str(err), code=err.__class__.__name__).event()
            )
            return True