mirror of
https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git
synced 2026-06-12 20:10:19 -06:00
Add VAD trimming and Docker publishing
This commit is contained in:
48
.github/workflows/docker-publish.yml
vendored
Normal file
48
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Builds the trainer image from ./dockerfile and pushes it to GitHub Container
# Registry. Runs on every push to main and on manual dispatch.
name: Publish Docker Image

on:
  workflow_dispatch:
  push:
    branches: [main]

# Scope of the built-in GITHUB_TOKEN: read the repository, write packages.
permissions:
  contents: read
  packages: write

# One run per ref; a newer run cancels the one still in progress.
concurrency:
  group: docker-publish-${{ github.ref }}
  cancel-in-progress: true

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: tatertotterson/microwakeword-trainer-nvidia-docker

jobs:
  docker:
    name: Docker image
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: dockerfile
          platforms: linux/amd64
          push: true
          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
          # Both cache directions share one scope so successive runs reuse layers.
          cache-from: type=gha,scope=mww-trainer-nvidia-docker
          cache-to: type=gha,mode=max,scope=mww-trainer-nvidia-docker
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
personal_samples/*
|
personal_samples/*
|
||||||
data/
|
data/
|
||||||
|
trim_history/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
66
run.sh
66
run.sh
@@ -26,6 +26,16 @@ echo "-> URL: http://localhost:${PORT}/"
|
|||||||
|
|
||||||
mkdir -p "${DATA_DIR}"
|
mkdir -p "${DATA_DIR}"
|
||||||
|
|
||||||
|
# Install the trainer UI's Python dependencies with ${PIP}.
# The web stack and ESPHome are pinned to the *_VERSION variables; silero-vad
# and numpy only carry a lower bound.
install_ui_deps() {
  local -a requirements=(
    "fastapi==${FASTAPI_VERSION}"
    "uvicorn[standard]==${UVICORN_VERSION}"
    "python-multipart==${PY_MULTIPART_VERSION}"
    "esphome==${ESPHOME_VERSION}"
    "silero-vad>=5.0.0"
    "numpy>=1.24.0"
  )

  # ${PIP} is left unquoted, matching the other "${PIP} install" call in this script.
  ${PIP} install "${requirements[@]}"
}
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Trainer UI venv (separate)
|
# Trainer UI venv (separate)
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
@@ -40,32 +50,54 @@ source "${VENV_DIR}/bin/activate"
|
|||||||
if [[ ! -f "${PIN_FILE}" ]]; then
|
if [[ ! -f "${PIN_FILE}" ]]; then
|
||||||
echo "Installing pinned trainer UI deps"
|
echo "Installing pinned trainer UI deps"
|
||||||
${PIP} install -U pip setuptools wheel
|
${PIP} install -U pip setuptools wheel
|
||||||
${PIP} install \
|
install_ui_deps
|
||||||
"fastapi==${FASTAPI_VERSION}" \
|
|
||||||
"uvicorn[standard]==${UVICORN_VERSION}" \
|
|
||||||
"python-multipart==${PY_MULTIPART_VERSION}" \
|
|
||||||
"esphome==${ESPHOME_VERSION}"
|
|
||||||
touch "${PIN_FILE}"
|
touch "${PIN_FILE}"
|
||||||
else
|
else
|
||||||
echo "Reusing existing trainer UI venv (no upgrades)"
|
echo "Reusing existing trainer UI venv (no upgrades)"
|
||||||
if ! "${PY}" - "${ESPHOME_VERSION}" <<'PY' >/dev/null 2>&1
|
if ! "${PY}" - "${FASTAPI_VERSION}" "${UVICORN_VERSION}" "${PY_MULTIPART_VERSION}" "${ESPHOME_VERSION}" <<'PY' >/dev/null 2>&1
|
||||||
import importlib.metadata
|
import importlib.metadata as md
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
expected = sys.argv[1]
|
fastapi_version, uvicorn_version, multipart_version, esphome_version = sys.argv[1:5]
|
||||||
installed = importlib.metadata.version("esphome")
|
|
||||||
raise SystemExit(0 if installed == expected else 1)
|
def version_tuple(value):
    """Return the leading numeric release components of *value* as a tuple of ints.

    "-" is treated like "." as a separator. Parsing stops at the first
    component that is not purely numeric; only that component's leading digits
    are kept, so suffixes such as "rc1" or "+cu121" cannot inflate the number
    ("0rc1" -> 0, not 1; "0+cu121" -> 0, not 121).
    """
    parts = []
    for token in str(value).replace("-", ".").split("."):
        if token.isdigit():
            parts.append(int(token))
            continue

        # First non-numeric component: keep its numeric prefix (if any), then
        # stop, because everything after it is pre-release/local metadata.
        prefix_len = 0
        while prefix_len < len(token) and token[prefix_len].isdigit():
            prefix_len += 1
        if prefix_len:
            parts.append(int(token[:prefix_len]))
        break
    return tuple(parts)
|
||||||
|
|
||||||
|
exact = {
|
||||||
|
"fastapi": fastapi_version,
|
||||||
|
"uvicorn": uvicorn_version,
|
||||||
|
"python-multipart": multipart_version,
|
||||||
|
"esphome": esphome_version,
|
||||||
|
}
|
||||||
|
minimum = {
|
||||||
|
"silero-vad": "5.0.0",
|
||||||
|
"numpy": "1.24.0",
|
||||||
|
}
|
||||||
|
present = ("torch", "zeroconf")
|
||||||
|
|
||||||
|
for package, expected in exact.items():
|
||||||
|
if md.version(package) != expected:
|
||||||
|
raise SystemExit(1)
|
||||||
|
for package, minimum_version in minimum.items():
|
||||||
|
if version_tuple(md.version(package)) < version_tuple(minimum_version):
|
||||||
|
raise SystemExit(1)
|
||||||
|
for package in present:
|
||||||
|
md.version(package)
|
||||||
PY
|
PY
|
||||||
then
|
then
|
||||||
echo "Firmware tab dependencies missing or stale; installing ESPHome firmware dependencies"
|
echo "UI dependencies missing or stale; installing recorder dependencies"
|
||||||
${PIP} install \
|
install_ui_deps
|
||||||
"fastapi==${FASTAPI_VERSION}" \
|
|
||||||
"uvicorn[standard]==${UVICORN_VERSION}" \
|
|
||||||
"python-multipart==${PY_MULTIPART_VERSION}" \
|
|
||||||
"esphome==${ESPHOME_VERSION}"
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Trainer server env
|
# Trainer server env
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
|
|||||||
@@ -622,6 +622,67 @@
|
|||||||
color: var(--orange2);
|
color: var(--orange2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.paginationControls {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
gap: 16px;
|
||||||
|
padding: 12px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageBtn {
|
||||||
|
padding: 6px 14px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
background: rgba(255,255,255,0.06);
|
||||||
|
border: 1px solid rgba(255,255,255,0.12);
|
||||||
|
color: var(--text, #fff);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageBtn:disabled {
|
||||||
|
opacity: 0.3;
|
||||||
|
cursor: default;
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageInfo {
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--muted, #888);
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageJump {
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--muted, #888);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageInput {
|
||||||
|
width: 40px;
|
||||||
|
padding: 4px 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
text-align: center;
|
||||||
|
background: rgba(255,255,255,0.06);
|
||||||
|
border: 1px solid rgba(255,255,255,0.12);
|
||||||
|
border-radius: 4px;
|
||||||
|
color: var(--text, #fff);
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageJumpBtn {
|
||||||
|
padding: 4px 10px;
|
||||||
|
font-size: 13px;
|
||||||
|
background: rgba(255,255,255,0.1);
|
||||||
|
border: 1px solid rgba(255,255,255,0.15);
|
||||||
|
border-radius: 4px;
|
||||||
|
color: var(--text, #fff);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.paginationControls .pageJumpBtn:hover {
|
||||||
|
background: rgba(255,255,255,0.18);
|
||||||
|
}
|
||||||
|
|
||||||
.tabs {
|
.tabs {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
@@ -960,6 +1021,78 @@
|
|||||||
100% { transform: translateY(0) scale(1) rotate(0deg); }
|
100% { transform: translateY(0) scale(1) rotate(0deg); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.trimOverlay {
|
||||||
|
position: fixed; inset: 0; padding: 22px;
|
||||||
|
display: flex; align-items: center; justify-content: center;
|
||||||
|
background: rgba(4,5,10,0.6); backdrop-filter: blur(10px);
|
||||||
|
opacity: 0; visibility: hidden; pointer-events: none;
|
||||||
|
transition: opacity 0.18s ease, visibility 0.18s ease;
|
||||||
|
z-index: 11000;
|
||||||
|
}
|
||||||
|
.trimOverlay.open { opacity: 1; visibility: visible; pointer-events: auto; }
|
||||||
|
|
||||||
|
.trimDialog {
|
||||||
|
width: min(960px, calc(100vw - 36px));
|
||||||
|
max-height: min(90vh, 900px);
|
||||||
|
display: grid; grid-template-rows: auto 1fr auto auto auto; gap: 12px;
|
||||||
|
padding: 18px; border-radius: 22px;
|
||||||
|
border: 1px solid rgba(255,255,255,0.12);
|
||||||
|
background: linear-gradient(180deg, rgba(17,20,28,0.82), rgba(8,10,16,0.94));
|
||||||
|
box-shadow: 0 28px 84px rgba(0,0,0,0.58);
|
||||||
|
backdrop-filter: blur(18px) saturate(1.12);
|
||||||
|
}
|
||||||
|
.trimHeader { display: flex; justify-content: space-between; align-items: flex-start; gap: 16px; }
|
||||||
|
.trimTitle { margin: 0; font-size: 18px; }
|
||||||
|
.trimHint { margin: 6px 0 0; font-size: 13px; color: var(--muted); }
|
||||||
|
|
||||||
|
.trimCanvasWrap {
|
||||||
|
position: relative; width: 100%; min-height: 120px;
|
||||||
|
border-radius: 14px; border: 1px solid rgba(255,255,255,0.08);
|
||||||
|
background: rgba(0,0,0,0.4); overflow: hidden;
|
||||||
|
}
|
||||||
|
.trimCanvas { width: 100%; height: 100%; display: block; }
|
||||||
|
|
||||||
|
.trimHandle {
|
||||||
|
position: absolute; top: 0; width: 48px; height: 100%;
|
||||||
|
cursor: ew-resize; pointer-events: auto; touch-action: none;
|
||||||
|
transform: translateX(-50%);
|
||||||
|
}
|
||||||
|
.trimHandle::after {
|
||||||
|
content: ''; position: absolute; top: 10%; bottom: 10%; left: 50%;
|
||||||
|
transform: translateX(-50%); width: 3px; border-radius: 2px;
|
||||||
|
background: var(--orange); box-shadow: 0 0 6px rgba(255,138,42,0.5);
|
||||||
|
}
|
||||||
|
.trimHandle::before {
|
||||||
|
content: ''; position: absolute; top: 50%; left: 50%;
|
||||||
|
transform: translate(-50%,-50%); width: 10px; height: 24px;
|
||||||
|
border-radius: 5px; border: 1px solid rgba(255,138,42,0.5);
|
||||||
|
background: rgba(255,138,42,0.15);
|
||||||
|
}
|
||||||
|
|
||||||
|
.trimTimeInfo {
|
||||||
|
display: flex; align-items: center; justify-content: center;
|
||||||
|
gap: 12px; font-size: 14px;
|
||||||
|
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||||
|
}
|
||||||
|
.trimSeparator { color: var(--muted); }
|
||||||
|
.trimVadInfo { display: flex; align-items: center; gap: 8px; font-size: 12px; }
|
||||||
|
.trimActions { display: flex; gap: 8px; flex-wrap: wrap; }
|
||||||
|
.trimActions button { flex: 1; min-width: 120px; }
|
||||||
|
|
||||||
|
.pill.trimBadge {
|
||||||
|
color: #89d4ff;
|
||||||
|
border-color: rgba(137,212,255,0.25);
|
||||||
|
background: rgba(137,212,255,0.08);
|
||||||
|
}
|
||||||
|
.trimBtn {
|
||||||
|
border-color: rgba(255,138,42,0.3);
|
||||||
|
background: rgba(255,138,42,0.1);
|
||||||
|
}
|
||||||
|
.trimBtn:hover {
|
||||||
|
border-color: rgba(255,138,42,0.5);
|
||||||
|
background: rgba(255,138,42,0.18);
|
||||||
|
}
|
||||||
|
|
||||||
@media (max-width: 720px) {
|
@media (max-width: 720px) {
|
||||||
.wrap { padding: 18px 14px 30px; }
|
.wrap { padding: 18px 14px 30px; }
|
||||||
input[type="text"] { width: 100%; }
|
input[type="text"] { width: 100%; }
|
||||||
@@ -1033,6 +1166,15 @@
|
|||||||
.trainFooter button {
|
.trainFooter button {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
}
|
}
|
||||||
|
.trimOverlay { padding: 8px; }
|
||||||
|
.trimDialog {
|
||||||
|
width: 100%; height: 96vh;
|
||||||
|
padding: 14px; grid-template-rows: auto 1fr auto auto auto;
|
||||||
|
}
|
||||||
|
.trimCanvasWrap { min-height: 100px; }
|
||||||
|
.trimHeader { flex-direction: column; align-items: stretch; }
|
||||||
|
.trimActions { flex-direction: column; }
|
||||||
|
.trimActions button { width: 100%; min-width: unset; }
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
@@ -1218,7 +1360,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<section class="card studioPanel stack">
|
<section class="card studioPanel stack">
|
||||||
<div class="sampleLibraryHeader">
|
<div id="sampleLibraryHeader" class="sampleLibraryHeader">
|
||||||
<div class="studioPanelTitle">
|
<div class="studioPanelTitle">
|
||||||
<span class="studioStepBadge">1</span>
|
<span class="studioStepBadge">1</span>
|
||||||
<div>
|
<div>
|
||||||
@@ -1241,6 +1383,7 @@
|
|||||||
<div id="sampleLibraryList" class="capturedList">
|
<div id="sampleLibraryList" class="capturedList">
|
||||||
<div class="emptyState">No samples saved yet.</div>
|
<div class="emptyState">No samples saved yet.</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div id="samplePagination"></div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section class="card studioPanel stack">
|
<section class="card studioPanel stack">
|
||||||
@@ -1414,6 +1557,40 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Trim modal: waveform canvas with two draggable handles, a time readout,
     VAD summary and the action buttons. Shown by adding the "open" class. -->
<div id="trimOverlay" class="trimOverlay" aria-hidden="true">
  <!-- The dialog takes its accessible name/description from the title and hint below. -->
  <div id="trimDialog" class="trimDialog" role="dialog" aria-modal="true" aria-labelledby="trimTitle" aria-describedby="trimHint">
    <div class="trimHeader">
      <div>
        <h3 id="trimTitle" class="trimTitle">Trim Audio</h3>
        <p id="trimHint" class="trimHint">Drag the handles to select a region, then save as a new sample.</p>
      </div>
      <button id="closeTrimBtn" type="button">Close</button>
    </div>
    <div class="trimCanvasWrap">
      <canvas id="trimCanvas" class="trimCanvas"></canvas>
      <div id="trimStartHandle" class="trimHandle" data-handle="start"></div>
      <div id="trimEndHandle" class="trimHandle" data-handle="end"></div>
    </div>
    <div class="trimTimeInfo">
      <span id="trimStartTime">0.00s</span>
      <span class="trimSeparator">--</span>
      <span id="trimEndTime">0.00s</span>
      <span class="trimSeparator">|</span>
      <span id="trimDuration">Duration: 0.00s</span>
    </div>
    <div id="trimVadInfo" class="trimVadInfo">
      <span class="pill ok">VAD detected speech</span>
      <span id="trimVadSegments" class="muted"></span>
    </div>
    <div class="trimActions">
      <button id="trimPlayBtn" type="button">Play selection</button>
      <button id="trimSelectFirstVadBtn" type="button">Select first VAD</button>
      <button id="trimSaveBtn" class="primary" type="button">Save Trim</button>
      <button id="trimCancelBtn" type="button">Cancel</button>
    </div>
  </div>
</div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
const $ = (id) => document.getElementById(id);
|
const $ = (id) => document.getElementById(id);
|
||||||
|
|
||||||
@@ -1423,7 +1600,7 @@
|
|||||||
availableLanguages: [],
|
availableLanguages: [],
|
||||||
selectedFiles: [],
|
selectedFiles: [],
|
||||||
captured: { items: [], captured_count: 0, negative_count: 0, personal_count: 0 },
|
captured: { items: [], captured_count: 0, negative_count: 0, personal_count: 0 },
|
||||||
samples: { personal: [], negative: [], personal_count: 0, negative_count: 0, activeBucket: "personal" },
|
samples: { personal: [], negative: [], personal_count: 0, negative_count: 0, activeBucket: "personal", pages: { personal: 0, negative: 0 } },
|
||||||
firmware: { devices: [], templates: [], flashing: null, logLines: [], activeTemplateKey: "" },
|
firmware: { devices: [], templates: [], flashing: null, logLines: [], activeTemplateKey: "" },
|
||||||
uploadBusy: false,
|
uploadBusy: false,
|
||||||
reviewBusy: false,
|
reviewBusy: false,
|
||||||
@@ -1432,11 +1609,274 @@
|
|||||||
firmwarePoller: null,
|
firmwarePoller: null,
|
||||||
activeView: "trainer",
|
activeView: "trainer",
|
||||||
};
|
};
|
||||||
|
const SAMPLE_PAGE_SIZE = 50;
|
||||||
let firmwareProfileSaveTimer = null;
|
let firmwareProfileSaveTimer = null;
|
||||||
let firmwareProfileReloadTimer = null;
|
let firmwareProfileReloadTimer = null;
|
||||||
let wakeSoundPreviewAudio = null;
|
let wakeSoundPreviewAudio = null;
|
||||||
let wakeSoundPreviewButton = null;
|
let wakeSoundPreviewButton = null;
|
||||||
|
|
||||||
|
// --- Trim Waveform Module ---
|
||||||
|
// State and behaviour of the trim dialog's waveform: loads and decodes one
// sample, tracks the selected region as ratios of the clip duration, draws the
// waveform, plays the selection and exports it as a 16 kHz mono 16-bit WAV.
const TrimWaveform = {
  audioBuffer: null,   // decoded AudioBuffer of the sample being trimmed
  duration: 0,         // audioBuffer.duration, in seconds
  startRatio: 0,       // selection start as a fraction of duration (0..1)
  endRatio: 1,         // selection end as a fraction of duration (0..1)
  vadSegments: [],     // segments returned by the VAD endpoint ({start, end} in seconds)
  isDragging: null,    // 'start' | 'end' while a handle is being dragged, else null
  _playback: null,     // { audioCtx, src } of the selection currently playing, if any

  // Clamp a ratio into [0, 1]; non-finite input (e.g. x / 0) becomes 0.
  _clampRatio(ratio) {
    return Number.isFinite(ratio) ? Math.min(1, Math.max(0, ratio)) : 0;
  },

  // Fetch and decode the sample, ask the server for VAD segments, and
  // pre-select the first segment when there is one.
  // Resolves to { duration, vadCount }; rejects if the audio cannot be
  // fetched or decoded. A VAD failure is only logged.
  async init(bucket, fileName) {
    const audioUrl = `/api/audio/${encodeURIComponent(bucket)}/${encodeURIComponent(fileName)}`;
    const resp = await fetch(audioUrl);
    if (!resp.ok) {
      // Without this, an error page would be handed to decodeAudioData and
      // surface as an opaque decoding failure.
      throw new Error(`Audio request failed (HTTP ${resp.status})`);
    }
    const arrayBuf = await resp.arrayBuffer();
    const ctx = new (window.AudioContext || window.webkitAudioContext)();
    try {
      this.audioBuffer = await ctx.decodeAudioData(arrayBuf);
    } finally {
      // Release the context even when decoding throws.
      ctx.close();
    }
    this.duration = this.audioBuffer.duration;

    try {
      const vadData = await api(
        `/api/samples/${encodeURIComponent(bucket)}/${encodeURIComponent(fileName)}/vad`,
        { method: 'POST' }
      );
      this.vadSegments = vadData.segments || [];
    } catch (e) {
      console.warn('VAD failed:', e);
      this.vadSegments = [];
    }

    this.startRatio = 0;
    this.endRatio = 1;
    const firstSegment = this.vadSegments[0];
    if (firstSegment && this.duration > 0) {
      const segStart = this._clampRatio(firstSegment.start / this.duration);
      const segEnd = this._clampRatio(firstSegment.end / this.duration);
      // Only adopt the segment if it is a usable, non-empty range.
      if (segEnd > segStart) {
        this.startRatio = segStart;
        this.endRatio = segEnd;
      }
    }
    return { duration: this.duration, vadCount: this.vadSegments.length };
  },

  // Stroke one vertical min/max line per pixel column in [fromPx, toPx).
  // Each column covers `step` consecutive samples; reads past the end of
  // `data` count as 0.
  _strokeEnvelope(ctx, data, step, mid, fromPx, toPx) {
    ctx.beginPath();
    for (let col = fromPx; col < toPx; col++) {
      let lo = 1, hi = -1;
      const base = col * step;
      for (let j = 0; j < step; j++) {
        const v = data[base + j] || 0;
        if (v < lo) lo = v;
        if (v > hi) hi = v;
      }
      ctx.moveTo(col, mid + lo * mid * 0.9);
      ctx.lineTo(col, mid + hi * mid * 0.9);
    }
    ctx.stroke();
  },

  // Redraw the canvas (dim full waveform, bright selection, shaded outside
  // area, VAD start markers) and move both handles to the current ratios.
  draw() {
    const canvas = $('trimCanvas');
    if (!canvas || !this.audioBuffer) return;
    const rect = canvas.getBoundingClientRect();
    const w = rect.width, h = rect.height;
    // A canvas with no layout size would make `step` infinite below and hang
    // the inner sampling loop, so there is nothing to draw yet.
    if (!(w > 0) || !(h > 0)) return;

    const dpr = window.devicePixelRatio || 1;
    canvas.width = w * dpr;    // resizing also resets the 2D context state
    canvas.height = h * dpr;
    const ctx = canvas.getContext('2d');
    ctx.scale(dpr, dpr);
    const mid = h / 2;
    ctx.clearRect(0, 0, w, h);

    const data = this.audioBuffer.getChannelData(0);
    const step = Math.max(1, Math.floor(data.length / w));

    // Full waveform (dim)
    ctx.strokeStyle = 'rgba(255,255,255,0.12)';
    ctx.lineWidth = 1;
    this._strokeEnvelope(ctx, data, step, mid, 0, Math.ceil(w));

    // Selection waveform (bright)
    const selStartPx = this.startRatio * w;
    const selEndPx = this.endRatio * w;
    ctx.strokeStyle = 'rgba(255,138,42,0.7)';
    ctx.lineWidth = 1.5;
    this._strokeEnvelope(ctx, data, step, mid, Math.floor(selStartPx), Math.floor(selEndPx) + 1);

    // Dim areas outside selection
    ctx.fillStyle = 'rgba(0,0,0,0.45)';
    ctx.fillRect(0, 0, selStartPx, h);
    ctx.fillRect(selEndPx, 0, w - selEndPx, h);

    // VAD segment markers (green line at each segment start)
    if (this.duration > 0) {
      ctx.strokeStyle = 'rgba(57,212,160,0.4)';
      ctx.lineWidth = 2;
      this.vadSegments.forEach((seg) => {
        const x = (seg.start / this.duration) * w;
        ctx.beginPath();
        ctx.moveTo(x, 0);
        ctx.lineTo(x, h);
        ctx.stroke();
      });
    }

    // Update handle positions
    $('trimStartHandle').style.left = (this.startRatio * 100) + '%';
    $('trimEndHandle').style.left = (this.endRatio * 100) + '%';
  },

  // Selection bounds in seconds.
  getStartTime() { return this.startRatio * this.duration; },
  getEndTime() { return this.endRatio * this.duration; },

  // Move the selection start to `s` seconds, kept below the end handle.
  // A value that is not a finite position in the clip leaves the selection as is.
  setStartSeconds(s) {
    const ratio = this.duration > 0 ? s / this.duration : NaN;
    if (Number.isFinite(ratio)) {
      this.startRatio = Math.max(0, Math.min(ratio, this.endRatio - 0.001));
    }
    this.draw(); updateTrimTimeDisplay();
  },

  // Move the selection end to `s` seconds, kept above the start handle.
  // A value that is not a finite position in the clip leaves the selection as is.
  setEndSeconds(s) {
    const ratio = this.duration > 0 ? s / this.duration : NaN;
    if (Number.isFinite(ratio)) {
      this.endRatio = Math.min(1, Math.max(ratio, this.startRatio + 0.001));
    }
    this.draw(); updateTrimTimeDisplay();
  },

  // Stop the selection that is currently playing, if any.
  _stopPlayback() {
    const playback = this._playback;
    if (!playback) return;
    this._playback = null;
    try {
      playback.src.stop();   // fires onended, which closes the context
    } catch (e) {
      // The source had already finished; nothing left to stop.
    }
  },

  // Play the selected region once. A playback that is still running is
  // stopped first so repeated clicks do not overlap.
  playSelection() {
    if (!this.audioBuffer) return;
    this._stopPlayback();
    const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    const src = audioCtx.createBufferSource();
    src.buffer = this.audioBuffer;
    src.connect(audioCtx.destination);
    const playback = { audioCtx, src };
    this._playback = playback;
    src.onended = () => {
      if (this._playback === playback) this._playback = null;
      audioCtx.close();
    };
    src.start(0, this.getStartTime(), this.getEndTime() - this.getStartTime());
  },

  // Export the selection as a WAV Blob: 16 kHz, mono, 16-bit PCM.
  // Rejects if no audio has been loaded.
  async getTrimmedWavBlob() {
    const buf = this.audioBuffer;
    if (!buf) throw new Error('No audio loaded to trim');
    const targetRate = 16000;
    const startTime = this.getStartTime();
    const endTime = this.getEndTime();
    const startSample = Math.floor(startTime * buf.sampleRate);
    const endSample = Math.min(Math.floor(endTime * buf.sampleRate), buf.length);
    const numSamples = Math.max(0, endSample - startSample);

    let pcmFloat32;
    if (buf.sampleRate === targetRate && buf.numberOfChannels === 1) {
      // Already in the target format: copy the samples directly.
      pcmFloat32 = buf.getChannelData(0).slice(startSample, endSample);
    } else {
      // Render through a 1-channel offline context to resample and mix down.
      // Multi-channel 16 kHz input takes this path too, so every export is
      // mixed to mono the same way. Length must be at least 1 frame.
      const outLength = Math.max(1, Math.floor(numSamples * (targetRate / buf.sampleRate)));
      const offlineCtx = new OfflineAudioContext(1, outLength, targetRate);
      const src = offlineCtx.createBufferSource();
      src.buffer = buf;
      src.connect(offlineCtx.destination);
      src.start(0, startTime, endTime - startTime);
      const rendered = await offlineCtx.startRendering();
      pcmFloat32 = rendered.getChannelData(0);
    }

    const dataSize = pcmFloat32.length * 2;
    const wavBuf = new ArrayBuffer(44 + dataSize);
    const view = new DataView(wavBuf);
    view.setUint32(0, 0x52494646, false);       // "RIFF"
    view.setUint32(4, 36 + dataSize, true);     // RIFF chunk size
    view.setUint32(8, 0x57415645, false);       // "WAVE"
    view.setUint32(12, 0x666d7420, false);      // "fmt "
    view.setUint32(16, 16, true);               // fmt chunk size
    view.setUint16(20, 1, true);                // format tag 1 = PCM
    view.setUint16(22, 1, true);                // channels
    view.setUint32(24, targetRate, true);       // sample rate
    view.setUint32(28, targetRate * 2, true);   // byte rate (rate * block align)
    view.setUint16(32, 2, true);                // block align (bytes per frame)
    view.setUint16(34, 16, true);               // bits per sample
    view.setUint32(36, 0x64617461, false);      // "data"
    view.setUint32(40, dataSize, true);         // data chunk size
    for (let i = 0; i < pcmFloat32.length; i++) {
      // Float [-1, 1] -> signed 16-bit, clamped so overshoot cannot wrap.
      const sample = Math.max(-32768, Math.min(32767, Math.round(pcmFloat32[i] * 32767)));
      view.setInt16(44 + i * 2, sample, true);
    }
    return new Blob([wavBuf], { type: 'audio/wav' });
  },

  // Begin dragging the given handle ('start' or 'end').
  onPointerDown(handleType, e) {
    this.isDragging = handleType;
    e.preventDefault();
  },

  // While dragging, move the active handle to the pointer's horizontal
  // position, keeping the two handles at least 0.002 apart.
  onPointerMove(e) {
    if (!this.isDragging) return;
    e.preventDefault();
    const canvas = $('trimCanvas');
    const rect = canvas.getBoundingClientRect();
    const point = e.touches ? e.touches[0] : e;
    // No active touch point, or a canvas without width: nothing to map.
    if (!point || !(rect.width > 0)) return;
    const ratio = this._clampRatio((point.clientX - rect.left) / rect.width);
    if (this.isDragging === 'start') {
      this.startRatio = Math.max(0, Math.min(ratio, this.endRatio - 0.002));
    } else {
      this.endRatio = Math.min(1, Math.max(ratio, this.startRatio + 0.002));
    }
    this.draw();
    updateTrimTimeDisplay();
  },

  // End the current drag.
  onPointerUp() { this.isDragging = null; },

  // Drop the decoded audio and VAD data and stop any running playback.
  destroy() {
    this._stopPlayback();
    this.audioBuffer = null;
    this.vadSegments = [];
    this.isDragging = null;
  }
};
|
||||||
|
|
||||||
|
// --- Trim Modal Functions ---
|
||||||
|
let trimBucket = null;
|
||||||
|
let trimFileName = null;
|
||||||
|
|
||||||
|
// Open the trim dialog for one saved sample: show the overlay, load the audio
// and VAD data through TrimWaveform.init, then enable the controls and draw.
// On a load failure the error is reported and the dialog is closed again.
async function openTrimModal(bucket, fileName) {
  trimBucket = bucket;
  trimFileName = fileName;
  const overlay = $('trimOverlay');
  overlay.classList.add('open');
  overlay.setAttribute('aria-hidden', 'false');
  $('trimHint').textContent = 'Loading audio...';
  $('trimSaveBtn').disabled = true;
  $('trimPlayBtn').disabled = true;

  // closeTrimModal() clears trimBucket/trimFileName, so this turns false if
  // the dialog was closed (or pointed at another sample) during the await.
  const isCurrent = () => trimBucket === bucket && trimFileName === fileName;

  try {
    const info = await TrimWaveform.init(bucket, fileName);
    if (!isCurrent()) {
      // Dialog was closed while loading: drop the buffer that init just
      // decoded instead of enabling controls on a hidden dialog.
      if (trimFileName === null) TrimWaveform.destroy();
      return;
    }
    $('trimHint').textContent = `Drag the handles to select a region, then save as a new sample. Duration: ${info.duration.toFixed(2)}s`;
    $('trimSaveBtn').disabled = false;
    $('trimPlayBtn').disabled = false;

    if (info.vadCount > 0) {
      $('trimVadInfo').style.display = 'flex';
      $('trimVadSegments').textContent = `${info.vadCount} speech segment${info.vadCount > 1 ? 's' : ''} detected (first auto-selected)`;
    } else {
      $('trimVadInfo').style.display = 'none';
    }

    // Draw on the next frame, after the newly opened overlay has been laid out.
    requestAnimationFrame(() => {
      TrimWaveform.draw();
      updateTrimTimeDisplay();
    });
  } catch (e) {
    // A failure that arrives after the dialog was dismissed needs no report.
    if (!isCurrent()) return;
    const reason = (e && e.message) || String(e);
    $('trimHint').textContent = 'Failed to load audio: ' + reason;
    alert('Failed to load audio for trimming: ' + reason);
    closeTrimModal();
  }
}
|
||||||
|
|
||||||
|
// Hide the trim dialog, restore its buttons to their idle state, release the
// waveform data and forget which sample was being trimmed.
function closeTrimModal() {
  const overlay = $('trimOverlay');
  overlay.classList.remove('open');
  overlay.setAttribute('aria-hidden', 'true');

  const saveBtn = $('trimSaveBtn');
  saveBtn.disabled = false;
  saveBtn.textContent = 'Save Trim';
  $('trimPlayBtn').disabled = false;

  TrimWaveform.destroy();
  trimFileName = null;
  trimBucket = null;
}
|
||||||
|
|
||||||
|
// Refresh the start / end / duration readout under the waveform from the
// current TrimWaveform selection, each shown with two decimals.
function updateTrimTimeDisplay() {
  const asSeconds = (value) => `${value.toFixed(2)}s`;
  const selStart = TrimWaveform.getStartTime();
  const selEnd = TrimWaveform.getEndTime();

  $('trimStartTime').textContent = asSeconds(selStart);
  $('trimEndTime').textContent = asSeconds(selEnd);
  $('trimDuration').textContent = `Duration: ${asSeconds(selEnd - selStart)}`;
}
|
||||||
|
|
||||||
function setPill(el, text, cls) {
|
function setPill(el, text, cls) {
|
||||||
el.className = "pill " + (cls || "");
|
el.className = "pill " + (cls || "");
|
||||||
el.textContent = text;
|
el.textContent = text;
|
||||||
@@ -1588,6 +2028,43 @@
|
|||||||
$("sampleNegativeCount").textContent = String(negativeCount);
|
$("sampleNegativeCount").textContent = String(negativeCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the HTML for one card in the sample library.
//   item   - sample record; reads saved_as, original_name, message,
//            final_format, audio_url, trimmed, source_file and the
//            reviewed_at / received_at / created_at timestamps.
//   bucket - "negative" renders the negative badge and folder name; any other
//            value renders the positive / personal ones.
// Returns an HTML string; every interpolated value is escaped.
function buildSampleCardHtml(item, bucket) {
  const when = formatTimestamp(item.reviewed_at || item.received_at || item.created_at);
  const formatSummary = item.final_format ? describeFormat(item.final_format) : "16 kHz, mono, 16-bit";
  const isNegative = bucket === "negative";
  const badge = isNegative ? { label: "Negative", cls: "err" } : { label: "Positive", cls: "ok" };

  const subtitleParts = [];
  if (item.original_name && item.original_name !== item.saved_as) subtitleParts.push(`From ${item.original_name}`);
  if (when) subtitleParts.push(`Saved ${when}`);
  if (item.message) subtitleParts.push(item.message);

  // Trimmed samples get an extra badge and a Revert button.
  let trimBadgeHtml = '';
  let revertBtn = '';
  if (item.trimmed) {
    // Avoid a dangling "Trimmed from " when the source file is not recorded.
    const trimLabel = item.source_file ? `Trimmed from ${item.source_file}` : 'Trimmed';
    trimBadgeHtml = `<span class="pill trimBadge">${escapeHtml(trimLabel)}</span>`;
    revertBtn = `<button type="button" data-sample-revert="${escapeAttr(item.saved_as)}" data-bucket="${escapeAttr(bucket)}">Revert</button>`;
  }

  // Cache-busting "t" parameter: join with "&" when the URL already carries a
  // query string, so the result never contains two "?".
  const baseUrl = String(item.audio_url || `/api/audio/${encodeURIComponent(bucket)}/${encodeURIComponent(item.saved_as)}`);
  const querySeparator = baseUrl.includes('?') ? '&' : '?';
  const audioSrc = `${baseUrl}${querySeparator}t=${encodeURIComponent(item.created_at || '')}`;

  return `
    <div class="captureCard">
      <div class="row space">
        <div>
          <p class="captureTitle">${escapeHtml(item.saved_as)}</p>
          <p class="captureSubtitle">${escapeHtml(subtitleParts.join(" · ") || "Saved training sample.")}</p>
        </div>
        <span class="pill ${badge.cls}">${badge.label}</span>
        ${trimBadgeHtml}
      </div>
      <audio class="audioPlayer" controls preload="none" src="${escapeAttr(audioSrc)}"></audio>
      <div class="muted">Stored in ${isNegative ? "negative_samples" : "personal_samples"} · ${escapeHtml(formatSummary)}</div>
      <div class="captureActions">
        ${revertBtn}
        <button type="button" data-sample-trim="${escapeAttr(item.saved_as)}" data-bucket="${escapeAttr(bucket)}" class="trimBtn">Trim</button>
        <button type="button" data-sample-remove="${escapeAttr(item.saved_as)}" data-bucket="${escapeAttr(bucket)}" ${uiState.reviewBusy ? "disabled" : ""}>Remove sample</button>
      </div>
    </div>
  `;
}
|
||||||
|
|
||||||
function renderSampleLibrary(payload) {
|
function renderSampleLibrary(payload) {
|
||||||
const data = payload || { personal: [], negative: [], personal_count: 0, negative_count: 0 };
|
const data = payload || { personal: [], negative: [], personal_count: 0, negative_count: 0 };
|
||||||
uiState.samples = {
|
uiState.samples = {
|
||||||
@@ -1606,34 +2083,36 @@
|
|||||||
const label = activeBucket === "negative" ? "negative" : "personal";
|
const label = activeBucket === "negative" ? "negative" : "personal";
|
||||||
if (!items.length) {
|
if (!items.length) {
|
||||||
$("sampleLibraryList").innerHTML = `<div class="emptyState">No ${label} samples saved yet.</div>`;
|
$("sampleLibraryList").innerHTML = `<div class="emptyState">No ${label} samples saved yet.</div>`;
|
||||||
|
$("samplePagination").innerHTML = "";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$("sampleLibraryList").innerHTML = items.map((item) => {
|
// Paginate
|
||||||
const when = formatTimestamp(item.reviewed_at || item.received_at || item.created_at);
|
const pages = uiState.samples.pages || { personal: 0, negative: 0 };
|
||||||
const formatSummary = item.final_format ? describeFormat(item.final_format) : "16 kHz, mono, 16-bit";
|
let page = pages[activeBucket] || 0;
|
||||||
const badge = activeBucket === "negative" ? { label: "Negative", cls: "err" } : { label: "Positive", cls: "ok" };
|
const totalPages = Math.ceil(items.length / SAMPLE_PAGE_SIZE);
|
||||||
const subtitleParts = [];
|
if (page >= totalPages) page = Math.max(totalPages - 1, 0);
|
||||||
if (item.original_name && item.original_name !== item.saved_as) subtitleParts.push(`From ${item.original_name}`);
|
const start = page * SAMPLE_PAGE_SIZE;
|
||||||
if (when) subtitleParts.push(`Saved ${when}`);
|
const pageItems = items.slice(start, start + SAMPLE_PAGE_SIZE);
|
||||||
if (item.message) subtitleParts.push(item.message);
|
|
||||||
return `
|
$("sampleLibraryList").innerHTML = pageItems.map((item) => buildSampleCardHtml(item, activeBucket)).join("");
|
||||||
<div class="captureCard">
|
|
||||||
<div class="row space">
|
// Pagination controls
|
||||||
<div>
|
const pagination = $("samplePagination");
|
||||||
<p class="captureTitle">${escapeHtml(item.saved_as)}</p>
|
if (totalPages > 1) {
|
||||||
<p class="captureSubtitle">${escapeHtml(subtitleParts.join(" · ") || "Saved training sample.")}</p>
|
const prevDisabled = page === 0 ? "disabled" : "";
|
||||||
</div>
|
const nextDisabled = page >= totalPages - 1 ? "disabled" : "";
|
||||||
<span class="pill ${badge.cls}">${badge.label}</span>
|
pagination.innerHTML = `
|
||||||
</div>
|
<div class="paginationControls">
|
||||||
<audio class="audioPlayer" controls preload="none" src="${escapeAttr(item.audio_url || `/api/audio/${activeBucket}/${encodeURIComponent(item.saved_as)}`)}"></audio>
|
<button type="button" ${prevDisabled} class="pageBtn" data-page="prev">‹ Prev</button>
|
||||||
<div class="muted">Stored in ${activeBucket === "negative" ? "negative_samples" : "personal_samples"} · ${escapeHtml(formatSummary)}</div>
|
<span class="pageInfo">${page + 1} / ${totalPages} (${items.length} total)</span>
|
||||||
<div class="captureActions">
|
<span class="pageJump">Go to page <input type="number" min="1" max="${totalPages}" value="${page + 1}" class="pageInput" data-total="${totalPages}"> <button type="button" class="pageJumpBtn">Go</button></span>
|
||||||
<button type="button" data-sample-remove="${escapeAttr(item.saved_as)}" data-bucket="${escapeAttr(activeBucket)}" ${uiState.reviewBusy ? "disabled" : ""}>Remove sample</button>
|
<button type="button" ${nextDisabled} class="pageBtn" data-page="next">Next ›</button>
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
}).join("");
|
} else {
|
||||||
|
pagination.innerHTML = "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function rerenderReviewLists() {
|
function rerenderReviewLists() {
|
||||||
@@ -1664,8 +2143,8 @@
|
|||||||
uiState.reviewBusy = true;
|
uiState.reviewBusy = true;
|
||||||
setPill($("status"), "Removing sample...", "warn");
|
setPill($("status"), "Removing sample...", "warn");
|
||||||
syncButtons();
|
syncButtons();
|
||||||
const data = await api(`/api/samples/${encodeURIComponent(bucket)}/${encodeURIComponent(fileName)}`, { method: "DELETE" });
|
await api(`/api/samples/${encodeURIComponent(bucket)}/${encodeURIComponent(fileName)}`, { method: "DELETE" });
|
||||||
renderSampleLibrary(data);
|
await refreshSamples();
|
||||||
await refreshSession();
|
await refreshSession();
|
||||||
setPill($("status"), "Sample removed", "ok");
|
setPill($("status"), "Sample removed", "ok");
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -1673,7 +2152,6 @@
|
|||||||
alert(error.message);
|
alert(error.message);
|
||||||
} finally {
|
} finally {
|
||||||
uiState.reviewBusy = false;
|
uiState.reviewBusy = false;
|
||||||
rerenderReviewLists();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2911,7 +3389,65 @@
|
|||||||
});
|
});
|
||||||
$("sampleTabPersonal").addEventListener("click", () => setSampleBucket("personal"));
|
$("sampleTabPersonal").addEventListener("click", () => setSampleBucket("personal"));
|
||||||
$("sampleTabNegative").addEventListener("click", () => setSampleBucket("negative"));
|
$("sampleTabNegative").addEventListener("click", () => setSampleBucket("negative"));
|
||||||
|
/**
 * Show the given zero-based page of the currently active sample bucket.
 *
 * Stores the page number in `uiState.samples.pages`, re-renders the sample
 * library, refreshes button state, and scrolls back to the library header.
 *
 * @param {number} page Zero-based page index to display.
 */
function navigateToSamplePage(page) {
  const header = document.getElementById("sampleLibraryHeader");
  // Measure the header before re-rendering. If the header element is not in
  // the DOM, skip the scroll instead of throwing before the page is updated.
  const topY = header ? header.getBoundingClientRect().top + window.scrollY : null;

  // Anything other than "negative" is treated as the personal bucket.
  const activeBucket = uiState.samples.activeBucket === "negative" ? "negative" : "personal";
  const pages = uiState.samples.pages || { personal: 0, negative: 0 };
  pages[activeBucket] = page;
  uiState.samples.pages = pages;

  renderSampleLibrary(uiState.samples);
  syncButtons();

  if (topY !== null) window.scrollTo(0, topY);
}
|
||||||
|
// Delegated click handler for the pagination bar: Prev/Next buttons and the
// "Go to page" jump control are all rendered inside #samplePagination.
$("samplePagination").addEventListener("click", (event) => {
  const stepBtn = event.target.closest(".pageBtn[data-page]");
  if (stepBtn) {
    stepBtn.blur();
    const activeBucket = uiState.samples.activeBucket === "negative" ? "negative" : "personal";
    const pages = uiState.samples.pages || { personal: 0, negative: 0 };
    let page = pages[activeBucket] || 0;

    // The jump input carries the real page count in data-total; use it as the
    // upper bound. Fall back to the previous fixed cap when it is unavailable.
    const totalInput = event.currentTarget.querySelector(".pageInput");
    const renderedTotal = totalInput ? parseInt(totalInput.dataset.total, 10) : NaN;
    const lastPage = Number.isFinite(renderedTotal) && renderedTotal > 0 ? renderedTotal - 1 : 999;

    if (stepBtn.dataset.page === "prev") page = Math.max(page - 1, 0);
    else if (stepBtn.dataset.page === "next") page = Math.min(page + 1, lastPage);
    navigateToSamplePage(page);
    return;
  }

  const jumpBtn = event.target.closest(".pageJumpBtn");
  if (jumpBtn) {
    const input = jumpBtn.parentElement.querySelector(".pageInput");
    if (!input) return;
    const totalPages = parseInt(input.dataset.total, 10) || 1;
    // The input is 1-based for the user; pages are stored zero-based.
    let page = parseInt(input.value, 10) - 1;
    if (isNaN(page) || page < 0) page = 0;
    if (page >= totalPages) page = totalPages - 1;
    input.blur();
    navigateToSamplePage(page);
  }
});
|
||||||
$("sampleLibraryList").addEventListener("click", async (event) => {
|
$("sampleLibraryList").addEventListener("click", async (event) => {
|
||||||
|
// Revert trimmed sample
|
||||||
|
const revertBtn = event.target.closest("button[data-sample-revert][data-bucket]");
|
||||||
|
if (revertBtn) {
|
||||||
|
const ok = confirm(`Revert ${revertBtn.dataset.sampleRevert} to the original (pre-trim) version?`);
|
||||||
|
if (!ok) return;
|
||||||
|
try {
|
||||||
|
const form = new FormData();
|
||||||
|
form.append('bucket', revertBtn.dataset.bucket);
|
||||||
|
form.append('file_name', revertBtn.dataset.sampleRevert);
|
||||||
|
const result = await api('/api/samples/revert', { method: 'POST', body: form });
|
||||||
|
await refreshSamples();
|
||||||
|
setPill($("status"), result.message || 'Reverted', 'ok');
|
||||||
|
} catch (err) {
|
||||||
|
alert('Revert failed: ' + err.message);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Open trim modal
|
||||||
|
const trimBtn = event.target.closest("button[data-sample-trim][data-bucket]");
|
||||||
|
if (trimBtn) {
|
||||||
|
openTrimModal(trimBtn.dataset.bucket, trimBtn.dataset.sampleTrim);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Remove sample
|
||||||
const button = event.target.closest("button[data-sample-remove][data-bucket]");
|
const button = event.target.closest("button[data-sample-remove][data-bucket]");
|
||||||
if (!button) return;
|
if (!button) return;
|
||||||
await removeSample(button.dataset.bucket, button.dataset.sampleRemove);
|
await removeSample(button.dataset.bucket, button.dataset.sampleRemove);
|
||||||
@@ -3027,6 +3563,66 @@
|
|||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Trim Modal Event Listeners ---
$("closeTrimBtn").addEventListener('click', closeTrimModal);
$("trimCancelBtn").addEventListener('click', closeTrimModal);
// Close only when the click lands on the overlay itself, not on its children.
$("trimOverlay").addEventListener('click', (e) => {
  if (e.target === $("trimOverlay")) closeTrimModal();
});
$("trimPlayBtn").addEventListener('click', () => TrimWaveform.playSelection());
// Snap both trim handles to the first VAD segment, when one is available.
$("trimSelectFirstVadBtn").addEventListener('click', () => {
  if (TrimWaveform.vadSegments.length > 0) {
    const seg = TrimWaveform.vadSegments[0];
    TrimWaveform.setStartSeconds(seg.start);
    TrimWaveform.setEndSeconds(seg.end);
  }
});

// Upload the trimmed selection as a WAV and refresh the sample library.
$("trimSaveBtn").addEventListener('click', async () => {
  const saveBtn = $("trimSaveBtn");
  const start = TrimWaveform.getStartTime();
  const end = TrimWaveform.getEndTime();
  saveBtn.disabled = true;
  saveBtn.textContent = 'Saving...';

  try {
    const blob = await TrimWaveform.getTrimmedWavBlob();
    const form = new FormData();
    form.append('file', blob, 'trimmed.wav');
    form.append('bucket', trimBucket);
    form.append('source_file', trimFileName);
    form.append('start_time', start.toFixed(3));
    form.append('end_time', end.toFixed(3));

    const result = await api('/api/samples/trim', {
      method: 'POST',
      body: form,
    });

    closeTrimModal();
    await refreshSamples();
    setPill($("status"), result.message || 'Trim saved', 'ok');
  } catch (e) {
    alert('Trim failed: ' + e.message);
  } finally {
    // Restore the button on success as well as failure so it is never left
    // disabled with the "Saving..." label.
    saveBtn.disabled = false;
    saveBtn.textContent = 'Save Trim';
  }
});

// Handle drag (mouse + touch)
$("trimStartHandle").addEventListener('mousedown', (e) => TrimWaveform.onPointerDown('start', e));
$("trimEndHandle").addEventListener('mousedown', (e) => TrimWaveform.onPointerDown('end', e));
$("trimStartHandle").addEventListener('touchstart', (e) => TrimWaveform.onPointerDown('start', e), { passive: false });
$("trimEndHandle").addEventListener('touchstart', (e) => TrimWaveform.onPointerDown('end', e), { passive: false });
document.addEventListener('mousemove', (e) => TrimWaveform.onPointerMove(e));
document.addEventListener('touchmove', (e) => TrimWaveform.onPointerMove(e), { passive: false });
document.addEventListener('mouseup', () => TrimWaveform.onPointerUp());
document.addEventListener('touchend', () => TrimWaveform.onPointerUp());
// A touch interrupted by the browser or OS fires touchcancel, not touchend.
// Route it to the same release handler so the interaction is still ended.
document.addEventListener('touchcancel', () => TrimWaveform.onPointerUp());

// Redraw waveform on window resize
window.addEventListener('resize', () => {
  if ($('trimOverlay').classList.contains('open')) TrimWaveform.draw();
});
|
||||||
|
|
||||||
bootstrap();
|
bootstrap();
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -37,10 +37,11 @@ STATIC_DIR = Path(os.environ.get("STATIC_DIR", str(ROOT_DIR / "static"))).resolv
|
|||||||
PERSONAL_DIR = Path(os.environ.get("PERSONAL_DIR", str(DATA_DIR / "personal_samples"))).resolve()
|
PERSONAL_DIR = Path(os.environ.get("PERSONAL_DIR", str(DATA_DIR / "personal_samples"))).resolve()
|
||||||
CAPTURED_DIR = Path(os.environ.get("CAPTURED_DIR", str(DATA_DIR / "captured_audio"))).resolve()
|
CAPTURED_DIR = Path(os.environ.get("CAPTURED_DIR", str(DATA_DIR / "captured_audio"))).resolve()
|
||||||
NEGATIVE_DIR = Path(os.environ.get("NEGATIVE_DIR", str(DATA_DIR / "negative_samples"))).resolve()
|
NEGATIVE_DIR = Path(os.environ.get("NEGATIVE_DIR", str(DATA_DIR / "negative_samples"))).resolve()
|
||||||
|
TRIM_HISTORY_DIR = Path(os.environ.get("TRIM_HISTORY_DIR", str(DATA_DIR / "trim_history"))).resolve()
|
||||||
|
TRIM_HISTORY_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
TRAINED_WAKE_WORDS_DIR = Path(
|
TRAINED_WAKE_WORDS_DIR = Path(
|
||||||
os.environ.get("TRAINED_WAKE_WORDS_DIR", str(DATA_DIR / "trained_wake_words"))
|
os.environ.get("TRAINED_WAKE_WORDS_DIR", str(DATA_DIR / "trained_wake_words"))
|
||||||
).resolve()
|
).resolve()
|
||||||
|
|
||||||
CLI_DIR = Path(os.environ.get("CLI_DIR", str(ROOT_DIR / "cli"))).resolve()
|
CLI_DIR = Path(os.environ.get("CLI_DIR", str(ROOT_DIR / "cli"))).resolve()
|
||||||
PIPER_ROOT = DATA_DIR / "tools" / "piper-sample-generator"
|
PIPER_ROOT = DATA_DIR / "tools" / "piper-sample-generator"
|
||||||
PIPER_VOICES_DIR = PIPER_ROOT / "voices"
|
PIPER_VOICES_DIR = PIPER_ROOT / "voices"
|
||||||
@@ -169,11 +170,58 @@ FIRMWARE_LOCK = threading.Lock()
|
|||||||
FIRMWARE_SESSIONS: Dict[str, Dict[str, Any]] = {}
|
FIRMWARE_SESSIONS: Dict[str, Dict[str, Any]] = {}
|
||||||
ANSI_ESCAPE_RE = re.compile(r"\x1B(?:\[[0-?]*[ -/]*[@-~]|[@-Z\\-_])")
|
ANSI_ESCAPE_RE = re.compile(r"\x1B(?:\[[0-?]*[ -/]*[@-~]|[@-Z\\-_])")
|
||||||
|
|
||||||
|
# --- Silero VAD (lazy-loaded) ---
|
||||||
|
_silero_vad_model = None
|
||||||
|
_silero_vad_utils = None
|
||||||
|
_SILERO_VAD_LOCK = threading.Lock()
|
||||||
|
VAD_SELECTION_PAD_START_S = 0.08
|
||||||
|
VAD_SELECTION_PAD_END_S = 0.08
|
||||||
|
|
||||||
|
|
||||||
|
def _load_silero_vad():
    """Return ``(model, utils)`` for Silero VAD, loading the model on first use.

    ``utils`` is a dict holding the imported ``torch`` module under the key
    ``"torch"``. ``torch`` and ``silero_vad`` are imported here, not at module
    import time. The result is cached in module globals, and
    ``_SILERO_VAD_LOCK`` serializes the first load.
    """
    global _silero_vad_model, _silero_vad_utils
    # Fast path: no lock needed once the model has been published.
    if _silero_vad_model is not None:
        return _silero_vad_model, _silero_vad_utils
    with _SILERO_VAD_LOCK:
        # Re-check under the lock: another thread may have finished loading
        # while this one was waiting.
        if _silero_vad_model is not None:
            return _silero_vad_model, _silero_vad_utils
        import torch
        import silero_vad

        model = silero_vad.load_silero_vad()
        model.eval()
        utils = {"torch": torch}
        # Publish utils BEFORE the model. The lock-free fast path treats a
        # non-None model as "fully initialized", so the model must be the last
        # global written; otherwise a concurrent caller could get (model, None).
        _silero_vad_utils = utils
        _silero_vad_model = model
        return model, utils
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_speech_segments(wav_bytes: bytes) -> List[Dict[str, float]]:
    """Run Silero VAD on 16-bit PCM WAV bytes and return speech segments.

    Args:
        wav_bytes: Complete WAV file contents. The expected input is 16 kHz
            mono; the sample rate and channel count are read from the header
            instead of being assumed.

    Returns:
        A list of ``{"start": seconds, "end": seconds}`` dicts rounded to
        three decimals. Empty when the audio holds no frames or no speech.

    Raises:
        ValueError: If the WAV is not 16-bit PCM. The VAD helper may also
            reject sample rates it does not support (NOTE(review): confirm
            the supported rates against the installed silero_vad version).
    """
    model, utils = _load_silero_vad()
    torch = utils["torch"]
    import numpy as np
    from silero_vad.utils_vad import get_speech_timestamps

    with wave.open(io.BytesIO(wav_bytes), "rb") as wf:
        channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        sample_rate = wf.getframerate()
        raw = wf.readframes(wf.getnframes())

    # The int16 decoding below is only valid for 2-byte samples.
    if sample_width != 2:
        raise ValueError(f"Expected 16-bit PCM audio, got {sample_width * 8}-bit.")

    samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
    if channels > 1:
        # Frames are interleaved per channel; average them to mono so the
        # channels are not read as consecutive mono samples.
        usable = samples.size - samples.size % channels
        samples = samples[:usable].reshape(-1, channels).mean(axis=1)
    if samples.size == 0:
        return []
    audio_tensor = torch.from_numpy(np.ascontiguousarray(samples, dtype=np.float32))

    timestamps = get_speech_timestamps(
        audio_tensor,
        model,
        # Use the file's real rate so the returned seconds match the audio.
        sampling_rate=sample_rate,
        threshold=0.5,
        min_speech_duration_ms=150,
        min_silence_duration_ms=100,
        return_seconds=True,
    )
    return [{"start": round(ts["start"], 3), "end": round(ts["end"], 3)} for ts in timestamps]
|
||||||
|
|
||||||
|
|
||||||
class _FirmwareYamlLoader(yaml.SafeLoader):
|
class _FirmwareYamlLoader(yaml.SafeLoader):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class _FirmwareYamlDumper(yaml.SafeDumper):
|
class _FirmwareYamlDumper(yaml.SafeDumper):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -1009,7 +1057,7 @@ def _list_captured_items() -> List[Dict[str, Any]]:
|
|||||||
def _sample_item_from_path(audio_path: Path, bucket: str) -> Dict[str, Any]:
|
def _sample_item_from_path(audio_path: Path, bucket: str) -> Dict[str, Any]:
|
||||||
meta = _load_sidecar_json(audio_path)
|
meta = _load_sidecar_json(audio_path)
|
||||||
stat = audio_path.stat()
|
stat = audio_path.stat()
|
||||||
final_format = meta.get("final_format") or _inspect_wav_bytes(audio_path.read_bytes()) or {}
|
final_format = meta.get("final_format") or meta.get("detected_format") or _inspect_wav_bytes(audio_path.read_bytes()) or {}
|
||||||
return {
|
return {
|
||||||
"bucket": bucket,
|
"bucket": bucket,
|
||||||
"saved_as": audio_path.name,
|
"saved_as": audio_path.name,
|
||||||
@@ -1021,6 +1069,8 @@ def _sample_item_from_path(audio_path: Path, bucket: str) -> Dict[str, Any]:
|
|||||||
"reviewed_at": meta.get("reviewed_at") or "",
|
"reviewed_at": meta.get("reviewed_at") or "",
|
||||||
"created_at": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(),
|
"created_at": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(),
|
||||||
"converted": bool(meta.get("converted")),
|
"converted": bool(meta.get("converted")),
|
||||||
|
"trimmed": bool(meta.get("trimmed")),
|
||||||
|
"source_file": meta.get("source_file") or "",
|
||||||
"final_format": final_format,
|
"final_format": final_format,
|
||||||
"message": meta.get("message") or "",
|
"message": meta.get("message") or "",
|
||||||
"size_bytes": stat.st_size,
|
"size_bytes": stat.st_size,
|
||||||
@@ -1036,9 +1086,10 @@ def _list_sample_items(directory: Path, bucket: str) -> List[Dict[str, Any]]:
|
|||||||
items.append(_sample_item_from_path(audio_path, bucket))
|
items.append(_sample_item_from_path(audio_path, bucket))
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
# Untrimmed first (stable sort preserves mtime order within each group).
|
||||||
|
items.sort(key=lambda x: x.get("trimmed", False))
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
def _samples_payload() -> Dict[str, Any]:
|
def _samples_payload() -> Dict[str, Any]:
|
||||||
takes = _sync_personal_samples_state()
|
takes = _sync_personal_samples_state()
|
||||||
personal_items = _list_sample_items(PERSONAL_DIR, "personal")
|
personal_items = _list_sample_items(PERSONAL_DIR, "personal")
|
||||||
@@ -2768,7 +2819,143 @@ def delete_sample(bucket: str, file_name: str):
|
|||||||
_remove_audio_with_sidecar(path)
|
_remove_audio_with_sidecar(path)
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
return JSONResponse({"ok": False, "error": str(e)}, status_code=404)
|
return JSONResponse({"ok": False, "error": str(e)}, status_code=404)
|
||||||
return _samples_payload()
|
return {"ok": True, "deleted_bucket": bucket, "deleted_file": file_name, "message": f"Deleted {file_name}"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/samples/{bucket}/{file_name}/vad")
def vad_segments(bucket: str, file_name: str):
    """Suggest a trim selection for a saved sample using voice-activity detection.

    Looks the sample up in the ``personal`` or ``negative`` bucket, runs VAD on
    it, and returns at most one padded segment: the first detected segment of
    at least 250 ms. Responds 404 for an unknown bucket or missing file and
    500 if detection raises.
    """
    sample_dir = {"personal": PERSONAL_DIR, "negative": NEGATIVE_DIR}.get(bucket)
    if sample_dir is None:
        return JSONResponse({"ok": False, "error": "Unknown sample bucket."}, status_code=404)

    try:
        audio_path = _resolve_audio_path(sample_dir, file_name)
    except FileNotFoundError as exc:
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=404)

    wav_bytes = audio_path.read_bytes()
    try:
        detected = _detect_speech_segments(wav_bytes)
    except Exception as exc:
        return JSONResponse({"ok": False, "error": f"VAD failed: {str(exc)}"}, status_code=500)

    # Keep only segments of 250 ms or longer and use the first of them.
    long_enough = [candidate for candidate in detected if (candidate["end"] - candidate["start"]) >= 0.25]
    if not long_enough:
        return {"ok": True, "file_name": file_name, "segments": [], "segment_count": 0}
    chosen = long_enough[0]

    wav_info = _inspect_wav_bytes(wav_bytes) or {}
    duration_s = float(wav_info.get("duration_s") or 0.0)

    # Pad both edges by a fixed amount so quiet wake-word edges are not
    # clipped, then clamp the result to the bounds of the file.
    start = max(0.0, round(chosen["start"] - VAD_SELECTION_PAD_START_S, 3))
    end = round(chosen["end"] + VAD_SELECTION_PAD_END_S, 3)
    if duration_s > 0:
        end = min(duration_s, end)
    if end <= start:
        # Never return an empty or inverted selection.
        end = start + 0.001

    return {
        "ok": True,
        "file_name": file_name,
        "segments": [{"start": start, "end": end}],
        "segment_count": 1,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_trim_seconds(raw: str | None) -> float | None:
    """Parse an optional trim offset form field; missing or empty gives ``None``.

    Raises:
        ValueError: If the value is not a finite number.
    """
    import math

    if not raw:
        return None
    value = float(raw)
    if not math.isfinite(value):
        raise ValueError(f"Non-finite trim offset: {raw!r}")
    return value


@app.post("/api/samples/trim")
async def trim_sample_upload(
    file: UploadFile = File(...),
    bucket: str = Form(...),
    source_file: str = Form(...),
    start_time: str | None = Form(None),
    end_time: str | None = Form(None),
):
    """Replace a saved sample with an uploaded, already-trimmed version.

    The current audio and its sidecar (if any) are copied into
    ``TRIM_HISTORY_DIR`` first. The backup file name is recorded in the new
    sidecar under ``undo_backup_file`` so the trim can be reverted.

    Responds 404 for an unknown bucket or missing source file, and 400 for an
    empty upload, audio that cannot be normalized, or invalid trim offsets.
    """
    bucket_map = {"personal": PERSONAL_DIR, "negative": NEGATIVE_DIR}
    directory = bucket_map.get(bucket)
    if directory is None:
        return JSONResponse({"ok": False, "error": "Unknown sample bucket."}, status_code=404)

    # Validate the numeric fields before anything is written. If this failed
    # after the sample had been overwritten, the file would be left trimmed
    # with no undo metadata.
    try:
        trim_start_s = _parse_trim_seconds(start_time)
        trim_end_s = _parse_trim_seconds(end_time)
    except ValueError:
        return JSONResponse(
            {"ok": False, "error": "start_time and end_time must be finite numbers."},
            status_code=400,
        )

    data = await file.read()
    if not data:
        return JSONResponse({"ok": False, "error": "Empty audio file."}, status_code=400)

    # Normalize anything that is not already a WAV in the target format.
    info = _inspect_wav_bytes(data)
    if not info or not _is_target_wav(info):
        try:
            data = _normalize_audio_to_target_wav(data, file.filename or "trimmed.wav")
        except Exception as e:
            return JSONResponse({"ok": False, "error": f"Audio normalization failed: {e}"}, status_code=400)

    try:
        orig_path = _resolve_audio_path(directory, source_file)
    except FileNotFoundError as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=404)

    # Back up the pre-trim audio (and sidecar) under a timestamped name.
    TRIM_HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%f")
    backup_name = f"{ts}_{source_file}"
    backup_path = TRIM_HISTORY_DIR / backup_name
    shutil.copy2(orig_path, backup_path)

    orig_sidecar = _audio_sidecar_path(orig_path)
    if orig_sidecar.exists():
        shutil.copy2(orig_sidecar, _audio_sidecar_path(backup_path))

    orig_path.write_bytes(data)

    # Keep the existing metadata and add the trim bookkeeping on top of it.
    old_sidecar = _load_sidecar_json(orig_path)
    sidecar = {
        **old_sidecar,
        "trimmed": True,
        "source_file": source_file,
        "source_bucket": bucket,
        "trim_start_s": trim_start_s,
        "trim_end_s": trim_end_s,
        "undo_backup_file": backup_name,
    }
    _write_sidecar_json(orig_path, sidecar)

    updated_item = _sample_item_from_path(orig_path, bucket)
    updated_item["trimmed"] = True
    updated_item["source_file"] = source_file
    return {"ok": True, "updated_sample": updated_item, "message": f"Trimmed {source_file}"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/samples/revert")
def revert_trim(
    bucket: str = Form(...),
    file_name: str = Form(...),
):
    """Restore a trimmed sample from the backup recorded in its sidecar.

    Copies the backup audio named by the sidecar's ``undo_backup_file`` over
    the sample, restores the backed-up sidecar, and deletes the backup.

    Responds 404 for an unknown bucket, missing sample, or missing backup
    file, and 400 when the sidecar has no backup reference or the reference
    points outside ``TRIM_HISTORY_DIR``.
    """
    bucket_map = {"personal": PERSONAL_DIR, "negative": NEGATIVE_DIR}
    directory = bucket_map.get(bucket)
    if directory is None:
        return JSONResponse({"ok": False, "error": "Unknown sample bucket."}, status_code=404)

    try:
        file_path = _resolve_audio_path(directory, file_name)
    except FileNotFoundError as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=404)

    sidecar = _load_sidecar_json(file_path)
    backup_name = sidecar.get("undo_backup_file")
    if not backup_name:
        return JSONResponse({"ok": False, "error": "No trim backup found for this sample."}, status_code=400)

    # The backup name comes from a JSON file on disk. Refuse anything that
    # resolves outside the trim-history directory, such as "../" segments.
    history_dir = TRIM_HISTORY_DIR.resolve()
    backup_path = (history_dir / str(backup_name)).resolve()
    if backup_path.parent != history_dir:
        return JSONResponse({"ok": False, "error": "Invalid trim backup reference."}, status_code=400)
    if not backup_path.exists():
        return JSONResponse({"ok": False, "error": "Trim backup file missing."}, status_code=404)

    shutil.copy2(backup_path, file_path)

    backup_sidecar = _audio_sidecar_path(backup_path)
    current_sidecar = _audio_sidecar_path(file_path)
    if backup_sidecar.exists():
        shutil.copy2(backup_sidecar, current_sidecar)
        backup_sidecar.unlink()
    else:
        # The pre-trim sample had no sidecar. Remove the one written by the
        # trim so the restored file is not still flagged as trimmed with a
        # pointer to the backup deleted below.
        current_sidecar.unlink(missing_ok=True)

    backup_path.unlink()

    updated_item = _sample_item_from_path(file_path, bucket)
    return {"ok": True, "updated_sample": updated_item, "message": f"Reverted {file_name}"}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/captured_audio/{file_name}/approve_personal")
|
@app.post("/api/captured_audio/{file_name}/approve_personal")
|
||||||
|
|||||||
Reference in New Issue
Block a user