From 6e7396455a52c431cce8bf79368a1c59780cb857 Mon Sep 17 00:00:00 2001 From: MasterPhooey Date: Sat, 18 Apr 2026 09:01:40 -0500 Subject: [PATCH] Automatic Calibration --- README.md | 2 +- __pycache__/trainer_server.cpython-311.pyc | Bin 0 -> 61444 bytes cli/calibrate_detector.py | 405 +++++++++++++++++++++ cli/setup_audioset | 137 +++++-- cli/setup_fma | 63 +++- cli/setup_python_venv | 24 +- cli/wake_word_sample_trainer | 58 ++- dockerfile | 4 +- run_recorder.sh => run.sh | 2 +- trainer_server.py | 39 +- 10 files changed, 656 insertions(+), 78 deletions(-) create mode 100644 __pycache__/trainer_server.cpython-311.pyc create mode 100644 cli/calibrate_detector.py rename run_recorder.sh => run.sh (97%) diff --git a/README.md b/README.md index db3472e..5cc8344 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ That will remove: - browser microphone recording has been removed - personal samples are optional - the server module is now `trainer_server.py` -- the launcher script is still named `run_recorder.sh` for compatibility +- the launcher script is now `run.sh` --- diff --git a/__pycache__/trainer_server.cpython-311.pyc b/__pycache__/trainer_server.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3893d6fdccc8f50a78ced1b8388274b8a8abc6bc GIT binary patch literal 61444 zcmeFac~l%{mM@rlMs6gK>>x-G0ul+3gjS3eHtj+ZLb8RIG$IlSiOrFT7M$4F&s|0u z>KVJ`7Iw+cS@mj>yHrz5PoHU3ot`e)8^_4g`1q5M;}e~@x1msXXfGK@{FJ7 z&Aj>DFLH@UK!vNi|C=EBBI1j0U+#DB{qDWr4_q$04%Z95-weDwqtpE(y(BJo{NeMz z&(P^^>(1zS9j_nM^(c3JkDk37dJODt>@l*tsmH|b<{mSmcbowhs=M0GSmjD_$*47&nCBL zV_+liVNaU^zk4*~@VVNa=&4&ntUNwHEml2h*sz9}1$?14rgl{HY+hrF7R6h%nAN<5 zTy08^%4IDxt7pp^YO{_n)#ek~nwVAUyLQgRR1ci!9eeIr&0{3{pMtKFzgQ=q!v#6R|msi%e4^ZLM! 
zz^=efUUxyqH}iV9@Pn)4_uR4ad+%^Pt$_^W=I%EXCKA#G+W39Vctbg?m)aBF+BwqG z@yM?S9{KekzaQmjxnt-#v|{e9lsn({imvA{>XR9$PL-|Sz_+7rM^^Cffd5hc!15I1 zZE_IbkMW22P5j|I2L1>Ox2*2!cHobqJ&)o4IQ}~W4gGfh#4DDb<47-Sc`lkVof%Tq%=PvhIkHKfsvaHkM1b9pM7GMz!VF0@ZIuOGLfeU*`N7QUzX$KgvU zWMeE;tJ^@8f{A|uVY~Tr2-_2IVw63J_cQUh)RGPAnE3t_zMo|^(DvZd@P8b&eu8~l z)(4tApF!Ajy#LWHj&ypE-?Iq&B;U*GLbKpJ-k*xUFUtkr`|$l~Rt_c36a5yRhu<^& zTS?yla#ocdIW13#7_rat7pVpL0i@H*UrLtcGTzVIbT%FTHb1Bx)BI4t z$q(ayBrUJec&%6I5tOB&-^o9Ry!%-ECSH$P3CM|8y$A6gVDF(o1LA1slZ79{_kRA0 z)}Ozc_P#vcHN?BXUq`u{FmAE}&Q!l?-_l0KIO1L8pAWeCFj{AVwUE1?<6nUP0RJN1 zsV%537Jd?bmzdv6tcPF5`(;4Sl`-dk6)3s)iWb)LfAoqj4RQ@4{!jv{sqg$Z(qQbX zEA+L2pL)fVmg4ZLCAyK8;>~5H8CkV7|BW^-AZigmy(YiH^l<$~{+sCge~tgw0tIN- znODqdbr@Z>9Ir3Sb9wrDmac(+V_CY-t(xv1YtzM?Te-b&ElX2awY}d=OY={bwRdpU z^4v~~JG(4yh}WOi`M&ij4Vq4$KF0CAp+IP0D1c|v$==XKpE+h~ALt9kj4i|0W2R#R z!BC7lF&Y{e8SWj7nY+hE2LnDs%-R)rZY+Rr)-hplWHd1BGsUb2dV`^slZRvWGoyne zz5IcJ!9dIucSnRFL~|bMK7On#5F8yD4hCY*Bd1SvCf-m0$LUaSXrPZ`1Y=x#OLvRE z{czW~xdDajjoD9kw{#zFBd?ex$U?@PCp)^3M$0kgn>iW~f>cg_uy+XM492W&#}2bk z#=gOUm^m~uG8i1s8y!HvIwefqg}`t?=nahsG0T;afj&gaJ9!w%`p=#?+}3g0f4H-~ z;|c$nu4Ci-FNQ*+!KQ|Wi(?lq3=CiB@9hiJ_l-0N7kh)j(d!LLvPvQiLhsdvq27Vv z2Ial}5(*Z})uh~Y;zTz~c-PlTI1n5eyb@rE$I6rh+t5_UP8{^NwX_}T@OO6~^Plc$ zJJH#GI%eFxd2_5_rBDYBAL|&m)c5uFT?~vDvahYp|ZI2|+h_YMXF=zRT_m?up?#&pv|cq-7QDa zmDH=JPj=wBD`wcTIhIYI;vT2{-6zn9!g@@Mfyuo(-KWGP`II;qu>zI^D<-vacgx{p z{$qzbQF!AvG;hB4^YNA^{H^D@J5CSibewLybZDSY7%zKGV9gOcFTb=~eCV_2`n z=~oSF;`M6|YhEdASdThc0L`qOG2K_jr-B<}djGiN$!B`&#y8jP_CLLG-1>}vW7xm3 zGRDz}p#g)DY7T~kfzgkX&-5HA#L&~#{BH;qVulg=vx{6KGA`$*sAfY89kX+ZEJg)0G}e)M|G7VPa0 z_^H=~L&)F+{)7E+CUsGpbE9K_k(2I#&lW zFO2QLz?A@x`9MKp9BV@=s+k~v$A6$I(1mrx-X917Zt?zJtUxH2Fn|c26~J7d5=C*7 zli~xs-`0gx-&P*>L~7nMXwfmfHXaR@WfyPMF2wD+r>iG`IEQtS9o$F;ryTgCxA^rJzV=3e0-g)jCCfnXp+eWF=C{YXU!AjCEN2d}`H z)GeAa7fgBcro7qEU0yWhNv1~G)F_%77hQ!5uF`o|>Fia>RV}-!mvkm`7IUWbO9rDY zE9!K;cIf7z8%L&(OyTd#MR&oHPH#h)gcEgUPPH!?@%rVLUoKe_@q<~&`lmT9Zo~U- 
zdrOJ={bD^_P0v~of?D?TJ8=Gho>hT0-uQ~Pr&sJb-jv?c+P>6&MW31DeWrodG>96h z_?2*u>hug^Kkb+RguDbFuf*;R7%tnMzfES*{%X>N{5F2E9D;Gl`GUjFc2M@q`)cA5_DcOH-(?Uh#d`08j zL8+oyu4sm@l(k2O>)0bZ_K2oEEZ3EX$9IqkhKFj6({|T{E~J{!+tCwYUC4w;@r&0J zUnd0KL*fdj@q{&@4U7v!yeeHf5E*u?*KN@SO;-)dgr@1OXZc}}7(0ct$Ury)2hB@u zq@ceY{$nTFjtWnZhjB14{GOf#RL|xTfAIQn-?IMp6B-c^1jKw5|G^t@(A>@n$yq5o zD9Yt?e4EfK$8xh4!@gHRtfL{wum zS!f+^;?p?pyd}lqSWBIT(OL?IW$Eu78W_AT05jIbOoTQF`|&6=kV7p9!qEuKDrUS8 z2nns^+Xe^N*683s$Y*4oDbSKDjKK*KH1&E_E6@c%sephbJ%Kv-<2hbO$k!;!zk!1x z>&l!dxLxu_$?Qcbt6I*g7PIQ^Zk1g1va5c*s!T{{+B&GW8i$+cH@?VUUj zwYsp*nDe6U+$q+6BkvdBKLv>1NYR-|QC2u}*fZo9&ey-UUb1yrW8T)X0t+(NQyZUNr4e;2kv)05j0? z3!s`~=Dv}!;Si9lI7$>$TA>M{gmH47hm#!U)R8v)Q~P}W5gZ`z`XDx&PZRcl_vKd@ z121gAHVf`>4eyx3K*Yf!H{D6={t0u~%$u(nuBlk%Vbiz^`5XHfunHT6>aY=^R0HLT zUMSQ0tUT4iX+xT}pHCv1i8^x^oZfk-7jF-9ilPN2Vy+Lrh()=> zjph_Yi%X*Ug-Z^L&Az0A^O@P^viw#DXR>1{M`yNA)rqD8{A5$XY=>kjSKcI3L40`D z%v43pkpbDeSOooX?>lmtUzeMA)(O&Y(_z$*33vWvH90krmQ zf&?$V#)2A32y`HyupyM0%1&EbfNlf89pTm1&ado5`_;7oiwb>JxFW&Vu%&)8P-*MM zKrdin@Kef$-ietuG8X#EJQnJ&+vT$fBx)j*kb@0mz)r>q0`=L;2V*7yv11lK!00gh zKyY9f>-2CR_+~tqa6Tg_{J?V*EI0};usXDWGsn*eA-}@DiSuoQw~)^q{)4}S1BhwQ zTI5QjS?k2ICNcA|C9~0C`Ai4rx8zK=eZlGMSu>v5^P;U>vX#rWavD=RBi)N`&-D0g z<-5MOeQ#~NvvHwl(|pmUyN^pn+vTF|l6!~j-Z5pOLAaAeV9sY0^|$1t{8oBm2*L7D zpLo{I^51KhJU-du6CI6H+orbN4S~L3D8ipMwQM%NzuDBX&HDaMJ={K2FnJ}hgMWh8 zZ9viAP2g#VqXK5tgFeOE(4>_d=E=PAz(_?*vy^mA+Q8L?XaK}7;FZqAGq4S;pebn? 
z3`LzX6hQ_;3Mp}c3^Wm*NP{#%2okPHt0RUU7Z}Dc0>SL#1TVZ|7RE&i3J`J939pln zjmDcl6u1@=ZjtAkch_?V-;!^l~teg83-G%Y`lLb?>y8 z;l97#-m(SbL=Tr$E4jKNoQ_})TV4Ma59z~%5KHA+87*7`M}X2^c7x?eX1^*7 zgaXQ_prH~o^J7D!K{hUgcj2KP6<)s*di7yJbLvBi^{?R2u&|n4i<}n=*8*2J&(+QG z54uIJPU4Qp+!2vGf^Q4l#(8ezqBC>)$V}*NrRZ#soDH(GA(~sVWY*a>0b1GYfL64w zYMd_tyPUZ+Iv9GVI8n|yWl{i>i$+H_%JFhbR!gq#{aky?dgJ>Qdbk?UksK4<=shiA zks`pgz+yS6s3!d)D~^J6Vg#$FYFw-#ej4clq9G0E0zsgEsU(b*ZXD}Qh}NF?b(!6T zjIh3Ml@*k5wXe0Z0&h%QX>DT!@Kvq5XVN5lWmK(Vq#?$F=sKWav`f`KC9+#p_J7q zXEjb)7ITX}F0NlF-a22rb*@<|-Y*yL7mE)*Xp?de$+?H7>`}0<9P1*DcZ;O;jUVqk zJ6~~DtT-F>6fW5k$)+;+jI#P|GQZS^)YBS*u)qw>TAsrfPMv3~l-($2H%=a1G8xUe zi-HD#VMCzf0WSrsRmnL}Tu%(Up{BH1r2I(x~IF2lT)IZ;QZ=qQQSZT;9; z7O_dr+UVv+dUQ(8x<%)vyJsY4qwH)HosCgX!IC2pEtSA$l+9G$ern79pQ8DkR zi0qaeb+V&QbnIUAR4sUF=RLJ|cT1k_vS<5}du_=PQI{_|Dx!6}Rw!lEQ@WJD#)JxV zwVOWXif0c?Tvc>a13ey-INzeXV!>TC@2-kmm)u)q_m-t~YfMmEZ&rEMb<3ltS}aKW zNZ)G7ZM7Lcu$fw2)(Bwm^Qb8q!59~8+j9$3g$cJ0X-H!d`?^|vjen1e?~8y zQe)Pd_$x1#k0b-zbu;>>YbNwqXjDt63boQlEQ#`|@4WRMr^U;)Zxe>$%w<@&k!`;0 zPAp8|w^0i%yLg*6tx^4eZo;Tpiqe;BQ`jU}!q^MF43GoBrk@@R<)#u28&}E6o|aS1 zsv#X|Aw6MJL`8?B^3wK~HdaK1daf2@!n~%EJGCi~=B-+CS6WCtpP}_xQ>iSW;#7%& z7@5N6umwoA`}-K%KTM+HdW@&(+R(aGSZ#Z-zO;s|5fxgqT39wVO%q(GJQX6$g(^~S zT6!<2pDqk@tMv3g$Cx^_rk?r##}qo~4;WLKD~>4(5qX8luD_t4u!U_AqDhurYWT=! 
zy+RCM6&jlgt{s`}X?qoX_A9!39_H{*_* zR$|vOzOaT=r*)wb|DSJyliU||KLuDhp(j#@*Ij07$B!LhLwtWUlS#N4Sk*d(K}5vn z!vJoK-z@wQo-{l(fe5_uYJcw)h&J?|9}FaU!od~xNDILq2-YG-e2>(5NOT^0{rQ>D z?3w%99&Gz*^Zn+jGxy=rA3ouyaAlKx1;nKyQ^$UO01gS~s0T;{e#2Z=r59i$-;4a&BL zQ!FWgc6Ahdp74F)Iyoe35HpW~zb`0TN#7t}LeC+j*E%vNxJ~K>!UgAl6wJe+6#bVyQ;1Y}QO!E<@ub6N5X$)2I9 z_Ql+S*@AaV-YyZVc1nf2JFpkzCW^?GJDV5T_IBxPDPk?UvuAeQZhoUVk|B8-WKVVq#I9~B>x`UrW+Cgz`K%|U ztf%Fyr$K{qMYFr_p1oiB;EH(WtjIksagWQ~_6Gug=) zZ0~2*wCpy#zuVqwF@Ipz!(9f+3G99TH*lCdqD4aisKFybD3=!Kt;~?sh8fkQIsv)` zLOiMsDsAZWm6KMKu;B{!v)L|wq8PX{U#q&rV3sLhS5r;mr7*}&{EFMUi-7G<+et8h z*QHQ^Hw@DbKy1CTAh8w7z*eZ3iGeV{62PB?AP;st`$mRF31SaJh`M(WB6rCpO0_~I z!c`E&rtKz3@Z=p-es@h+pbp#$Cq0ynYFhmv5SiKDaiHbQvF@bYY-;Y&<~AvFhn%@%A+vct zvsudAD`)N%GrK-+?V3CsWtI4NbGx+pfV}wtsvzYZl=BWwo`_mAXHJOL_4q~I1tM3l zXw9G9Ct7RqTXdI+Tv^oNp4!Ih8gnPM-{WEhnr@>J!bay4XnPu+pvkZ($7wRd1?;zg zlGCf@IQ<+Moaw5)G_nSGpt%D}_n3 z%aq3YBmsbM9A^Tp*|BQMX>=+}*%G#_OjB#y0`&__ST*&ts&}zgsGoHe+9$2_!937k zD^uQPEryF$YYVAxY=}iz!dAFe2uJ8O)R0vh^HP#FZbo}g_fIc`QZpB#7!eiIJJliD z7T`Hx;5KbNG2!hBzOCTRX4*mver0Bu^|(ESV1K1=7|+=(`U;Uj{?Sguldd3L9fgTZY8NOh5&L3Mz-4Nj`XWAaqe6`dXmDtYE7QA^a;! zjT8PS+#ur~uXs>+K%odTQ{irZM`1W2Fvu{wAdQ6trlyE? 
zZ`?^Um{}7!KV^;zLPdT z+VJ{1+ihPjG}a;&Uo_r0xIyOveUOq-P&StVn-o#Hu)JLO|wjdE6bk%6woVBV*Dclx6&kUS0=g1 zC7W^}{rk%2Br)b&BclDNWIrm~kJ1iM<}wkPsz~>bw#{+(E2WM51YRuz72g)| zD4H1}w}JLdh{T9HBqLKrGBPT^)YsG#+X48Uq5i*>uC+p~8o$@Xre$W?71oiUDoCl4Eq8T#;_sS2XV+w z1Lh15i5$c;zLSO*kyRah)#6-fVGs{E7hDVMU!>1Hs}|;Hzd!05&)qRX(pUwJcpJ;b zo?gb6^_o^uQA4T5;T^o=o^u&|vQC(ID6fjThr znItwlVV*v~>Y7E^W%{Uss*rjwhYP2z79@xePscQUiKUaxYK=V_v_MYS3^(_kGz^tu zh0Tv@J!Ne8@;b+@euZ^Ap0kH8?@7{p^`2#VuHKq)tIx~)#|X*$%{Sigo`C@31@Cx; zm&qXx4hDEHlgX_K`n>%kf;T}#d8-8ZP=({D?_Jo#>pVqCNNHWe=E;uC?oceZkr1Gx*e&x^Vep>gDRcbmZH=Ts{ zYz~~XA49>$2pCW=c2E&mM;5HweGLdZMj|*Ap*@atQ=Bp49sK6bAIE!2Q3>{vDByM0 zN!8=VY7CuRFLgAHi&Wol@{VVDdk2-Uqoh<1gAoWus2vLbj>TgNOz}1qKB46QH}a25k<(5@FlM7t zDq5FGEMr9m@A@@#oIt$ZqtsP2@F5_QSsC$5ncL;e?I0XZ^)8&cIDhJ*bn23P>XPUY z-rVtVQSCxe!+cT0+&*#hQROEU9g~ZWNuJ}f=eUU9s4#gfN)nC^Go(cFmvmMe_;T*4 zHZgNw;o%==S{`fckozsyLXDSd8f5_w|XFzraL}%b(t|7j~VyIO^=X?-reX%I`Cy!3~)^I8fZN z*&%3W&RrRsdYBKH_MB*L-el)uc7ZZ7@1Db5DmW#>%|0b(pPD@S&}5x#gFNT#i*S^m zWZEU0c0q9>s(kBy$2fabDyWeQYD5k~g53`qlsjC>bVfFv5lv?v7Oh7knM;{-BV)=o z)3}&Z35koG%~LkCHewtioIgR-l(%^wpVhN zO?46##yO^pAbMu6!@*DDHp<*a5kiSv;q3a^>k#gcIiJY+qFe!K;jRO_2|re+C4=5} zMjv?*h#T&T8+Dcd=}Wyfwy0l~FrOKX#7%_*sYW<<-MYGHQO#m@?#yK=yF$*cKrcD* zQE|)`Hp}}k29uWsPe7`4pT7Y|i&~{5S<|Hs6;TczIvy7+6JT~>IrTB+r41UUO&XyP zp?sEIX-g`YD}4q%*k#kTaY_1^rN4h@G;m?u-7ppum_mQtBbRxwNv!rRXc!p{HC!AS z3Y-@LSJhuOXunV}C8_;8)xp-U7EBcgN_GE$y2Q*^FAnrwR2UMpLq#ekeZ9l{0GY)A z^hiAuLXprPabi&$#!AO3CVYV}*9a&K;9*kt370YLoOwb~)Q)n6QLrJoiYV6*Ir6IBIot)W$x61C6EfhA)7dA+RTjatm>_gO3B0A&0FH>Ai z%_2wy7aI%rP#7lHjCyxVHP(>6;9^G@`jbdAao3IkVxq4__X7yF zQ80%MVIv`Jq-X|Y6Ml$Iqk7q;%>|)UNNFmtHWY6f2b&dP*MR0hH-y+~@e7Dq8oAC} z?trXWnFv3vTV3)I!bCZVRCT%)<+fqKz#yDwQjswu6uCd8;(khf`JNSHaAX)-_b@RK zb0{WD{4iG%h;e-vdxwVugTYu<-23VP4_f^E0PIJA(gqa|zC0KAhLgF{8)eLLjXCf}d4{f! 
z24ZG`P&&4$3!QY*D*)f1U(jPbLY0LTK2el(xy)=oFwap~xqyw2EB^CTCc%w(ui`&Q zTN@-$?uMR$D{K1Pn*#PhE2aEuIluaDj-mp%q;uLb<|?AO`L~~bnJT(hpZwy8r8*HuKz)<;W9-#zj638|z`E~$%dsQ!Wf zJAP@y4tc|l`&rV4rf6CDyDz-`f>hQZmo-F-yo;slBZuzxN~N3S(#=b5j+Rn#Y$yOL zMaub%3iVrZQhqBvF@pb5p}zd0C@$~*2yKY%Xz-V-+swN6S`Dqcj2~=t;`xJJrq*Wb z2hB!$-lxa&&vo?tbF;n8WBhrx9xnEuhWke}K!v6v11bXAUc11Abt89I)ox8&Ja~h$ zmooC&sxi{2X0-rxJnnO=#!cHNQ17P1LlHoL%E2zUW2tWX>1@x$%ihZHL|Abv7w z^5H)&MH0*K55Sy9uYfsE(^SaEx3?+G&kzP^u>(qzU|U;kRj}wlHXmlNM_(>G%Ohvz z!B;lo!^8N8rzPil*}4AX>fQ6s-AiVCfoy^+i$&^@oL<@K{oaXrXXBEUK5_9+Vs4G( z^vO=&$8`rj%984i%sY=P*(iiP5jmc4mYj)CPryKmQk<4~=Z4SRoD*0W z98BLhjumr_2m>(j1WQsxMrdY4lACn^HP+4u?czhb&$Ji0ysUN?QHOB=DQ<4LL#muzw;+l@+X-5+5B_c=dl0IRWKCb2;srlB-UQpVMO!2a{Y15vf z-~mU(q{w0kF7Ug^3by5sR6{7ckj(5`oLE?Uw_VEIB4=*FMvx}$UNoaEat6}HHQEwHNci6iID?%6uAsPXPDMTA7oh3I^f>}Y~IySGwwwcX8`E1LH` zCi)(m^3UYV^vwt}eYXmy{Pg!A8@lq2Ei*l$V+($fv&x-Z(-wvPKy{KAAu4QnxP=Gp zWKc{_X@^P*3x(#=Dw{dlNwhN6k-qUw)C=iCDcD)uHQljdXqh|NaQRAK4^wr+m!T^% zEd1XnEGPgIZmdrgO9hreW*7>FplCoSovp_Di~Qh#VNm%mVl?^tg^?k~+adrVY($Lz z9{)kw6rq03p4lUMsv@mYMzu`8dNE_$T*ZRxv3b{Hl53Cb+OyzlpLexCI4!vj%dW$d z2Oj1Y1FX$nkLDMX5ZStNddSX;<`n_AV8BTYp)?G3H!@|t;DoE~1#N2r92tUcH7H64 z8(lGIOzFcz*^Q;PJE z3Y!;9*p(@TbXo=sQv=WOHVuGFTlRT7?@)hNYo}?)Fz?iaQwbTQg>&&4>X6|y7WV}w zgd|k#;gsM(*vh+k_r1(CLuy=@yW~vt44 z*oA(~j;OTp68`CFOxV;TCu|dnz(6P4Ov^6qsN+2;2?{I$+1hq`S&tg1S}~QFHkIFp z_JWA>_*sQa_YMTX#C&cHCiHl3Xv7Pu3gk}+>}95j4_0$;$P4Rx-r*74<2=kQJG_?< zjeCbihDW?xcGQ8i@}6uv?tQ%FY(0Z2u%FmF#)Eot3|+nISCaBW`_=;5OW6=*1`WqE zl9F3ItyANMy6gBK7&q2k8HUr_M~*OVs_Sd&joC)~hWx=TJ75wJ?1+rM-ceZh3;3}M zIyx4D&KxOk6I>cEh}TT13Xyu=e#kHf_)q^UE}w^SCWL=N&=jb{4r^d&l$fHi>~1`b z2))Af_Jm!*m_t$SRp_oO6cdL{=!6_Nu)Q&MepDENh0GwV5{wPQ>YHMBkZi#a^90K^ zjB^Muq=`F<%f!eKjSRpj*5wcbN_Q-E@Na%e{)2xD2Y}jDc;lJrXK2mKqBYpQ zm{AmU^2>RjZqIUn-ZWP;d0^3&CEF_R9v5v*lC4R$H7)51%p30aFM9H4Ew4A2+cF1LNk_(h2>vuOH-*}l2T``x0uRdTn= z?$#+2m`Kz6W_#xyWul`ja_t8%e&w zIPAizOOUpFO;|K}q%AdRbxT{&vEiBi#Wy035$%%Fx)#J$+hL#=ddzi5RS$OLmvKz{Ej+b;0 
z%gT$L&4FPk77XxS1~k~-R{eN~V#gQ@hT;o-mO-8^5a(%~hzsMg<>V17MO?6^x=mSe zl;vmj@Z-B$;;T=51GEpjVql120a|S_qJ23aralY6CYXA#ib<^NM0N^&vIIWnX%fq-~n|4AK zh1~%MHg8g(y@R5p$Z29Qfy7F-UCf~rgRKcL>&eQb1X7*}y;kNhtp-6tHsjP+f*YR; zYtL5#=?3qFZe;wA_zzxyLo$DE&{5{yix~xC;Z7-Imz=RnTzlxFoR21?+Nb5(rzejt zX04ALm9jR=S(_)1eBvmat(6=bWCy^R-h5uqa(htW6|7oV(yWT%OAUcTq-{zmmit3FM7%%wUGH!uNF@x_oPUNTuGB9 zOuFHS8h`FJwN+YwUag1Qmw+yGx8R?F-YQ@*pjK4hz@{Jp z4uInnrMR*X5caSP>GrAsxYU4_iTOdu4|%1I7$J*Q*bgAmZ`sACG4cS=RtuS?Li~u3 zE2XKTf>MRhV!Hr#Hu_5kv8$PlR0CE9!jsrN0BxF>gPOMC?J3-xcQm6o7c`bRG~b=S z&wI zd_u368GP^tmEDhe7VoRv6(cC1Ii|mw(rWzb6qAVgXLRi2K9-udGjKIo?nLptkaQq} zsfrv9B*Y&_6AH)4IY!P2atPZOPQn3XB{U}`f2r)>m>JeirZUEY7ljcD!EpB{D2jol z7BdX?t1ZMrN26~r(3@&M*u%IWJOxl`KyJZrFqnxw|I7lhXbWr^Odg7woUfc%Fcr?5 z3L}+sIijghGVPE}J4DltPi(HKi?hba1<_V7+3ICmJpmaHc40XCYursv%nC?czs&WE zTz}GQkHqbjxxFH{H|e!3)D8QzeacSY&BIb;4uLlhfj5<3>TBvL4&Ks3!KQ>AnV zDe~sHq+KqlPCYs#89MqS!=6>7sYwiB6~KZoK1+f32#j1GnQ2{cl+HU!XG4;s zQg&2Mwlnj)CNk#;XR)w!c3cJ<_%XR~=adn`DX-aY+Gn;&Tmh^>lBN?ZmtBBXuVQ93jZ7s)mMo3T8XB1@KlWiICq6P)56aa3 z-i2MQJ;wLVbYaFW|` z6FSIgC1)G?8dyUJ2k@)`yxD$s+=IeEj@!)d#}L7pY0ytWXmBOS-oNPfM%G8h#p<1N zT@trT=5{S`d*`{m61QLG_9x8!YYqj778Ht(@~JJa?Yz13#_s9eQ@dvc(NxaZ_CgCA zuYFf}Wj(Mcb}PY28NSk|Mo5SjWWDtdC=g(VQP5Dv>GUh+F%nK()Qb_-d%}M?62kVQ z5HiIeMoOx3fMpANGcY)+84aXg5jL;fcj;zBfs`4Ag4IHke#c5>SS|e5tJ$h)n^G;3 zdeV+Mv;&Fq#IH-1#1o8nsgb%gE`yqePwmuce3S5&nBig5J^S~8F8?rzGEYFi?^2Qz zFljiVBi3xn2m6>!6V}z*C%hcdHLr|jFRQI;yev;;`Z2JFRqc^zjdbSp@5LT=bdFJn zkO-_0Krv$aUKJ+TW&dZy0$EaafdqZKnQ4vpECNxc!eit#k+T&}%t|W?3B3;B&F2&d zlEP-}$iOhe^@MX2vWJ|V^u>IVG;9tg?i$~kxHBQ;ZIVoNvIz!|>sCS+^UcL7bkPT_O6F=su9op-^)@KV zWiXk5v$Oo`@vVIkEKdc$oXMMA|8C9OHIc@*>h9FRD)`+>(Cre}Aajr(h2R~-LKS54 z6L+EPE`t#5Vy0)N?RD-J7_#eb?Sli^plDWZG$$9UDYo1)q4y34k^z}^7?aG=W#&vj zFS@IgAGYXj9R>R)6~znKeWP=_lSE2cd6mO17M(C^t1xL%X#5_!J+EEAdHu%o z)6Xw8pz2FHjq@43{#NUk7KFXBC0Y|REtJ9(wY57tDvZCVFm+T}f8o=^)hy36xfsCz zZ_%YI(c?VZlY?R%LNWvo;g(<8X)R=_fqD8V8H&=GT1HGu}S!VSxtASIS}x5zGJ;$RQo|;&~-24`+yWbOoDHcUgX_zfd`MK 
zG?}!I$!9HB!vWH{Sy^L*g8b&rAAw+o z=pc#p({Wm$k4e>{CU|*Zbd)?{Ghht*cGp7}Arwd3p=W-`m2m;)%jd|6qS+jdW9HH>L{FKxhv(XVJE#QXMe?2 zQ{FSC!!1A`-FW4)V#fErV8keKuF)Rh1q!;nmSFD=!`@MV-{d_%79ynuYz_vYp*ZLr zQmP82L`G=SwiW}yLIFLoy)*)W!1)*;y^3cXaX?K55zf9KjSsLX6ypuC+=Na+f=R*J zSa=DMjuH$dxhi0L?tCc!y2~RSlG}Ip2<&Qu*tWV~eevZNC2N6f#VI)kvm5YLEIKK1 zr)2Jw$em(V=|6FLMAFvvO?E&`aN?b7aPYf3rrdvVNV${Com_-Wd{$x9y>H28w!>UF92owvTP9lx=G&)E zy?k}@D$C^Hg9@0sRDM#%Avxoa$Q^p<@VvI~=03?$B0IKCKld0cxlaOd<_dgc?bN3mpz56hwdtYKbK2 z62FqZMv|fDt!g9-GoNEK*2Z9qoxuV}S_tPmjtj;#RbrDm%!&|)gsM^yM_rRlrYN=o z#9=TZyEB%r%$DRZWQOD+zB^v2o!ZL+un8_yqT)gYIoHtz!ZX?cc;K2l?;X}IP`_q6zq9EQxNddGNTuOZK zCqBf%gzy5T#Xv*tDj`<`5F`Ocbz^g#W_YY%)#DCJ(!Zz=G*mza0}mDjJiLfP#hmfB z@l)HxT!}ZrJ7#v=-#K|~v9w9dIXH7*=D-8%Nj_R zH^2__a#h1p!IClY@^F2_k~#5$DD{s)StTA2e8ZAT@E+o?N(wf+gU%W#(Ao3B&q#om zQ@<0u9RJ8579UrBQbwno(J6ABOGX{%p1jI{1x$VQUQ>>*(?~_pJZ5m?|3GsQ^D6?6 z<(C$32{L0Z?4<T7GFmrvy0xA(Dg?9R&cPO%=o-7L}HhpJr?T#)P!rNm>e`o+?$R!f+5#5ObtoiR!I^ zBoM(MBy_<$9JtC7^eHhEKK=iNk|{?r2oLc{(m)Va84-dY<`{_i6u!cLB*%-&#yl&O zKw;~ff`Pzg;SCD;$K*64IO9GlniZS^V{T9)CL$T>WJ_!&#Pt2j{wt#=&8&ng5O$f$ z@eXvLqcVAfH;~fH1Xi};0koZ6=giSA;@Qkj%96=x##8=l6E`Pryg2=fQF1yM}7q298ijDz1aPz>-c9LnCIv(}p+}`oV zj@NhH+J&?K%m~85F$af>*-D^-sP;>JO+7I>XoXM~&}^?gfAjg-op*OnJukVp%kJ%v ze96qY?S8{8WtPjCqrkiF9VP=}FLwgFQjz!%$GXpa{Gf&PuDZ9%-(I!k{ zvqOOthc(8OZOTUYOEwA-+=FhKZPB-F(7j(++v+xcV07a71GlNwWBtHmq~|<6J?FO= z@bdE_JGmR2ZQG1LZ`8wu@Rty{qMWkHVR*5pm7$u5yqMq$@EGlpV;Vt`^u0fVi190( z>8^zffM_Zl1ME1k_O#46FoZUM4g#AkeU^f05Bf+wZxxjY!3hIvfI#Xmaecums)f%q z{t}zF2Z2Kjd#No3M#h3(Xyke0^4VS*u-@Qs?`ZJiNGP}oj2oJ#-jU(K>ujdeq`Y3& z%hUX3V_%t^IDGeV&>Ns+2l`n+me5rQdXHT71}_fuhl1*9+=w~QN$2kjf%8Hve`qz~ z052SMEc`J(3Af;ojQKVH<(U3*8seXgGpvRntq09x^dT#SKnfCX(d_yL5>sXuv#ef# znKb2wHFa0!o5q>LuV>uK_?|Ix=-aOEx+G`4?5xMpe6H-7?X%^xU6RWyyO`#n;!NqG z?>f^v(=~H?=JfR5*(W0BBhN+7-+5}@fx~lrQBUz~%WUgxE2PkzWy%jLCsZ(SYDUiF z31#`TE0TqD+NXvuK)uh_&u)}_?dvBg&TB{D$}If!4JSHpb2-D8aC?aDAWe)Z)tO`@ z$sAFQ<@k`sEqxcCDD_C{v+7B^Itn;7Eo>xPs%t8JngjrETe0+%rb*zSO{gk6C<;5X 
z+3FZyWTX861|b`gLnByg=p5YP5JvM54)f_Bz%Z^4^+SX*z@`DT`@E3#0hWygh*{(X zY7Ew#7esh~gk^jzNyrYRy0zYWpm%UEh#-BJN%j?k5@wp>u`wHZhrNMoU>zt4L!mzq z9Ig(*4@;kRF|1Ka!Xz^{dFdo8_UI!Cs+!J`;lL&@x$y<@Jws>bI9D4lzGiR{`au-- z=ftR3>)=Z5?3~sK7ZLG2gFve(#GXR_A+jML^x;*ds-&1Zt5x)M2SIxvV7y&Mdj|xS zC5E`kv;To+D2cF5LQM3RmJclNb02c@{!^2Rh+hezx4bo&r=44VDHX!O>L;*l)=`j@(@gHnbGHg+3htUjoI+u$Q-G(DOj#L626-Gq z61du@hDatZ-r?(D)G^y8iXCV#*&ic-robu9)FYCJ!I-UhcoWTk z0Yu=_9_?;%SbxKaCPhc3gv4GqOy>Z?3?dp#TIc%tKwuQuGBa03TOk0wLClo_e2?j` z2|uK{`KNH=Sit0@V!PGeOtgbyyw$g2B5L@P2>LfPL8_4SmoVV!-mm=Tp8Gg+L!|ev z=Wd^ZTPMFP=a*5xv1&KrfvP<~)y_{D!wI3Ni7RZlVY*?|R{xI;(*^PB3$!Ny06V08j%0FyQJ;%coYDxxS z?o^1?%BbFzNol5OA=AnnmUJBQrjzH?CTyWgmWh^+U(g*h_FJYLQ@SZbo{~VF$t`x7=3`2V8dzq2jwyP8bqH{@8QFs8;iNz|UDrnRcN!Y7uq~IRdj@ zLnuwTaQ^VJa;>R$Yip@^UIC1c%{!O=;>Xz4Zd9YUVNfk|8@{q$#OudLvDNVL8sk}8~CO_dHw(&W9?!7#1UHE(woMPi94Z>((bb-1A ztHEemOEj?xwWCj2ZG1)J|1ORx5{VlpZK19i8t4bz8YH5&IoR7D@Lz>g_}&D)+@SF! zVud9_+$1QI^j%~$_mR^lI=#UWFLXPhw21>iaQZS5gylgzlFW-&p?m7>Eb<}5hT(4S z)xdy&Pqe)bi;5JGSjCqEUOz6upC|%2%n4-|BG^Yk;MYN*M!{BN7KIiaBkPF2c{}o- z?!N}|IZ5s-yY;|&VaG9^p70%DD`OidI5ziBFqP1F<_VqEM0Nqb$vf`aaBjoU2HwBn z(1zn1P76FDDjM%J8Z?*$@COBJ{Ev8e)3{cWTzDGjGa0MY-yma^%ARUmmP3tmq;^Ww z`y}Z?!J2Pra8^6Mwc^9zHtJFk;C!>`A0pVk&p%L%`7!peK2eTT10LjLV8O~qE z=`v751dEN~a4|zDnAq@}jAPHSOk&SK9Alv0ubkUFzFnov8s}OhhkS~3NRRd@A+xWd zU|*8flk>G*H+Kv0~+?0{I@gx_6k+tYonO1Vo}O;fFl&g|)sTfb|SoHequMs(IdkVz~(b$`oW?!XPdj|_inRh}egm+b5k6Tc-ZWu|1wP*+9M zVvOh#8o6EZsRb5SV6e$6{Dj)=HGE{J6ps;z(ged^59}dRphFa(I96uymWA~ zZ7Tbr1JZ^GX_%J#Rl5>9m7nCQ zg~GQd@7Au#W7xUo9IsrP*?FsZ-dZMF%OYKH?zY}LICtgGpP%2SJ>*|x~GEs&Ejnarp4@3fO}{b~J=V6>9%a}XG$``2(G zZ{vRTg_mEDtogDP24nQ*y4iy%OJ#GOzbO8*;{R^lpRb!Q*(;VnorW|7X7eMEqTD2N zn?!C?)arV5;^m2#UVMd&RGMoe2IbcOgW>NCOI17MsvY+$epdU_+MjIt%T4oD2gIra z@l^66`6)F+YO7bDfBE^BCSIACwZ7|q+btFNC4JH&aSl;K&Adx)9m?o2j3o?I|vJ>rOk3_v*g($d-hDV zPqlx!=!Ogx4x&#v&~T?O*l~oB&H~y$wCEtQ*>tn%M)P#@?2dQ$zP(o}-Y6GuoOf&# z9XOg?0gJ6~S?^fC=ZT#CcJX(Mak8!KtP|;cTY$x`IpNQ*;)dUYvmZXGJW0;evh%d) 
zJpIt&79GWlc^mJx&uw~O{m?1p9hdWtPaQd;Qp#51rWy&UN$7b?@+Z zF5S()pY>6$Sb9P#Jt3E#5WCJwrDw(7i=uO#Za*GM>dhfm5aBRxr&)K!e1!G7mIeIUh<*hP4=uPd_UiheZ+A6Tc>mirt`e zJYzNK2sL*hoQA7Fwkwe=?o=z7h*#B%H>XslcnjfTP;y%ByOq8}D|5B)9DUbvlQrqv zen)&p8Nm9@YUwkrPqi$q#%Ng5rVQ%fA{kyS{;qw45d&Hz;uj9Y0Q4vBsdBAaf()gF zsDIYnJ1Sb2cWd(j?x_Bn`N*%!;)1>=EGB+ICNGQ6#&Me%(Lbb7%`QmqkUY$ayb(Sp zJ-<~$=B_zp-U=a;{EtMb=&t2IksS8dX;pSKG6~DYFK_~kbTsm?DIs}-)*)ghex>&? zT7F&P8^&x}{}Zo+&yUmoxoV0O2V^Dr80nH!h-DITLf%xm!2fY%7F-x$WG>MsMelGE z*5S-Yge!W-0_miRR63Wk6VDM3&Yf}8t&gSK*K!L~>NrBr`CDZ1@gyZYY1x@c& zhQTT4H>6AHqim}DXdMWz4rdvh|9WKz^j^J(U$?a+Yyfho6AtT(WnVCE}wK zPE9SU!p?6Q()B2o*MYBI6;l5mbvOqIG~s-~Im{{dtNH{iNpCy8X4QP-Yv@X_jHvw8 z6@bwusb*7F>fbAutE=RZsEg0nspy05MRx2!vu;J<_z1lpC5P$z(+*FX(!X#PDKO%m7NF~*lAZ{%8_~VcJTaFz&@wmV9g#Tnm7r1gQ$HuFae7X}_EH#Yb2BWi= z6;@y3{1Sq2#Fs&-N!tY+g_NXv17$LV!i<$uD4cAOOfI1ZvP@|t&Vjg{3fdeOebob8F>r3=XvfS)k34!2m%rFv+ zS)qtCAi!|9iO$PndVazv1$&MhfgGkZNOCTWfKCwfe@7mtDCsUr%bjc{Mder__(|gr z)=r0b+R0Xw0F(NE!X$mv2N)@zpySzHA>|90G9c>%w6dQ#Ph?aTw!Z%!5%bY7!G8^u z8Y1a7m|ZZK_b_KMx9Ik{H_pw5rQA(&?j|vJ>zsj1+N7Pd+x+pS<|z&$pz&xdE<`}h zdmsX;_DiLldLj~Tg;3bb&G+3clJd66dD|vWd_u+I%sZG9txg(w0Hs_R_0=VeIxw3L zX(k|iTQy%FZ4m+l={v@E4vE!!rINi;!9KYF#>e-|p8YWYwvxSxrL07uSjnJxXx;(E z!+1ecMNWHSm0lhSXH1*(#Jr1=X+Sm&h^B!>Q?8h|TQW7trY6zU#9mWhmrPc(OOe!V zyIU=~wo0z8vTG|C6t?`i^KnH+F}F^UMiia(Nv|f!`Izi{Omse$^s17a)v~i%bXIFL zAfXIk1Lq>)zz~5gABG6jeyOjiCsx7bp}zbQ=ao2_^rVXz8Hxj&)6mm|D6Wgq^IOeZ z7CD@iC~?&?S1oeYi(Gl6CdqV&Iy^HWoD)J+xwB!8`?%?-59xeJqYc^< zg%x7nGx*IFDEIpV%Kf7g${p_1!Ks6vP`D%bQ8>C&IJ)D9uNj%smtGsWIU;40$r)v^ z=dh(!tUeLB61nn`l};U5kKy+T>36kCT${|{kd!vYb819)6gA=}aoc2Wo5*eZgb7q@ zyD8JQZtgtOUtJ%~%Ad`RSaF8rVov@$-0R109be2Zzx5&kn{dATynPn|p(s%^~sBnT+`B>-Jmr8T(>p zIgIdUR?C^yFgbzW!>rufg>MwTUVN)~s&%OxM9A`;v9_0i3iX-!En{fc2 z+AkG1^~6fCLMVvOX3S*W+&Q&#(OG@hHn&T1Hp@;3**1Sk2AI)kW}MrRa9CgNn%g24 z?ZPj`6jRE}4*1v-t=pVlUgZN7W($>f3zb*p$3CPv@$#mJg12(vIB)Y7Ji-P}2IGds zGvAoZd;q+DYAjU}Yt5XM1&ODXDed*kZ}y~oqW>%zKPpdr3pQYFn%divZG1o5)RJd? 
zzfezZV{1iwx$(ntQ~L(%hc$Y*zbMK+SfKmGrk0$8PUA0iPQ3imX*!r`{bi<+o;`Yc z&SB35_JbRYzpT`g>ucTJxkL9i6|I%WvyFdeapL9gvQ5YHtbdnhq~}6CJr}K`=bc%d zImTac6y{esrp^NEuL>y4uZs2bTvABS+x1w(f3?GY(qa6yuJEMA`0K)~6Fbbmu65G$ z4%3My+pn99^t?w;&wJ@v)SD4Yw3rb~bQtM5H|u1hQEW7w+-Vh?^l&8|cWR4K;<8R{ zuu6qadfs3<<#R|rBRy}@({r5(&%&>PKtOdR1A7jmkns$_T2yZ^*azR_MVn#7@%Ws0 z3R*s1%8pwEZNL1|av_3RU2*7Krkj@3?h!-gkn&x2F|8!zm*OaCDO^Y#T06(>%(Q#` z#n8~;`2OaLje8R+kY^4zH{i8-bZ;As9YEZ=nT|H!OGl9n^fjbnH8(K-`sRkwy#v2R zevlD#P^hsW#QtHEEQu>aMlc7ZRJICosWR+uPmWS}~ktB%`1fM|+ zoF|l7H2aj4>BC9r>1SJ+V6T>r?dpsc6k}?W#R51uQ7RK>b{W~3UCx48+d{#{`GSpe zkBbExrGhH3wbSBD}&KU)d*q!>eYKm!G5pDY5r)H(n(9SBDgjMm5z~4>sg}|wGrXM?@ZC%uM z^|-EZnq*m8T$5TqkMTV8YQ6){xVg}eRPiEsW*HENTB`jRUmBc0wV%T4Hfj7AfBWn4 zP1HQfv{tjKIh5LO;Fe}Wf5{Lw!r{;w1s3yDtIE@t7A(XqD~JFlbk#pgW2|`t$dP&fQfQOI=5168pz7w9OcM2mUsGON6S(00sP%3x?7I< z+dB@loH^D_2h|+yI87V^e>_Culkhi`!UyDdspNk`&Ir9eL5@JqQ{*>FPfT2c*qm%T z9wu@Y>CphE)DDy{&xf&BLMzSc$m~ZbY#Ew*v-dgu1OBB z?C^>X91F2vuba2mN%jWW-T;*a+SO(z65Pd+?PB9WvE&f_&Ylz9J(9aec9WF^AeJ+y z$-u*8`zN{B-+j0E?P77mcBycOT)0ClJo$0cNh$Y~oO=q#FcuVJhxq8*N5z`Qq>|?U zueocDiR;Sm_soNNz;JI^Gq*4U1%?h)35&UyF$$w`=ND9x{3M}=!xQ1EaO%HV5JcOSWE{ z<8ucCoGuV=WjbI7Eo9Bh9)ul4CZdOaT6R-uI&UL~C0Vvz^p1TO|G!zVWCBi`A$R2hHzP?H`KOeAH#cT>O@X4mmzQ)^w71kdH+(DE#VqRJ%9Cw}B zW55;{-l;E&JDALYtjU&k)>>lf4;^IE``vOBW*CLNm2vrM9wnE&+iX1wXjy0;{Dlqr z^h~7kEjwA9#BG^5Hm2Fw^ShXz?^qG?+xk7*+YZRC4Kj1kZkBcDiwGhBiT=izJg@A5 z1j(QS0g3n{h?A2wk473DA>i>^uJ2GcpBwUK0cn_P2EMQL)F2fzQ0Ccp^R}CG5ze!> zoIhd{UL(D4mWrNdKY~RyqT}e4-5NUU|KnrOKNe2J&cJP!2p`&9*$aK1%8h$Oc+rA` z+~&wCVX9vf5CrB9O!ss!>t(9EJOy)O#7uasxp}&H{lzktBc7Z7{vn>*u8CzkJRjg? 
zEXS}2KuAFZ1dbgMxD+A9*XcT<7IotBBb-^d{a<#{3oCRyPg9wo^#r;l)Qa9;QJ?=H z@EZa`=(aWq`q^S=2@k+$CZ{f4iF`?|1o1}?2+(cjPZx{i8W2h3ztHxt3GEgEHR$2o z(=Mm2_Wi^Z+`r-r#vb9mx>*-uy4^aa)U@!jRyDYr2Y1iruY@apWLN6;^2&BKypMXQ91JB*Y%Y9Vr7clryt4EwJb8xg9gU3k_*MOtRs3LWwF|ivX#QR z_?5)S-4~VZ{ctw+^r%vJLM=SO3s21E%;sd=cpQbw9Ybp!ii`V2OY-%fKYs6V+$D|& zRz*Id%15|-L~)uA%M%NE9MoC6LXxSTLB`T#!8WV!_zp9EcW-%~^KY z2rLU7TY!#@OdNQJURUXd%AbiL^tM%?VgtKHpyI5BbHb9vaxt0oY<}%&RBW3;oJyc# zqy|*HHS@(z?k0hXtw6*hE@Egay51OGp; zLlb_6_w#G5cWe!gzhVuJPb*8h_eh`aarbPuerBocX|R4)%_!es2b^{bydVVfS!jT4 z!0{tVzab4;;?%yz&Lgf$mgTn;$1k|?Px$#kuXW4OY>EZu^9IUFAQXCb(tpOl0NGqv zqpt0QVb^FHJu9AMRv*KD($vRhL=;(l40}yeAG>ibvicbIkQ2_W_LB%>`|XgW?JYZT z3t<_Y18qeEory>`2nP+cnR_>H1D#D4+1YJ9Teh`M^0~hrQ>D~Pj`;KI#oFiUpeA+6OJcflw}$a4yoV@N5`FO zgX@Gc+$azZkNhKnDY~n}l>29Z5jdL4(W4YD+YncKaye(>r85XSneGGPSPv5;W@y=; zxd}wRy8Iff5yz$`VU9y{2wsGjMiqS3O^D=W*OhaSZcSj;mUBi#R_N|TKBUeroeN3> zZT4F%7p$rNWM-bOewkg_EH0NzG=>?#n-LfA$-mH|p`tlBqWS*W%dsNZ_XNVR=WaiL z^LYhfjL)cL2YA^5HPFWcee0}5__MX!JJy$#&1mLtMnB0kKeR(kfr_Rw*^DYEf(S@TzXF^HPS=KB|O==y+jF zkx!}eDK4KkM^_#GSmBbRRKZcx3J9uo zwK${&d$hoIE%bs`(S$(f9btb8|MDv$xRXdVZOI0mklj1bQ`qV+1 z_4%{z{XK2g&)RG~`y8KjFu>`;^~DLgW+WdSPp=Xpp;beyQLh1J%a6sXLUrjs zSxq>7OES~!YYc0cfwbIe^vy}u4OS%VWs}VV);$Bx{+6Sgh*A)`CM|i`spQZnd#%`- zuk$j9R{X>70=Ob9^1cz%Z;+ch-g0~sPoj~R&AgJ$J%whk02kKg-1NKU2oD1!Do_7` zbhl%@z!e*93&4S~UKA}N@Y0G|F3GfqErG)*6U?3)NJ))036S5J>Ni;HKoV)7ag0yg zgq3*Xkm_w!Zxg)pZ}kgJIN}BllHs0A&jR>dLjJjBd()FbTsm>JcI47lKFHh2SV`kqU>Ifc7-ERMoy>T9op`_n+K7-1tjS$S-*astcS`Eh7Pz~XiC zwU;ihc~wN69{p%C;}KOX-bZmux=!;`l*3v82dQLdyX-{Ju`vp0yj&o1(5A}1K-RkUK!PhxhldYa0}vFWmJ-OuVs_u!GSBZEVI z!@?_L?WmLL#5y;D zvuz?D%+psw9S25ym;fKHb@DJWuy=K*w36V* zZ+!Gd?1Ea-$V(b$^HzL;TYeEC>S3^XDcG#+=v0GUJlLfK`xkpu-vIXwfV%`8Kzr;0 z3^X4Gwl4*?Ctg(p9X!yX1bP>%RNn#aI{@FM2oaSzGZZh8rzlZxvzEDcubxvp*oPL8K+B~%`J{q`F- z-}vO}{8e7DLk;cZp`CNiIp-f%yyS4n1HHlw&_ZFVnR8;-=_#X2qS$q@#wEE%k!!SY z`JC&Y0&peu<)d9{pqmG}X9rgTO^Ln9^J<`r2fAhl!N#Gua28U!1<;z?NbZ4%aG98I 
#!/usr/bin/env python3
"""Choose detector metadata that better matches the trained model.

The script runs the quantized streaming model over positive and ambient
evaluation tracks, sweeps a grid of probability cutoffs for several sliding
window sizes, and writes the operating point (cutoff + window size) with the
best recall inside a false-accepts-per-hour budget to a JSON file.
"""

from __future__ import annotations

import argparse
import json
import math
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, Sequence

import numpy as np

if TYPE_CHECKING:
    # Only needed for annotations.  At runtime these packages (and ``yaml``)
    # are imported lazily inside the functions that use them, so ``--help``,
    # argument validation and the pure numeric helpers work without them.
    from microwakeword.data import FeatureHandler
    from microwakeword.inference import Model


DEFAULT_WINDOW_SIZES = [3, 4, 5, 6, 7]
# Every default below can be overridden through an environment variable; the
# matching command-line flag still takes precedence over the environment.
DEFAULT_TARGET_FAPH = float(os.environ.get("MWW_CALIBRATION_TARGET_FAPH", "1.0"))
DEFAULT_COOLDOWN_SLICES = int(os.environ.get("MWW_CALIBRATION_COOLDOWN_SLICES", "25"))
DEFAULT_POSITIVE_SKIP_SLICES = int(
    os.environ.get("MWW_CALIBRATION_POSITIVE_SKIP_SLICES", "25")
)
DEFAULT_CUTOFF_STEP = float(os.environ.get("MWW_CALIBRATION_CUTOFF_STEP", "0.01"))
DEFAULT_CUTOFF_MIN = float(os.environ.get("MWW_CALIBRATION_CUTOFF_MIN", "0.00"))
DEFAULT_CUTOFF_MAX = float(os.environ.get("MWW_CALIBRATION_CUTOFF_MAX", "1.00"))


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The populated argparse namespace.
    """
    parser = argparse.ArgumentParser(
        description="Calibrate microWakeWord detector metadata from validation data."
    )
    parser.add_argument(
        "--training-config",
        default="trained_models/wakeword/training_config.yaml",
        help="Path to the saved microWakeWord training_config.yaml file.",
    )
    parser.add_argument(
        "--model",
        default=(
            "trained_models/wakeword/tflite_stream_state_internal_quant/"
            "stream_state_internal_quant.tflite"
        ),
        help="Path to the quantized streaming TFLite model.",
    )
    parser.add_argument(
        "--output",
        default=(
            "trained_models/wakeword/tflite_stream_state_internal_quant/"
            "detection_calibration.json"
        ),
        help="Where to write the selected detector settings as JSON.",
    )
    parser.add_argument(
        "--window-sizes",
        default=",".join(str(value) for value in DEFAULT_WINDOW_SIZES),
        help="Comma-separated sliding window sizes to evaluate.",
    )
    parser.add_argument(
        "--target-faph",
        type=float,
        default=DEFAULT_TARGET_FAPH,
        help="Target ambient false accepts per hour for the selected operating point.",
    )
    parser.add_argument(
        "--cooldown-slices",
        type=int,
        default=DEFAULT_COOLDOWN_SLICES,
        help="Cooldown slices to use when estimating false accepts per hour.",
    )
    parser.add_argument(
        "--positive-skip-slices",
        type=int,
        default=DEFAULT_POSITIVE_SKIP_SLICES,
        help="Initial streaming slices to ignore when scoring positive examples.",
    )
    parser.add_argument(
        "--cutoff-step",
        type=float,
        default=DEFAULT_CUTOFF_STEP,
        help="Cutoff increment to evaluate between cutoff-min and cutoff-max.",
    )
    parser.add_argument(
        "--cutoff-min",
        type=float,
        default=DEFAULT_CUTOFF_MIN,
        help="Minimum cutoff to evaluate.",
    )
    parser.add_argument(
        "--cutoff-max",
        type=float,
        default=DEFAULT_CUTOFF_MAX,
        help="Maximum cutoff to evaluate.",
    )
    return parser.parse_args(argv)


def _parse_window_sizes(raw: str) -> list[int]:
    """Turn a comma-separated string into a sorted list of unique window sizes.

    Blank items are ignored, so ``"3, 5,"`` is accepted.

    Raises:
        ValueError: If an item is not an integer, is smaller than 1, or no
            window size remains after parsing.
    """
    values = []
    for item in (raw or "").split(","):
        item = item.strip()
        if not item:
            continue
        value = int(item)
        if value < 1:
            raise ValueError("window sizes must be >= 1")
        values.append(value)
    if not values:
        raise ValueError("at least one window size is required")
    return sorted(set(values))


def _moving_average(values: Sequence[float], window_size: int) -> np.ndarray:
    """Return the sliding mean of ``values`` over ``window_size`` samples.

    The result is float32 and holds only the fully populated windows
    (``len(values) - window_size + 1`` entries).  Special cases:

    * empty input or ``window_size <= 1``: the input is returned as float32;
    * fewer samples than the window: a single value, the mean of all samples.
    """
    array = np.asarray(values, dtype=np.float32)
    if array.size == 0 or window_size <= 1:
        return array
    if array.size < window_size:
        return np.asarray([float(array.mean())], dtype=np.float32)
    # Accumulate in float64.  A float32 running sum loses absolute precision as
    # it grows, and subtracting two large prefix sums then yields window means
    # that are off by an amount comparable to the cutoff step on long tracks.
    prefix = np.concatenate(([0.0], np.cumsum(array, dtype=np.float64)))
    averaged = (prefix[window_size:] - prefix[:-window_size]) / float(window_size)
    return averaged.astype(np.float32)


def _compute_false_accepts_per_hour(
    probabilities_per_track: Iterable[np.ndarray],
    cutoffs: np.ndarray,
    cooldown_slices: int,
    stride: int,
    step_seconds: float,
) -> tuple[np.ndarray, float]:
    """Estimate false accepts per hour for every cutoff at once.

    Each track is replayed value by value.  A value counts as a false accept
    for a cutoff when it is strictly greater than that cutoff and the cutoff
    is not in cooldown.  Every track starts in cooldown, and each accept
    restarts the cooldown for that cutoff.

    Args:
        probabilities_per_track: One array of (already smoothed) probabilities
            per ambient track.  Empty tracks are ignored.
        cutoffs: Probability cutoffs to evaluate.
        cooldown_slices: Number of values a cutoff stays suppressed after a
            track starts or after an accept.
        stride: Multiplier applied to the number of values when computing the
            track duration.
        step_seconds: Seconds represented by one stride step.

    Returns:
        ``(false_accepts_per_hour, duration_hours)``.  The first item has one
        entry per cutoff and is filled with ``inf`` when the total duration is
        zero.
    """
    cutoffs = np.asarray(cutoffs, dtype=np.float32)
    false_accepts = np.zeros(cutoffs.shape[0], dtype=np.float64)
    duration_hours = 0.0

    for track_probabilities in probabilities_per_track:
        track = np.asarray(track_probabilities)
        if track.size == 0:
            continue
        duration_hours += len(track) * stride * step_seconds / 3600.0
        # One independent cooldown counter per cutoff.
        cooldown = np.full(cutoffs.shape[0], cooldown_slices, dtype=np.int32)
        for probability in track:
            cooldown = np.maximum(cooldown - 1, 0)
            accepted = (cooldown == 0) & (probability > cutoffs)
            false_accepts += accepted.astype(np.float64)
            cooldown = np.where(accepted, cooldown_slices, cooldown)

    if duration_hours <= 0:
        return np.full(cutoffs.shape[0], math.inf, dtype=np.float64), 0.0

    return false_accepts / duration_hours, duration_hours


def _select_best_candidate(
    candidates: list[dict[str, float]],
    target_faph: float,
) -> tuple[dict[str, float], float]:
    """Pick the operating point with the best recall inside the budget.

    Candidates are ranked by, in order: the tightest false-accept limit they
    satisfy (the target, then two progressively looser fallbacks), higher
    recall, fewer false accepts per hour, window size closest to 5, and
    finally the higher cutoff.

    Candidates with zero recall are only considered when no candidate detects
    anything.  Such a point (for example a cutoff of 1.0, which a probability
    can never exceed) trivially meets every budget; letting it compete would
    make the fallback limits unreachable and could select a detector that
    never fires.

    Args:
        candidates: Dicts with the keys ``recall``, ``false_accepts_per_hour``,
            ``sliding_window_size`` and ``probability_cutoff``.
        target_faph: Desired maximum false accepts per hour.

    Returns:
        ``(best_candidate, limit)`` where ``limit`` is the false-accept limit
        the winner satisfied, or ``inf`` when it exceeds every limit.

    Raises:
        ValueError: If ``candidates`` is empty.
    """
    if not candidates:
        raise ValueError("at least one candidate operating point is required")

    fallback_limits = [
        target_faph,
        max(target_faph * 2.0, target_faph + 0.5),
        max(target_faph * 4.0, 2.0),
    ]

    def tier(candidate: dict[str, float]) -> int:
        # Index of the first limit the candidate satisfies; len(...) if none.
        for index, limit in enumerate(fallback_limits):
            if candidate["false_accepts_per_hour"] <= limit + 1e-9:
                return index
        return len(fallback_limits)

    usable = [c for c in candidates if c["recall"] > 0.0] or list(candidates)

    best = min(
        usable,
        key=lambda candidate: (
            tier(candidate),
            -candidate["recall"],
            candidate["false_accepts_per_hour"],
            abs(candidate["sliding_window_size"] - 5),
            -candidate["probability_cutoff"],
        ),
    )

    tier_index = tier(best)
    if tier_index < len(fallback_limits):
        return best, fallback_limits[tier_index]
    return best, float("inf")


def _load_config(config_path: Path) -> dict:
    """Read the training configuration YAML file into a dict.

    Raises:
        ValueError: If the file does not contain a mapping at the top level.
    """
    import yaml  # imported lazily; see the TYPE_CHECKING note at module top

    with config_path.open("r", encoding="utf-8") as handle:
        # NOTE(security): yaml.Loader can construct arbitrary Python objects,
        # so only load configs produced by your own training run.  Kept as-is
        # because the saved config may rely on Python-specific tags; switch to
        # yaml.safe_load once that is confirmed not to be the case.
        config = yaml.load(handle.read(), Loader=yaml.Loader)
    if not isinstance(config, dict):
        raise ValueError(f"Training config is not a mapping: {config_path}")
    return config


def _load_eval_sets(
    handler: FeatureHandler,
    config: dict,
) -> tuple[str, str, list[np.ndarray], list[np.ndarray]]:
    """Load positive and ambient evaluation tracks.

    The validation split is tried first and the testing split second; the
    first split that yields at least one positive track and one ambient track
    wins.

    Returns:
        ``(positive_mode, ambient_mode, positive_tracks, ambient_tracks)``.

    Raises:
        RuntimeError: If neither split provides both kinds of tracks.
    """
    for positive_mode, ambient_mode in (
        ("validation", "validation_ambient"),
        ("testing", "testing_ambient"),
    ):
        # NOTE(review): get_data is treated as returning (tracks, labels, _);
        # confirm against the microwakeword FeatureHandler API.
        positive_tracks, labels, _ = handler.get_data(
            positive_mode,
            batch_size=config["batch_size"],
            features_length=config["spectrogram_length"],
            truncation_strategy="none",
        )
        ambient_tracks, _, _ = handler.get_data(
            ambient_mode,
            batch_size=config["batch_size"],
            features_length=config["spectrogram_length"],
            truncation_strategy="none",
        )
        # Keep only the tracks whose label is truthy.
        positives = [
            np.asarray(track)
            for track, label in zip(positive_tracks, labels)
            if bool(label)
        ]
        ambient = [np.asarray(track) for track in ambient_tracks]
        if positives and ambient:
            return positive_mode, ambient_mode, positives, ambient
    raise RuntimeError(
        "No suitable validation/testing data was found for detector calibration."
    )


def _predict_tracks(
    model: Model,
    tracks: Sequence[np.ndarray],
    label: str,
) -> list[np.ndarray]:
    """Run ``model.predict_spectrogram`` on every track, printing progress.

    Returns:
        One float32 array of model outputs per input track, in input order.
    """
    predictions: list[np.ndarray] = []
    total = len(tracks)
    print(f"→ Running streaming inference on {total} {label} track(s)")
    for index, track in enumerate(tracks, start=1):
        values = np.asarray(model.predict_spectrogram(track), dtype=np.float32)
        predictions.append(values)
        # Report every 25 tracks and once more at the end.
        if index == total or index % 25 == 0:
            print(f"  {label}: {index}/{total}")
    return predictions


def _build_cutoffs(
    cutoff_min: float, cutoff_max: float, cutoff_step: float
) -> np.ndarray:
    """Return the sorted, unique cutoff grid, clipped to [0, 1], 4 decimals."""
    cutoffs = np.arange(
        cutoff_min,
        # Half a step of slack so cutoff_max itself is included.
        cutoff_max + (cutoff_step / 2.0),
        cutoff_step,
        dtype=np.float32,
    )
    cutoffs = np.clip(cutoffs, 0.0, 1.0)
    return np.unique(np.round(cutoffs, 4))


def _positive_maxima(
    predictions: Sequence[np.ndarray],
    window_size: int,
    skip_slices: int,
) -> np.ndarray:
    """Return the peak smoothed probability of each positive track.

    The first ``skip_slices`` values of a track are ignored unless the track
    is not longer than that, in which case the whole track is used.  An empty
    track scores 0.0.
    """
    maxima = []
    for track in predictions:
        search = track[skip_slices:] if track.size > skip_slices else track
        averaged = _moving_average(search, window_size)
        maxima.append(float(np.max(averaged)) if averaged.size else 0.0)
    return np.asarray(maxima, dtype=np.float32)


def main(argv: Sequence[str] | None = None) -> int:
    """Run the calibration sweep and write the selected settings as JSON.

    Args:
        argv: Optional argument list; ``None`` reads ``sys.argv[1:]``.

    Returns:
        Process exit status (0 on success).

    Raises:
        ValueError: On invalid numeric options or window sizes.
        FileNotFoundError: If the training config or the model is missing.
        RuntimeError: If no usable evaluation data is available.
    """
    args = parse_args(argv)
    window_sizes = _parse_window_sizes(args.window_sizes)
    if args.cutoff_step <= 0:
        raise ValueError("cutoff-step must be > 0")
    if args.cutoff_max < args.cutoff_min:
        raise ValueError("cutoff-max must be >= cutoff-min")
    if args.cooldown_slices < 0:
        raise ValueError("cooldown-slices must be >= 0")
    if args.positive_skip_slices < 0:
        # A negative value would silently slice from the END of each track.
        raise ValueError("positive-skip-slices must be >= 0")

    config_path = Path(args.training_config)
    model_path = Path(args.model)
    output_path = Path(args.output)

    if not config_path.exists():
        raise FileNotFoundError(f"Training config not found: {config_path}")
    if not model_path.exists():
        raise FileNotFoundError(f"Streaming TFLite model not found: {model_path}")

    cutoffs = _build_cutoffs(args.cutoff_min, args.cutoff_max, args.cutoff_step)

    # Imported here rather than at module level so that the checks above can
    # fail fast without loading the model stack.
    from microwakeword.data import FeatureHandler
    from microwakeword.inference import Model

    print("===== Detector Calibration =====")
    print(f"→ Model: {model_path}")
    print(f"→ Training config: {config_path}")
    print(
        f"→ Evaluating window sizes {window_sizes} with target <= "
        f"{args.target_faph:.2f} false accepts/hour"
    )

    config = _load_config(config_path)
    config["flags"] = config.get("flags", {})
    handler = FeatureHandler(config)

    positive_mode, ambient_mode, positive_tracks, ambient_tracks = _load_eval_sets(
        handler, config
    )

    print(
        f"→ Using {positive_mode} positives ({len(positive_tracks)}) and "
        f"{ambient_mode} ambient tracks ({len(ambient_tracks)})"
    )

    model = Model(str(model_path), stride=config["stride"])
    positive_predictions = _predict_tracks(model, positive_tracks, "positive")
    ambient_predictions = _predict_tracks(model, ambient_tracks, "ambient")

    candidates: list[dict[str, float]] = []
    best_by_window: list[dict[str, float]] = []
    step_seconds = config["window_step_ms"] / 1000.0

    for window_size in window_sizes:
        ambient_averages = [
            _moving_average(track, window_size) for track in ambient_predictions
        ]
        positive_maxima_array = _positive_maxima(
            positive_predictions, window_size, args.positive_skip_slices
        )
        # Fraction of positive tracks whose peak exceeds each cutoff.
        recall_by_cutoff = np.mean(
            positive_maxima_array[None, :] > cutoffs[:, None], axis=1
        )
        faph_by_cutoff, ambient_hours = _compute_false_accepts_per_hour(
            ambient_averages,
            cutoffs,
            args.cooldown_slices,
            stride=config["stride"],
            step_seconds=step_seconds,
        )

        window_candidates = [
            {
                # Same 4-decimal precision as the cutoff grid, so a step finer
                # than 0.01 reports the cutoff that was actually evaluated.
                "probability_cutoff": float(round(float(cutoff), 4)),
                "sliding_window_size": int(window_size),
                "recall": float(recall),
                "false_accepts_per_hour": float(faph),
                "ambient_hours": float(ambient_hours),
            }
            for cutoff, recall, faph in zip(cutoffs, recall_by_cutoff, faph_by_cutoff)
        ]
        candidates.extend(window_candidates)

        best_window, _ = _select_best_candidate(window_candidates, args.target_faph)
        best_by_window.append(best_window)
        print(
            "  window={window}: cutoff={cutoff:.2f}; recall={recall:.2%}; "
            "ambient_faph={faph:.3f}".format(
                window=window_size,
                cutoff=best_window["probability_cutoff"],
                recall=best_window["recall"],
                faph=best_window["false_accepts_per_hour"],
            )
        )

    best, selected_limit = _select_best_candidate(candidates, args.target_faph)
    if best["false_accepts_per_hour"] > args.target_faph + 1e-9:
        print(
            "⚠️ No candidate met the target false accepts/hour budget; "
            "using the best fallback operating point."
        )

    print(
        "✓ Selected cutoff={cutoff:.2f}, window={window}, recall={recall:.2%}, "
        "ambient_faph={faph:.3f}".format(
            cutoff=best["probability_cutoff"],
            window=best["sliding_window_size"],
            recall=best["recall"],
            faph=best["false_accepts_per_hour"],
        )
    )

    output = {
        "probability_cutoff": best["probability_cutoff"],
        "sliding_window_size": best["sliding_window_size"],
        "target_false_accepts_per_hour": float(args.target_faph),
        "selected_false_accepts_per_hour_limit": (
            None if math.isinf(selected_limit) else float(selected_limit)
        ),
        "selected_metrics": {
            "recall": round(best["recall"], 6),
            "false_accepts_per_hour": round(best["false_accepts_per_hour"], 6),
            "ambient_hours": round(best["ambient_hours"], 6),
        },
        "evaluation": {
            "positive_dataset": positive_mode,
            "ambient_dataset": ambient_mode,
            "positive_tracks": len(positive_tracks),
            "ambient_tracks": len(ambient_tracks),
            "cooldown_slices": int(args.cooldown_slices),
            "positive_skip_slices": int(args.positive_skip_slices),
            "window_sizes": window_sizes,
            "cutoff_min": round(float(cutoffs[0]), 4),
            "cutoff_max": round(float(cutoffs[-1]), 4),
            "cutoff_step": float(args.cutoff_step),
        },
        "per_window_best": best_by_window,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(output, indent=2) + "\n", encoding="utf-8")
    print(f"📝 Wrote calibration to {output_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
# Fallback converter used when no AudioSet FLAC tarballs can be located:
# streams the balanced split through the Hugging Face `datasets` API and writes
# each clip to ${AUDIO16K_DIR} as a 16 kHz mono 16-bit WAV. Clips that fail are
# listed in ${AUDIO16K_DIR}/audioset_corrupted_files.log instead of aborting.
converter_from_dataset_api() {
  # shellcheck source=/dev/null
  source "${DATA_DIR}/.venv/bin/activate"

  python - "${AUDIO16K_DIR}" <<-'EOF'
import sys
from pathlib import Path

import librosa
import numpy as np
import scipy.io.wavfile
from datasets import load_dataset


def write_wav(dst: Path, data: np.ndarray, sr: int):
    """Write float audio in [-1, 1] as a 16-bit PCM WAV, creating parent dirs."""
    dst.parent.mkdir(parents=True, exist_ok=True)
    x = np.clip(data, -1.0, 1.0)
    scipy.io.wavfile.write(dst, sr, (x * 32767).astype(np.int16))


def to_mono(y: np.ndarray) -> np.ndarray:
    """Average a multi-channel signal down to one channel.

    Decoders disagree on layout ((channels, samples) vs (samples, channels)).
    The channel axis is the short one, so average over that axis rather than
    assuming it is the last: with (channels, samples) input, axis=-1 would
    average the whole clip away and leave one value per channel.
    """
    if y.ndim <= 1:
        return y
    return np.mean(y, axis=int(np.argmin(y.shape)))


audioset_out = Path(sys.argv[1])

print(" AudioSet FLAC tarballs are unavailable; using Hugging Face datasets API instead.")
dataset = load_dataset(
    "agkphysics/AudioSet",
    "balanced",
    split="train",
    streaming=True,
)

audioset_bad = []
ok = 0
skipped = 0
heartbeat_every = 250

for idx, sample in enumerate(dataset, start=1):
    try:
        video_id = str(sample.get("video_id") or f"audioset_{idx:06d}")
        outfile = audioset_out / f"{video_id}.wav"
        if outfile.exists():
            # Already converted by an earlier run. No `continue` here, so the
            # heartbeat below still fires and a resumed run shows progress.
            skipped += 1
        else:
            audio = sample.get("audio") or {}
            y = np.asarray(audio.get("array"))
            sr = int(audio.get("sampling_rate") or 0)
            if y.size == 0 or sr <= 0:
                raise ValueError("missing decoded audio")
            y = to_mono(y)
            if sr != 16000:
                y = librosa.resample(y.astype(np.float32), orig_sr=sr, target_sr=16000)
            if y.size == 0:
                raise ValueError("empty audio")
            write_wav(outfile, y, 16000)
            ok += 1
    except Exception as exc:
        # Best effort per clip: record the failure and keep streaming.
        audioset_bad.append(f"{sample.get('video_id', idx)}:{exc}")

    if idx == 1 or (idx % heartbeat_every) == 0:
        print(f" AudioSet API progress: {idx} clips processed (ok={ok}, skipped={skipped}, failed={len(audioset_bad)})")

if audioset_bad:
    (audioset_out / "audioset_corrupted_files.log").write_text("\n".join(audioset_bad))

print(f" AudioSet complete via datasets API ({ok} ok, {skipped} skipped, {len(audioset_bad)} failed)")
EOF
}
; continue ; } + fi + + tarball_filecount=$(tar -tvf "${fname}" | wc -l ) + filecounts["bal_train0${i}.tar"]=${tarball_filecount} + write_filecount=true + + echo " Untarring bal_train0${i}.tar" + tar -xf "${fname}" -C "${AUDIO_DIR}" + if "${CLEANUP_ARCHIVES}" && [ -f "${fname}" ] ; then + echo " Cleaning up bal_train0${i}.tar" + rm -rf "${fname}" + fi + done + + rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || : + converter + + # Recompute counts and warn (but do not fail) + expected_filecount=$(get_total_filecount filecounts) + actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || : + if [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then + echo " Converted file count(${actual_filecount}) != expected file count(${expected_filecount})" >&2 + echo " WARNING: mismatch is expected if some AudioSet files are corrupted; continuing." >&2 fi - - tarball_filecount=$(tar -tvf "${fname}" | wc -l ) - filecounts["bal_train0${i}.tar"]=${tarball_filecount} - write_filecount=true - - echo " Untarring bal_train0${i}.tar" - tar -xf "${fname}" -C "${AUDIO_DIR}" - if "${CLEANUP_ARCHIVES}" && [ -f "${fname}" ] ; then - echo " Cleaning up bal_train0${i}.tar" - rm -rf "${fname}" - fi - done - - rm -rf "${AUDIO16K_DIR}/audioset_corrupted_files.log" || : - converter - - # Recompute counts and warn (but do not fail) - expected_filecount=$(get_total_filecount filecounts) - actual_filecount=$(find "${AUDIO16K_DIR}" -name "*.wav" 2>/dev/null | wc -l) || : - if [ "${actual_filecount}" -ne "${expected_filecount}" ] ; then - echo " Converted file count(${actual_filecount}) != expected file count(${expected_filecount})" >&2 - echo " WARNING: mismatch is expected if some AudioSet files are corrupted; continuing." 
# Extract the zip archive $1 into directory $2 using the training venv's
# Python, with a tqdm progress bar. Exits non-zero when the archive is missing
# or empty.
extract_zip_with_python() {
  local zip_path="$1"
  local dest_dir="$2"

  "${DATA_DIR}/.venv/bin/python" - "${zip_path}" "${dest_dir}" <<-'EOF'
import sys
import zipfile
from pathlib import Path
from tqdm import tqdm

zip_path = Path(sys.argv[1])
dest_dir = Path(sys.argv[2])

if (not zip_path.exists()) or zip_path.stat().st_size == 0:
    raise SystemExit(f"Archive missing or empty: {zip_path}")

with zipfile.ZipFile(zip_path, "r") as zf:
    members = zf.infolist()
    size_gb = zip_path.stat().st_size / (1024 ** 3)
    print(f" Extracting {zip_path.name} ({len(members)} entries, {size_gb:.1f} GiB)...")
    # Extract member by member so the progress bar advances per file.
    for member in tqdm(members, desc=" FMA zip extract", unit="file"):
        zf.extract(member, dest_dir)
EOF
}

# Download to $1 from the first URL in $2.. that yields a non-empty file.
# Each URL gets up to four attempts with a growing pause (2s, 4s, 6s) between
# them. Returns 0 on success, otherwise the status of the last failed attempt.
download_with_fallbacks() {
  local output="$1"
  shift
  local urls=( "$@" )
  local rc=1
  # Keep the loop variables local so they cannot clobber same-named globals
  # in the calling script.
  local url attempt

  for url in "${urls[@]}" ; do
    for attempt in 1 2 3 4 ; do
      curl -sfL "${url}" -o "${output}" && [ -s "${output}" ] && return 0
      rc=$?
      # Drop partial or empty downloads so a later existence check cannot
      # mistake them for a finished archive.
      rm -f "${output}" || :
      if [ "${attempt}" -lt 4 ] ; then
        echo " Retry ${attempt}/3 after download failure"
        sleep $(( attempt * 2 ))
      fi
    done
  done

  return "${rc}"
}
-HF_VOICES="https://huggingface.co/rhasspy/piper-voices/resolve/main" -declare -a NL_VOICES=( - "nl/nl_NL/pim/medium/nl_NL-pim-medium" - "nl/nl_NL/ronnie/medium/nl_NL-ronnie-medium" - "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium" -) -echo " ===== Checking Dutch Piper voices =====" -for voice_path in "${NL_VOICES[@]}" ; do - voice_name="$(basename "${voice_path}")" - onnx_file="${VOICES_DIR}/${voice_name}.onnx" - json_file="${VOICES_DIR}/${voice_name}.onnx.json" - if [ ! -f "${onnx_file}" ] ; then - echo " Downloading ${voice_name}.onnx" - curl -sfL "${HF_VOICES}/${voice_path}.onnx?download=true" -o "${onnx_file}" - fi - if [ ! -f "${json_file}" ] ; then - echo " Downloading ${voice_name}.onnx.json" - curl -sfL "${HF_VOICES}/${voice_path}.onnx.json?download=true" -o "${json_file}" - fi -done +echo " Non-English Piper voices will be downloaded on demand for the selected language." ${GPU} && onnxgpu='-gpu[cuda]' || onnxgpu="" echo " ===== Installing onnxruntime${onnxgpu} =====" diff --git a/cli/wake_word_sample_trainer b/cli/wake_word_sample_trainer index 0ce01d8..f4fcda2 100644 --- a/cli/wake_word_sample_trainer +++ b/cli/wake_word_sample_trainer @@ -317,6 +317,7 @@ fi TRAINING_DONE="false" +echo "🏋️ Starting model training and TFLite export (this is the longest stage)…" if run_attempt "Attempt 1/3: GPU training (default runtime profile)" ; then echo "✅ Training complete (GPU path)." TRAINING_DONE="true" @@ -386,12 +387,24 @@ if [ "${TRAINING_DONE}" != "true" ]; then fi source_path="${WORK_DIR}/trained_models/wakeword/tflite_stream_state_internal_quant/stream_state_internal_quant.tflite" +calibration_path="${WORK_DIR}/trained_models/wakeword/tflite_stream_state_internal_quant/detection_calibration.json" if [ ! -f "${source_path}" ] ; then echo "Output model not found! Training didn't complete successfully. 
"""Write the wake-word manifest JSON that ships next to the TFLite model.

Inputs come from the environment: JSON_PATH, TFLITE_FILENAME and
WAKE_WORD_TITLE are required; CALIBRATION_PATH and LANGUAGE are optional.
"""
import json
import os
from pathlib import Path

# Detector settings used when no usable calibration file is available.
DEFAULT_PROBABILITY_CUTOFF = 0.97
DEFAULT_SLIDING_WINDOW_SIZE = 5


def load_detector_settings(calibration_path):
    """Return ``(probability_cutoff, sliding_window_size)`` for the manifest.

    The calibrated pair is used only when the file exists and both values
    parse. A cutoff and a window size are one operating point, so a partly
    readable file falls back to the defaults for both.
    """
    defaults = (DEFAULT_PROBABILITY_CUTOFF, DEFAULT_SLIDING_WINDOW_SIZE)
    # ``is_file`` rather than ``exists``: an unset CALIBRATION_PATH becomes
    # ``Path("")``, i.e. the current directory, which does exist.
    if not calibration_path.is_file():
        return defaults
    try:
        calibration = json.loads(calibration_path.read_text(encoding="utf-8"))
        probability_cutoff = float(
            calibration.get("probability_cutoff", DEFAULT_PROBABILITY_CUTOFF)
        )
        sliding_window_size = int(
            calibration.get("sliding_window_size", DEFAULT_SLIDING_WINDOW_SIZE)
        )
    except Exception as exc:
        # Packaging must not fail because calibration output is unusable.
        print(f"⚠️ Failed to read detector calibration ({exc}); using defaults.")
        return defaults
    print(
        f"🎯 Using calibrated detector settings: "
        f"cutoff={probability_cutoff:.2f}, window={sliding_window_size}"
    )
    return probability_cutoff, sliding_window_size


def build_manifest(
    wake_word, model_filename, language, probability_cutoff, sliding_window_size
):
    """Return the manifest dictionary for one trained wake word."""
    return {
        "type": "micro",
        "wake_word": wake_word,
        "author": "Tater Totterson",
        "website": "https://github.com/TaterTotterson/microWakeWord-Trainer-Nvidia-Docker.git",
        "model": model_filename,
        "trained_languages": [language],
        "version": 2,
        "micro": {
            "probability_cutoff": round(probability_cutoff, 2),
            "sliding_window_size": sliding_window_size,
            "feature_step_size": 10,
            "tensor_arena_size": 30000,
            "minimum_esphome_version": "2024.7.0",
        },
    }


def main():
    """Read the environment, resolve detector settings and write the JSON."""
    json_path = Path(os.environ["JSON_PATH"])
    calibration_path = Path(os.environ.get("CALIBRATION_PATH", ""))
    language = (os.environ.get("LANGUAGE", "en") or "en").strip().lower()

    probability_cutoff, sliding_window_size = load_detector_settings(calibration_path)
    meta = build_manifest(
        os.environ["WAKE_WORD_TITLE"],
        os.environ["TFLITE_FILENAME"],
        language,
        probability_cutoff,
        sliding_window_size,
    )
    json_path.write_text(json.dumps(meta, indent=4) + "\n", encoding="utf-8")


# A script fed to ``python -`` on stdin runs as ``__main__``, so the heredoc
# behaves as before while the helpers stay importable.
if __name__ == "__main__":
    main()
def _read_cached_piper_catalog_file() -> Optional[Dict[str, Any]]:
    """Return the catalog stored at ``PIPER_CATALOG_CACHE_FILE``, if usable.

    Returns:
        The decoded JSON object, or ``None`` when the file is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
    """
    try:
        if not PIPER_CATALOG_CACHE_FILE.exists():
            return None
        data = json.loads(PIPER_CATALOG_CACHE_FILE.read_text(encoding="utf-8"))
    except Exception:
        # The disk cache is only a fallback, so any failure means "no cache".
        return None
    return data if isinstance(data, dict) else None


def _write_cached_piper_catalog_file(data: Dict[str, Any]) -> None:
    """Persist ``data`` to ``PIPER_CATALOG_CACHE_FILE`` on a best-effort basis.

    The catalog is serialised first, written to a sibling temporary file and
    then renamed over the cache. A failure at any step therefore leaves the
    previous cache file intact instead of truncated. Errors are swallowed
    because callers treat the disk cache as optional.
    """
    tmp_path = None
    try:
        payload = json.dumps(data, ensure_ascii=True)
        PIPER_CATALOG_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        # The PID suffix keeps concurrent processes from sharing a temp file.
        tmp_path = PIPER_CATALOG_CACHE_FILE.with_name(
            f"{PIPER_CATALOG_CACHE_FILE.name}.{os.getpid()}.tmp"
        )
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(PIPER_CATALOG_CACHE_FILE)
    except Exception:
        # Deliberately silent: caching must never break catalog loading.
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                pass
PIPER_CATALOG_CACHE.get("entries") is not None: + return PIPER_CATALOG_CACHE.get("entries") + if disk_cached is not None: + PIPER_CATALOG_CACHE["entries"] = disk_cached + PIPER_CATALOG_CACHE["fetched_at"] = now + return disk_cached + PIPER_CATALOG_CACHE["entries"] = {} PIPER_CATALOG_CACHE["fetched_at"] = now return PIPER_CATALOG_CACHE.get("entries")