From 7b92de81f9cd09341732303495d07e406776c047 Mon Sep 17 00:00:00 2001
From: MasterPhooey
Date: Tue, 10 Mar 2026 08:05:36 -0500
Subject: [PATCH] ptx fix

---
Reviewer note: the line structure of this patch was restored from a
whitespace-collapsed copy. Indentation inside the hunks is a best-effort
guess (two spaces per level); if context lines fail to match, apply with
`git am --ignore-whitespace` or `patch -l` and confirm against the tree.

 cli/setup_python_venv        |  9 ++++++++-
 cli/wake_word_sample_trainer | 17 ++++++++++-------
 train_wake_word              |  5 +++--
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/cli/setup_python_venv b/cli/setup_python_venv
index 38e6d14..51aefbc 100755
--- a/cli/setup_python_venv
+++ b/cli/setup_python_venv
@@ -72,7 +72,14 @@ if ${GPU} ; then
 fi
 
 VENV="${DATA_DIR}/.venv"
-[ -n "${VIRTUAL_ENV}" ] && deactivate
+if [ -n "${VIRTUAL_ENV:-}" ] && [ "${VIRTUAL_ENV}" != "${VENV}" ] ; then
+  if command -v deactivate >/dev/null 2>&1 ; then
+    deactivate || :
+  else
+    # Recorder process can inherit VIRTUAL_ENV without the shell function.
+    unset VIRTUAL_ENV
+  fi
+fi
 
 if [ -n "${PYTHON}" ] ; then
   PYTHONS=( "${PYTHON}" )
diff --git a/cli/wake_word_sample_trainer b/cli/wake_word_sample_trainer
index 33fdf64..0ce01d8 100644
--- a/cli/wake_word_sample_trainer
+++ b/cli/wake_word_sample_trainer
@@ -82,14 +82,14 @@ ALLOW_CPU_FALLBACK="$(normalize_bool "${MWW_ALLOW_CPU_FALLBACK:-${ALLOW_CPU_FALL
 if [ "${IS_BLACKWELL}" = "true" ]; then
   echo "ℹ️ Blackwell GPU detected (compute capability ${GPU_COMPUTE_CAPABILITY})."
   echo "ℹ️ Using GPU compatibility retries; CPU fallback is ${ALLOW_CPU_FALLBACK} (override with MWW_ALLOW_CPU_FALLBACK=true|false)."
+fi
 
-  # Force driver PTX fallback when XLA needs ptxas.
-  if [ -z "${XLA_FLAGS:-}" ]; then
-    export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
-    echo "ℹ️ Setting XLA_FLAGS=${XLA_FLAGS}"
-  else
-    echo "ℹ️ Using user-provided XLA_FLAGS=${XLA_FLAGS}"
-  fi
+# Enable driver-side PTX JIT fallback when ptxas/nvlink are unavailable.
+if [ -z "${XLA_FLAGS:-}" ]; then
+  export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
+  echo "ℹ️ Setting XLA_FLAGS=${XLA_FLAGS}"
+else
+  echo "ℹ️ Using user-provided XLA_FLAGS=${XLA_FLAGS}"
 fi
 
 check_directories() {
@@ -251,6 +251,9 @@ GPU_FALLBACK_MARKERS=(
   "cuda_error_out_of_memory"
   "cuda_error_invalid_handle"
   "culaunchkernel"
+  "no ptx compilation provider is available"
+  "couldn't find a suitable version of ptxas"
+  "couldn't find a suitable version of nvlink"
   "failed to allocate"
   "cudnn"
   "cublas"
diff --git a/train_wake_word b/train_wake_word
index ddb058d..aa47be5 100755
--- a/train_wake_word
+++ b/train_wake_word
@@ -98,13 +98,14 @@ export TF_XLA_FLAGS="${TF_XLA_FLAGS:---tf_xla_auto_jit=0}"
 if ${IS_BLACKWELL} ; then
   # TF 2.20 + Blackwell is often unstable with cuda_malloc_async.
   unset TF_GPU_ALLOCATOR
-  [ -z "${XLA_FLAGS:-}" ] && export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
   echo "ℹ️ Blackwell detected (compute capability ${GPU_COMPUTE_CAPABILITY}): using compatibility GPU defaults."
 else
   export TF_GPU_ALLOCATOR="${TF_GPU_ALLOCATOR:-cuda_malloc_async}"
-  unset XLA_FLAGS
 fi
 
+# Enable driver-side PTX JIT fallback when ptxas/nvlink are unavailable.
+[ -z "${XLA_FLAGS:-}" ] && export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
+
 export NVIDIA_TF32_OVERRIDE=1
 export TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512
 export GLOG_minloglevel=2