This commit is contained in:
MasterPhooey
2026-03-10 08:05:36 -05:00
parent 94903783cb
commit 7b92de81f9
3 changed files with 21 additions and 10 deletions

View File

@@ -98,13 +98,14 @@ export TF_XLA_FLAGS="${TF_XLA_FLAGS:---tf_xla_auto_jit=0}"
# Choose GPU allocator / XLA defaults depending on whether a Blackwell GPU was
# flagged. IS_BLACKWELL is compared as a string instead of being executed as a
# command: an unset or empty value would otherwise expand to an empty command,
# which exits 0 and silently selects the Blackwell branch.
# NOTE(review): assumes IS_BLACKWELL holds the literal "true"/"false" — it is
# assigned outside this view; confirm against the detection code.
if [ "${IS_BLACKWELL:-false}" = "true" ]; then
  # TF 2.20 + Blackwell is often unstable with cuda_malloc_async.
  unset TF_GPU_ALLOCATOR
  # Keep a caller-supplied XLA_FLAGS; only fill in the fallback when it is empty.
  [ -z "${XLA_FLAGS:-}" ] && export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
  echo " Blackwell detected (compute capability ${GPU_COMPUTE_CAPABILITY}): using compatibility GPU defaults."
else
  # Respect a caller-supplied allocator, defaulting to cuda_malloc_async.
  export TF_GPU_ALLOCATOR="${TF_GPU_ALLOCATOR:-cuda_malloc_async}"
  # NOTE(review): this discards any XLA_FLAGS provided by the caller on the
  # non-Blackwell path — confirm that is intended.
  unset XLA_FLAGS
fi
# When no XLA_FLAGS value is present, default to the flag that lets XLA fall
# back to the driver if ptxas cannot be found; a non-empty value is left as is.
if [ -z "${XLA_FLAGS:-}" ]; then
  export XLA_FLAGS="--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found"
fi

# Fixed runtime settings, exported unconditionally.
export \
  NVIDIA_TF32_OVERRIDE=1 \
  TF_CUDNN_WORKSPACE_LIMIT_IN_MB=512 \
  GLOG_minloglevel=2