This commit is contained in:
MasterPhooey
2026-01-21 06:14:32 -06:00
parent 423bbd15f5
commit 98a087c87d
2 changed files with 32 additions and 19 deletions

View File

@@ -178,24 +178,31 @@ echo " ===== Installing keras ====="
# Pin keras to 3.12.0: keras 3.13 has "issues", so we need to back down to 3.12.
# (pip_install is a helper defined outside this section.)
pip_install "keras==3.12.0"
# -----------------------------------------------------------------------------
# Optional CUDA data dir (GPU-only)
# Some stacks expect a CUDA "nvvm/libdevice" tree. We create one under
# ${DATA_DIR}/cuda and link Triton's libdevice into it if it exists. This is
# safe and does NOT enable any extra XLA flags by itself.
#
# Reads:  GPU (run as a command, so expected to be `true` or `false`), DATA_DIR.
# Writes: CUDA_DATA_DIR, LIBDEVICE_DIR, TRITON_LIBDEVICE (only when GPU is true).
# -----------------------------------------------------------------------------
# An unset or empty GPU would expand to a null command, which exits 0 and would
# wrongly enable this GPU-only section; default it to `false` instead.
if ${GPU:-false} ; then
  CUDA_DATA_DIR="${DATA_DIR}/cuda"
  LIBDEVICE_DIR="${CUDA_DATA_DIR}/nvvm/libdevice"
  mkdir -p "${LIBDEVICE_DIR}"

  # Search below the current directory for Triton's bundled libdevice bitcode.
  # The path is printed as an ABSOLUTE path: `ln -s` stores its target verbatim,
  # and a relative target would be resolved relative to ${LIBDEVICE_DIR},
  # producing a dangling symlink. Matches are sorted so the choice is
  # deterministic when more than one copy is found. Prints nothing if none.
  TRITON_LIBDEVICE="$(
    python - <<'PY'
import glob
import os

paths = sorted(
    glob.glob(
        "**/site-packages/triton/backends/nvidia/lib/libdevice.10.bc",
        recursive=True,
    )
)
print(os.path.abspath(paths[0]) if paths else "", end="")
PY
  )"

  if [ -n "${TRITON_LIBDEVICE}" ] ; then
    ln -sf "${TRITON_LIBDEVICE}" "${LIBDEVICE_DIR}/libdevice.10.bc"
    echo " Linked Triton libdevice.10.bc to ${LIBDEVICE_DIR}"
  else
    # Not an error: the link is optional.
    echo " Triton libdevice.10.bc not found (ok)"
  fi
fi
# Run the test_python program located in PROGDIR, pointing it at DATA_DIR.
"${PROGDIR}/test_python" --data-dir="${DATA_DIR}"
@@ -205,4 +212,4 @@ END_TS=$EPOCHSECONDS
# Tell the user how to activate the virtualenv located at VENV.
echo "Run 'source ${VENV}/bin/activate' to activate the new virtualenv in the current shell."
# Report the elapsed time between START_TS and END_TS once (the call was
# previously duplicated, which would print the summary twice).
print_elapsed_time "${START_TS}" "${END_TS}" "Python package installation complete"