Update notebook to fix issues with environment inheritance.

Two issues: * The notebook cell that actually runs model_train_eval was running it in a subprocess, so while it inherited environment variables from the running Python kernel, it couldn't inherit the TensorFlow environment from it. This resulted in the `set_memory_growth(g, True)` and `mixed_precision.set_global_policy("mixed_float16")` calls in the previous cell being lost. * TFLite doesn't support "mixed_float16" anyway, and it causes the model export to fail spectacularly, so it's kind of a good thing it wasn't being applied. So: * The TensorFlow environment variable and memory_growth setting code was moved from the notebook cell that also wrote the config yaml to the next cell, which does the train and test. This leaves the "config" cell to just write the yaml. This is really just a cosmetic change to group functionality better. * The code that tried to set "mixed_float16" has been removed, but since setting memory_growth to true is a good thing, model_train_eval is now run using runpy instead of in a subprocess. This way it's run in the same Python kernel instance and TensorFlow environment as the rest of the notebook and inherits the memory_growth setting. Resolves: #14
2026-06-12 20:10:19 -06:00 · 2025-12-20 10:22:33 -07:00
parent 5487f0869e
commit dc92dc7d8b
1 changed files with 52 additions and 64 deletions
--- a/microWakeWord_training_notebook.ipynb
+++ b/microWakeWord_training_notebook.ipynb
@@ -742,36 +742,9 @@
   },
   "outputs": [],
   "source": [
    "# GPU memory config (set env BEFORE importing TF)\n",
    "import os, sys, gc\n",
    "\n",
    "if \"tensorflow\" not in sys.modules:\n",
    "    os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"              # grow as needed\n",
    "    os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"          # modern CUDA allocator\n",
    "    os.environ[\"XLA_FLAGS\"] = \"--xla_gpu_cuda_data_dir=/usr/local/cuda\"\n",
    "    os.environ[\"TF_XLA_FLAGS\"] = \"--tf_xla_auto_jit=0\"            # disable XLA JIT (more stable mem)\n",
    "import tensorflow as tf\n",
    "\n",
    "# Per-device memory growth (belt + suspenders)\n",
    "for g in tf.config.list_physical_devices(\"GPU\"):\n",
    "    try:\n",
    "        tf.config.experimental.set_memory_growth(g, True)\n",
    "    except Exception:\n",
    "        pass\n",
    "print(\"GPUs:\", tf.config.list_physical_devices(\"GPU\"))\n",
    "gc.collect()\n",
    "\n",
    "# Optional but recommended: mixed precision halves activation memory\n",
    "try:\n",
    "    from tensorflow.keras import mixed_precision\n",
    "    mixed_precision.set_global_policy(\"mixed_float16\")\n",
    "    print(\"Mixed precision policy:\", mixed_precision.global_policy())\n",
    "except Exception as e:\n",
    "    print(\"Mixed precision not enabled:\", e)\n",
    "\n",
    "# --- Save a yaml config that controls the training process ---\n",
    "\n",
-    "import yaml\n",
+    "import os, sys, yaml\n",
    "\n",
    "config = {}\n",
    "\n",
@@ -809,7 +782,7 @@
    "with open(\"training_parameters.yaml\", \"w\") as f:\n",
    "    yaml.dump(config, f)\n",
    "\n",
-    "print(\"✅ Wrote training_parameters.yaml (batch_size=16) with allow_growth, cuda_malloc_async, XLA JIT OFF, mixed precision ON.\")"
+    "print(\"✅ Wrote training_parameters.yaml (batch_size=16)\")"
   ]
  },
  {
@@ -822,44 +795,59 @@
   "source": [
    "# Train + export (GPU-friendly env + stable flags)\n",
    "\n",
-    "import os, sys\n",
+    "import os, sys, gc, runpy\n",
    "\n",
    "# --- Runtime env (inherited by the subprocess we're about to launch) ---\n",
    "os.environ.setdefault(\"LD_LIBRARY_PATH\",\n",
    "    \"/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/lib/x86_64-linux-gnu:\" +\n",
    "    os.environ.get(\"LD_LIBRARY_PATH\",\"\")\n",
    ")\n",
    "os.environ.setdefault(\"TF_CPP_MIN_LOG_LEVEL\", \"2\")            # quieter logs\n",
    "os.environ.setdefault(\"TF_FORCE_GPU_ALLOW_GROWTH\", \"true\")    # grow VRAM as needed\n",
    "os.environ.setdefault(\"TF_GPU_ALLOCATOR\", \"cuda_malloc_async\")# modern allocator\n",
    "os.environ.setdefault(\"XLA_FLAGS\", \"--xla_gpu_cuda_data_dir=/usr/local/cuda\")\n",
    "os.environ.setdefault(\"TF_XLA_FLAGS\", \"--tf_xla_auto_jit=0\")  # disable XLA JIT (more stable)\n",
    "os.environ.setdefault(\"NVIDIA_TF32_OVERRIDE\", \"1\")            # allow TF32 (perf/VRAM win on Ampere+)\n",
    "\n",
    "if \"tensorflow\" not in sys.modules:\n",
    "    os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"              # grow as needed\n",
    "    os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"          # modern CUDA allocator\n",
    "    os.environ[\"TF_XLA_FLAGS\"] = \"--tf_xla_auto_jit=0\"            # disable XLA JIT (more stable mem)\n",
    "    os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"                      # quieter logs\n",
    "    os.environ[\"NVIDIA_TF32_OVERRIDE\"] = \"1\"                      # allow TF32 (perf/VRAM win on Ampere+)\n",
    "# If you still hit GPU memory errors, uncomment to force a smaller workspace:\n",
-    "# os.environ[\"TF_CUDNN_WORKSPACE_LIMIT_IN_MB\"] = \"256\"\n",
+    "#  os.environ[\"TF_CUDNN_WORKSPACE_LIMIT_IN_MB\"] = \"256\"\n",
    "\n",
-    "# --- Kick off training ---\n",
+    "import tensorflow as tf\n",
-    "cmd = f'''\"{sys.executable}\" -m microwakeword.model_train_eval \\\n",
+    "\n",
-    "  --training_config=\"training_parameters.yaml\" \\\n",
+    "allow_growth = \"\"\n",
-    "  --train 1 \\\n",
+    "# Per-device memory growth (belt + suspenders)\n",
-    "  --restore_checkpoint 1 \\\n",
+    "for g in tf.config.list_physical_devices(\"GPU\"):\n",
-    "  --test_tf_nonstreaming 0 \\\n",
+    "    try:\n",
-    "  --test_tflite_nonstreaming 0 \\\n",
+    "        tf.config.experimental.set_memory_growth(g, True)\n",
-    "  --test_tflite_nonstreaming_quantized 0 \\\n",
+    "        allow_growth = \"gpu_allow_growth, \"\n",
-    "  --test_tflite_streaming 0 \\\n",
+    "    except Exception:\n",
-    "  --test_tflite_streaming_quantized 1 \\\n",
+    "        pass\n",
-    "  --use_weights \"best_weights\" \\\n",
+    "print(\"GPUs:\", tf.config.list_physical_devices(\"GPU\"))\n",
-    "  mixednet \\\n",
+    "gc.collect()\n",
-    "  --pointwise_filters \"64,64,64,64\" \\\n",
+    "\n",
-    "  --repeat_in_block \"1,1,1,1\" \\\n",
+    "print(f\"✅ Set environment with {allow_growth}cuda_malloc_async, xla_auto_jit=0, min_log_level=2, nvidia_tf2_override\")\n",
-    "  --mixconv_kernel_sizes \"[5], [7,11], [9,15], [23]\" \\\n",
+    "print(\"   Starting training...\")\n",
-    "  --residual_connection \"0,0,0,0\" \\\n",
+    "\n",
-    "  --first_conv_filters 32 \\\n",
+    "original_argv = list(sys.argv)\n",
-    "  --first_conv_kernel_size 5 \\\n",
+    "try:\n",
-    "  --stride 2'''\n",
+    "    sys.argv = [\n",
-    "print(\"Running:\\n\", cmd)\n",
+    "        'model_train_eval.py',\n",
-    "!$cmd"
+    "        '--training_config', 'training_parameters.yaml',\n",
    "        '--train', '1',\n",
    "        '--restore_checkpoint', '1',\n",
    "        '--test_tf_nonstreaming', '0',\n",
    "        '--test_tflite_nonstreaming', '0',\n",
    "        '--test_tflite_nonstreaming_quantized', '0',\n",
    "        '--test_tflite_streaming', '0',\n",
    "        '--test_tflite_streaming_quantized', '1',\n",
    "        '--use_weights', 'best_weights',\n",
    "        'mixednet',\n",
    "        '--pointwise_filters', '64,64,64,64',\n",
    "        '--repeat_in_block', '1,1,1,1',\n",
    "        '--mixconv_kernel_sizes', '[5], [7,11], [9,15], [23]',\n",
    "        '--residual_connection', '0,0,0,0',\n",
    "        '--first_conv_filters', '32',\n",
    "        '--first_conv_kernel_size', '5',\n",
    "        '--stride', '2'\n",
    "    ]\n",
    "    runpy.run_module(\"microwakeword.model_train_eval\", run_name=\"__main__\", alter_sys=True)\n",
    "finally:\n",
    "    sys.argv = original_argv\n",
    "print(\"✅ Training and testing complete.\")\n"
   ]
  },
  {