Skip to content

Commit

Permalink
[tmp] Clutching, but force a gc between sphinx gallery runs to trigger CUDA memory free.
Browse files Browse the repository at this point in the history

Sphinx has an explicit gc.collect but I think it's only triggered as part of the memory stats machinery.
  • Loading branch information
mbs-octoml committed Aug 11, 2021
1 parent a5deda2 commit d718cc4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
29 changes: 23 additions & 6 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import gc
import sys
import inspect
import os, subprocess
Expand Down Expand Up @@ -201,19 +202,20 @@ def git_describe_version(original_version):

# Explicit ordering of tutorial sub-sections in the generated sphinx-gallery
# docs (earlier entries render first).
#
# NOTE(review): this span is a diff rendering with the +/- markers stripped,
# so removed and added lines appear merged — several entries occur twice
# (get_started, language, autotvm, auto_scheduler). The "DO NOT CHECKIN"
# marker below flags a temporary reordering in this [tmp] commit; restore the
# canonical order (and drop the duplicates) before merging.
subsection_order = ExplicitOrder(
[
"../tutorials/get_started",
# DO NOT CHECKIN
"../tutorials/frontend",
"../tutorials/language",
"../tutorials/get_started",
"../tutorials/optimize",
"../tutorials/autotvm",
"../tutorials/auto_scheduler",
"../tutorials/language",
"../tutorials/dev",
"../tutorials/topi",
"../tutorials/deployment",
"../tutorials/micro",
"../vta/tutorials/frontend",
"../vta/tutorials/optimize",
"../vta/tutorials/autotvm",
"../tutorials/autotvm",
"../tutorials/auto_scheduler",
]
)

Expand All @@ -234,11 +236,14 @@ def git_describe_version(original_version):
"relay_quick_start.py",
],
"frontend": [
# DO NOT CHECKIN
"from_mxnet.py",
"from_keras.py",
"deploy_quantized.py",
"deploy_ssd_gluoncv.py",
"from_pytorch.py",
"from_tensorflow.py",
"from_mxnet.py",
"from_onnx.py",
"from_keras.py",
"from_tflite.py",
"from_coreml.py",
"from_darknet.py",
Expand Down Expand Up @@ -300,6 +305,15 @@ def __call__(self, filename):
return filename


# When running the tutorials on GPUs we are dependent on the Python garbage collector
# collecting TVM packed function closures for any device memory to also be released. This
# is not a good setup for machines with lots of CPU ram but constrained GPU ram, so force
# a gc after each example.
def force_gc(gallery_conf, fname):
    """Force a full garbage collection after each sphinx-gallery example.

    Registered in ``sphinx_gallery_conf["reset_modules"]``; sphinx-gallery
    calls each reset function positionally as ``reset(gallery_conf, fname)``
    after every example script finishes, so the parameter-name fix (the
    original misspelled it ``gallery_cong``) is caller-compatible.

    Parameters
    ----------
    gallery_conf : dict
        The sphinx-gallery configuration dictionary (unused here).
    fname : str
        Filename of the example that just finished executing.
    """
    print("(Forcing Python gc after '{}' to avoid lag in reclaiming CUDA memory)".format(fname))
    gc.collect()
    # gc.garbage holds objects the collector found unreachable but could not
    # free (e.g. cycles involving legacy __del__ finalizers); expected empty.
    print("(Remaining garbage: {})".format(gc.garbage))

sphinx_gallery_conf = {
"backreferences_dir": "gen_modules/backreferences",
"doc_module": ("tvm", "numpy"),
Expand All @@ -317,6 +331,9 @@ def __call__(self, filename):
"download_all_examples": False,
"min_reported_time": 60,
"expected_failing_examples": [],
"reset_modules": (force_gc, "matplotlib", "seaborn"),
"abort_on_example_error": True,
"show_memory": True,
}

autodoc_default_options = {
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,11 @@ class CUDADeviceAPI final : public DeviceAPI {

// Free memory previously allocated by this device API, choosing the CUDA
// free routine that matches how the pointer was allocated.
void FreeDataSpace(Device dev, void* ptr) final {
if (dev.device_type == kDLCUDAHost) {
// Pinned host memory must be released with cudaFreeHost, not cudaFree.
// NOTE(review): the LOG(INFO) lines here were added by an explicitly
// [tmp] commit for debugging CUDA memory reclamation — remove before
// merging; they will be very noisy on free-heavy workloads.
LOG(INFO) << "freeing host memory";
CUDA_CALL(cudaFreeHost(ptr));
} else {
// cudaFree must run with the owning device active, so select it first.
CUDA_CALL(cudaSetDevice(dev.device_id));
LOG(INFO) << "freeing device memory";
CUDA_CALL(cudaFree(ptr));
}
}
Expand Down

0 comments on commit d718cc4

Please sign in to comment.