Skip to content

Commit

Permalink
[tmp] Clutching, but force a gc between sphinx gallery runs to trigger CUDA memory free.
Browse files Browse the repository at this point in the history

Sphinx has an explicit gc.collect but I think it's only triggered as part of the memory stats machinery.
  • Loading branch information
mbs-octoml committed Aug 11, 2021
1 parent a5deda2 commit d718cc4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
29 changes: 23 additions & 6 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import gc
import sys
import inspect
import os, subprocess
Expand Down Expand Up @@ -201,19 +202,20 @@ def git_describe_version(original_version):

# Explicit ordering of tutorial sub-sections in the generated sphinx-gallery
# docs (earlier entries render first).
#
# NOTE(review): this span is a diff rendering with the +/- markers stripped,
# so removed and added lines appear merged — several entries occur twice
# (get_started, language, autotvm, auto_scheduler). The "DO NOT CHECKIN"
# marker below flags a temporary reordering in this [tmp] commit; restore the
# canonical order (and drop the duplicates) before merging.
subsection_order = ExplicitOrder(
[
"../tutorials/get_started",
# DO NOT CHECKIN
"../tutorials/frontend",
"../tutorials/language",
"../tutorials/get_started",
"../tutorials/optimize",
"../tutorials/autotvm",
"../tutorials/auto_scheduler",
"../tutorials/language",
"../tutorials/dev",
"../tutorials/topi",
"../tutorials/deployment",
"../tutorials/micro",
"../vta/tutorials/frontend",
"../vta/tutorials/optimize",
"../vta/tutorials/autotvm",
"../tutorials/autotvm",
"../tutorials/auto_scheduler",
]
)

Expand All @@ -234,11 +236,14 @@ def git_describe_version(original_version):
"relay_quick_start.py",
],
"frontend": [
# DO NOT CHECKIN
"from_mxnet.py",
"from_keras.py",
"deploy_quantized.py",
"deploy_ssd_gluoncv.py",
"from_pytorch.py",
"from_tensorflow.py",
"from_mxnet.py",
"from_onnx.py",
"from_keras.py",
"from_tflite.py",
"from_coreml.py",
"from_darknet.py",
Expand Down Expand Up @@ -300,6 +305,15 @@ def __call__(self, filename):
return filename


# When running the tutorials on GPUs we are dependent on the Python garbage collector
# collecting TVM packed function closures for any device memory to also be released. This
# is not a good setup for machines with lots of CPU ram but constrained GPU ram, so force
# a gc after each example.
def force_gc(gallery_conf, fname):
    """Force a full garbage collection after each sphinx-gallery example.

    Registered in ``sphinx_gallery_conf["reset_modules"]``; sphinx-gallery
    calls each reset function positionally as ``reset(gallery_conf, fname)``
    after every example script finishes, so the parameter-name fix (the
    original misspelled it ``gallery_cong``) is caller-compatible.

    Parameters
    ----------
    gallery_conf : dict
        The sphinx-gallery configuration dictionary (unused here).
    fname : str
        Filename of the example that just finished executing.
    """
    print("(Forcing Python gc after '{}' to avoid lag in reclaiming CUDA memory)".format(fname))
    gc.collect()
    # gc.garbage holds objects the collector found unreachable but could not
    # free (e.g. cycles involving legacy __del__ finalizers); expected empty.
    print("(Remaining garbage: {})".format(gc.garbage))

sphinx_gallery_conf = {
"backreferences_dir": "gen_modules/backreferences",
"doc_module": ("tvm", "numpy"),
Expand All @@ -317,6 +331,9 @@ def __call__(self, filename):
"download_all_examples": False,
"min_reported_time": 60,
"expected_failing_examples": [],
"reset_modules": (force_gc, "matplotlib", "seaborn"),
"abort_on_example_error": True,
"show_memory": True,
}

autodoc_default_options = {
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,11 @@ class CUDADeviceAPI final : public DeviceAPI {

// Free memory previously allocated by this device API, choosing the CUDA
// free routine that matches how the pointer was allocated.
void FreeDataSpace(Device dev, void* ptr) final {
if (dev.device_type == kDLCUDAHost) {
// Pinned host memory must be released with cudaFreeHost, not cudaFree.
// NOTE(review): the LOG(INFO) lines here were added by an explicitly
// [tmp] commit for debugging CUDA memory reclamation — remove before
// merging; they will be very noisy on free-heavy workloads.
LOG(INFO) << "freeing host memory";
CUDA_CALL(cudaFreeHost(ptr));
} else {
// cudaFree must run with the owning device active, so select it first.
CUDA_CALL(cudaSetDevice(dev.device_id));
LOG(INFO) << "freeing device memory";
CUDA_CALL(cudaFree(ptr));
}
}
Expand Down

0 comments on commit d718cc4

Please sign in to comment.