[checkpoint] Finished test_pass_plan_devices.py
mbs-octoml committed Sep 16, 2021
1 parent 3af68d5 commit df99244
Showing 5 changed files with 136 additions and 40 deletions.
4 changes: 2 additions & 2 deletions src/relay/backend/vm/inline_primitives.cc
@@ -136,13 +136,13 @@ struct PrimitiveInliner : ExprMutator {
if (n->GetAttr<String>(attr::kCompiler).defined()) continue;
auto func = GetRef<Function>(n);

DLOG(INFO) << "Before inlining primitives: " << global << std::endl << AsText(func, false);
VLOG(1) << "Before inlining primitives: " << global << std::endl << PrettyPrint(func);

func = Function(func->params, VisitExpr(func->body), func->ret_type, func->type_params,
func->attrs);
module_->Add(global, func, true);

DLOG(INFO) << "After inlining primitives: " << global << std::endl << AsText(func, false);
VLOG(1) << "After inlining primitives: " << global << std::endl << PrettyPrint(func);
}
}
return module_;
7 changes: 7 additions & 0 deletions src/relay/transforms/device_planner.cc
@@ -971,6 +971,7 @@ class DeviceAnalyzer : public ExprVisitor {
VLOG_CONTEXT << "DeviceAnalyzer";
for (const auto& pair : mod_->functions) {
VLOG(1) << "collecting constraints for '" << PrettyPrint(pair.first) << "'";
domains_->UnifyExprExact(pair.first, pair.second);
VisitExpr(pair.second);
}
return std::move(domains_);
@@ -1689,6 +1690,10 @@ TVM_REGISTER_GLOBAL("relay._transform.PlanDevices")
****** Visitor/Mutator Helpers
******/

// TODO(mbs): These have grown to be pretty substantial and should be hoisted out.
// TODO(mbs): Probably less code-dup if we redefine the memoizing mutator on top
// of the generic Functor.

DLDeviceType LexicalOnDeviceMixin::GetInScopeDeviceType(const Expr& expr) const {
auto props = GetOnDeviceProps(expr);
if (props.body.defined() && props.is_fixed) {
@@ -1775,6 +1780,7 @@ void DeviceAwareExprVisitor::VisitExpr_(const FunctionNode* function_node) {
}

void DeviceAwareExprVisitor::VisitExpr_(const LetNode* let_node) {
PreVisitLetBlock_(let_node);
std::vector<const LetNode*> bindings;
Expr expr = GetRef<Expr>(let_node);
while (const auto* inner_let_node = expr.as<LetNode>()) {
@@ -1793,6 +1799,7 @@ void DeviceAwareExprVisitor::VisitExpr_(const LetNode* let_node) {
PopBoundVar((*itr)->var);
PostVisitLet_(*itr);
}
PostVisitLetBlock_(let_node);
}

void DeviceAwareExprVisitor::VisitExpr_(const CallNode* call_node) {
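For context, the UnifyExprExact(pair.first, pair.second) call added in the first device_planner.cc hunk ties each GlobalVar's device domain to the domain of its definition, so device constraints can flow across calls to global functions; this is exactly the situation the new test_global below exercises. A condensed, hypothetical Python sketch of such a program (the concrete shape and the direct on_device call are illustrative stand-ins for the helpers defined in the test file):

```python
import tvm
from tvm import relay

# Hypothetical mini-module in the spirit of test_global below: @f pins its
# second argument to the CPU via on_device, and @main calls @f, so PlanDevices
# must propagate device information through the GlobalVar f.
shape = (3, 7)
a, b = relay.var("a", shape=shape), relay.var("b", shape=shape)
x, y = relay.var("x", shape=shape), relay.var("y", shape=shape)
f, main = relay.GlobalVar("f"), relay.GlobalVar("main")

mod = tvm.IRModule()
mod[f] = relay.Function(
    [a, b],
    relay.add(a, relay.annotation.on_device(b, tvm.cpu())),
    relay.ty.TensorType(shape, "float32"),
)
mod[main] = relay.Function(
    [x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32")
)
# After planning, @f and @main should carry consistent per-parameter and
# result device types; test_global below checks the exact expected annotations.
```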
5 changes: 5 additions & 0 deletions src/target/opt/build_cuda_on.cc
@@ -126,6 +126,11 @@ std::string NVRTCCompile(const std::string& code, bool include_path = false) {
}

runtime::Module BuildCUDA(IRModule mod, Target target) {
VLOG_CONTEXT << "BuildCUDA";
VLOG(0) << "Building module:" << std::endl
<< PrettyPrint(mod) << std::endl
<< "for target '" << target->str() << "'";

using tvm::runtime::Registry;
bool output_ssa = false;
CodeGenCUDA cg;
5 changes: 3 additions & 2 deletions src/tir/analysis/verify_memory.cc
@@ -172,8 +172,9 @@ std::vector<String> VerifyMemory_(const PrimFunc& func) {
auto target = func->GetAttr<Target>(tvm::attr::kTarget);
ICHECK(target.defined()) << "VerifyMemory: Require the target attribute";

DLOG(INFO) << "verifying memory for target '" << target.value()->str() << "' for primitive\n"
<< PrettyPrint(func);
VLOG(1) << "verifying memory for target '" << target.value()->str()
<< "' for primitive:" << std::endl
<< PrettyPrint(func);

if (func->GetAttr<Integer>(tvm::attr::kCallingConv, Integer(CallingConv::kDefault)) ==
CallingConv::kDefault) {
155 changes: 119 additions & 36 deletions tests/python/relay/test_pass_plan_devices.py
@@ -16,7 +16,7 @@
# under the License

# TODO(mbs): All the input/expected programs should be directly quoted using @script
# TODO(mbs): Not testing Match, Constructor, or Refs
# TODO(mbs): Not testing Match and Constructor since not supported in python?

import tvm
from tvm import relay
@@ -586,7 +586,7 @@ def expected():
[False]))),
[GPU, CPU], CPU))

# Don't try to execute this -- it's too fiddly to setup.
# Don't try to execute, too fiddly to setup.
exercise(input(), expected(), None, None)


@@ -645,7 +645,7 @@ def expected():
[CPU, CPU], GPU)
return mod

# Don't try to execute.
# Don't try to execute, too fiddly to setup.
exercise(input(), expected(), None, None)


@@ -681,7 +681,7 @@ def expected():
[GPU], GPU)
return mod

# Don't try to execute.
# Don't try to execute, too fiddly to setup.
exercise(input(), expected(), None, None)


@@ -871,7 +871,7 @@ def expected():
relay.Function([data1, data2, weight], conv2d_3),
[CPU, CPU, CPU], CPU))

# Don't try to execute.
# Don't try to execute, we don't have a reference conv2d
exercise(input(), expected(), None, None)


@@ -908,8 +908,11 @@ def expected():
cpu_to_gpu(fixed_cpu(relay.TupleGetItem(t, 1)))))),
[CPU], GPU))

# Don't try to execute
exercise(input(), expected(), None, None)
def ref(x):
t = np.split(x, 3)
return np.subtract(t[0], t[1])

exercise(input(), expected(), ref, rands(shape, 1))


def test_propogation():
@@ -1075,7 +1078,8 @@ def input():

# def @main(x, y, z, on_device={param_device_types=[1,1,1], result_device_type=1}) {
# let f = fn(a, on_device={param_device_types=[1], result_device_type=1}) { add(a, y) }
# let g = fn(a, on_device={param_device_types=[1], result_device_type=1}) { subtract(a, y) }
# let g = fn
# (a, on_device={param_device_types=[1], result_device_type=1}) { subtract(a, y) }
# let h = if (x) {
# f
# } else {
@@ -1114,35 +1118,114 @@ def g(a):


def test_global():
shape = (N, M)
a = relay.var("a", shape=shape)
b = relay.var("b", shape=shape)
x = relay.var("x", shape=shape)
y = relay.var("y", shape=shape)
f = relay.GlobalVar("f")
main = relay.GlobalVar("main")

# def @f(a, b) { add(a, on_cpu(b)) }
# def @main(x, y) { @f(y, x) }
def input():
mod = tvm.IRModule()
mod[f] = relay.Function([a, b], relay.add(a, on_cpu(b)), relay.ty.TensorType(shape, "float32"))
mod[main] = relay.Function([x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32"))
return mod

# def @f(a, b, on_device={param_device_types=[2,1], result_device_type=2}) { add(a, on_cpu(b)) }
# def @main(x, y, on_device={param_device_types=[1,2], result_device_type=2}) { @f(y, x) }
def expected():
mod = tvm.IRModule()
mod[f] = relay.annotation.function_on_device(
relay.Function([a, b], relay.add(a, cpu_to_gpu(b)), relay.ty.TensorType(shape, "float32")),
[GPU, CPU], GPU)
mod[main] = relay.annotation.function_on_device(
relay.Function([x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32")),
[CPU, GPU], GPU)
return mod

def ref(x, y):
def f(a, b):
return np.add(a, b)

return f(x, y)

exercise(input(), expected(), ref, rands(shape, 2))


# test_match
# test_refs
# Note that match and ADTs don't appear to be supported for direct AST
# construction.

def test_ref():
shape = (N, M)
x = relay.var("x", shape=shape)
y = relay.var("y", shape=shape)
r = relay.var("r")
dummy = relay.var("dummy")

# def @main(x, y) {
# r = ref(x)
# ref_write(r, on_cpu(y))
# add(x, ref_read(r))
# }
def input():
return tvm.IRModule.from_expr(
relay.Function([x, y],
relay.Let(r, relay.RefCreate(x),
relay.Let(dummy, relay.RefWrite(r, on_cpu(y)),
relay.add(x, relay.RefRead(r))))))

# def @main(x, y, on_device={param_device_types=[GPU, CPU], result_device_type=GPU}) {
# r = ref(x)
# ref_write(r, cpu_to_gpu(y))
# add(x, ref_read(r))
# }
def expected():
return tvm.IRModule.from_expr(
relay.annotation.function_on_device(
relay.Function([x, y],
relay.Let(r, relay.RefCreate(x),
relay.Let(dummy, relay.RefWrite(r, cpu_to_gpu(y)),
relay.add(x, relay.RefRead(r))))),
[GPU, CPU], GPU))

def ref(x, y):
r = {"value": x}
r["value"] = y
return np.add(x, r["value"])

# Don't try to execute, no backend currently supports both cross-devices and references.
exercise(input(), expected(), None, None)


if __name__ == "__main__":
# test_plain()
# test_left_add_on_cpu()
# test_left_add_on_cpu_via_copy()
# test_both_adds_on_cpu()
# test_sharing()
# test_let_on_cpu()
# test_func_param_on_cpu()
# test_func_result_on_cpu()
# test_higher_order()
# test_function_in_tuple()
# test_device_copy()
# test_shape_func()
# test_shape_of()
# test_alloc_storage()
# test_alloc_tensor()
# test_reshape_tensor()
# test_dynamic_input()
# test_redundant_annotation()
# test_annotate_expr()
# test_annotate_all()
# test_conv_network()
# test_tuple_get_item()
# test_propogation()
# test_fusible_network()
# test_unpropagatable_graph()
# test_conditional()
test_plain()
test_left_add_on_cpu()
test_left_add_on_cpu_via_copy()
test_both_adds_on_cpu()
test_sharing()
test_let_on_cpu()
test_func_param_on_cpu()
test_func_result_on_cpu()
test_higher_order()
test_function_in_tuple()
test_device_copy()
test_shape_func()
test_shape_of()
test_alloc_storage()
test_alloc_tensor()
test_reshape_tensor()
test_dynamic_input()
test_redundant_annotation()
test_annotate_expr()
test_annotate_all()
test_conv_network()
test_tuple_get_item()
test_propogation()
test_fusible_network()
test_unpropagatable_graph()
test_conditional()
test_global()
test_ref()

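For orientation, every test above funnels through two helpers, rands and exercise, which are defined near the top of test_pass_plan_devices.py but fall outside the hunks shown here. Below is a minimal sketch of what they presumably do; the Python PlanDevices wrapper and its default-device argument are assumptions, while the pass itself is registered as "relay._transform.PlanDevices" in the device_planner.cc hunk above.

```python
import numpy as np
import tvm
from tvm import relay

# Minimal sketch of the two helpers every test above calls. The real rands and
# exercise are defined earlier in test_pass_plan_devices.py, outside the hunks
# shown here; the bodies below are assumptions for illustration only.

def rands(shape, n):
    """Return n random float32 arrays of the given shape, used as test inputs."""
    return [np.random.rand(*shape).astype("float32") for _ in range(n)]

def exercise(in_mod, expected_mod, ref, args):
    """Plan devices for in_mod, compare against expected_mod, and, when a NumPy
    reference and concrete inputs are supplied, also check execution agrees."""
    # The wrapper name and its default-device argument are assumptions; the
    # pass itself is registered as "relay._transform.PlanDevices".
    actual_mod = relay.transform.PlanDevices(tvm.cpu())(in_mod)
    tvm.ir.assert_structural_equal(actual_mod, expected_mod, map_free_vars=True)
    if ref is not None and args is not None:
        expected_outputs = ref(*args)  # NumPy reference result
        # Running the planned module on a multi-device VM and comparing its
        # output against expected_outputs is elided from this sketch; tests
        # that cannot execute pass ref=None and args=None, as seen above.
```

Tests that are too fiddly to set up, or that need cross-device reference semantics no backend currently supports, simply pass None for ref and args, as the comments in the hunks above note.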