Remove gpu_device_num (#8516)

* Remove gpu_device_num
* fix
Oneflow-Inc · Jun 29, 2022 · 327a19f · 327a19f
1 parent 3654513
commit 327a19f
Show file tree

Hide file tree

Showing 10 changed files with 6 additions and 164 deletions.
diff --git a/oneflow/core/control/ctrl_test.cpp b/oneflow/core/control/ctrl_test.cpp
@@ -47,7 +47,6 @@ EnvProto GetEnvProto(int port) {
 Resource GetResource() {
   Resource ret;
   ret.set_machine_num(1);
-  ret.set_gpu_device_num(0);
   ret.set_cpu_device_num(1);
   ret.set_comm_net_worker_num(1);
   return ret;

diff --git a/oneflow/core/framework/multi_client_session_context.cpp b/oneflow/core/framework/multi_client_session_context.cpp
@@ -46,16 +46,6 @@ namespace oneflow {
 
 namespace {
 
-int32_t GetGpuDeviceNum() {
-#ifndef WITH_CUDA
-  return 0;
-#else
-  int device_count = 0;
-  cudaGetDeviceCount(&device_count);
-  return device_count;
-#endif
-}
-
 int32_t GetCpuDeviceNum() { return std::thread::hardware_concurrency(); }
 
 }  // namespace
@@ -82,18 +72,16 @@ Maybe<void> MultiClientSessionContext::TryInit(const ConfigProto& config_proto)
 
     {
       // NOTE(chengcheng):
-      //   In multi-client, user can NOT config gpu_device_num and cpu_device_num.
+      //   In multi-client, user can NOT config cpu_device_num.
       //
       //   cpu_device_num is a confusing name, it should be explained as:
       //       in current rank, assign CPU actor compute stream in this optional range.
       //       That is, the number of independent CPU devices that can be abstracted from
       //       this machine and this process.
-      //   gpu_device_num is the number of visible GPUs one current machine.
       //
-      //   NOTE: gpu_device_num and cpu_device_num NOT necessarily equal to the num of process
+      //   NOTE: cpu_device_num is NOT necessarily equal to the number of processes
       //       on this machine.
       resource.set_machine_num(GlobalProcessCtx::NodeSize());
-      resource.set_gpu_device_num(GetGpuDeviceNum());
       resource.set_cpu_device_num(GetCpuDeviceNum());
     }
 

diff --git a/oneflow/core/job/env_global_objects_scope.cpp b/oneflow/core/job/env_global_objects_scope.cpp
@@ -75,16 +75,6 @@ void InitLogging(const CppLoggingConf& logging_conf) {
 
 int32_t GetDefaultCpuDeviceNum() { return std::thread::hardware_concurrency(); }
 
-int32_t GetDefaultGpuDeviceNum() {
-#ifndef WITH_CUDA
-  return 0;
-#else
-  int device_count = 0;
-  cudaGetDeviceCount(&device_count);
-  return device_count;
-#endif
-}
-
 Resource GetDefaultResource(const EnvProto& env_proto) {
   Resource resource;
   if (env_proto.has_ctrl_bootstrap_conf()) {
@@ -93,7 +83,6 @@ Resource GetDefaultResource(const EnvProto& env_proto) {
     resource.set_machine_num(env_proto.machine_size());
   }
   resource.set_cpu_device_num(GetDefaultCpuDeviceNum());
-  resource.set_gpu_device_num(GetDefaultGpuDeviceNum());
   return resource;
 }
 

diff --git a/oneflow/core/job/id_manager_test.cpp b/oneflow/core/job/id_manager_test.cpp
@@ -40,7 +40,6 @@ EnvProto GetEnvProto() {
 Resource GetResource() {
   Resource ret;
   ret.set_machine_num(10);
-  ret.set_gpu_device_num(8);
   ret.set_cpu_device_num(5);
   ret.set_comm_net_worker_num(4);
   return ret;

diff --git a/oneflow/core/job/resource.proto b/oneflow/core/job/resource.proto
@@ -36,7 +36,6 @@ message CudnnConfig {
 
 message Resource {
   optional int32 machine_num = 1 [default = 0];
-  optional int32 gpu_device_num = 4 [default = 0];
   optional int32 cpu_device_num = 5 [default = 0];
   optional int32 comm_net_worker_num = 6 [default = 4];
   optional int32 max_mdsave_worker_num = 7 [default = 64];

diff --git a/oneflow/core/vm/virtual_machine_engine.h b/oneflow/core/vm/virtual_machine_engine.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include "oneflow/core/vm/stream.h"
 #include "oneflow/core/vm/thread_ctx.h"
 #include "oneflow/core/vm/vm_object.h"
-#include "oneflow/core/vm/vm_resource_desc.h"
 #include "oneflow/core/common/range.h"
 #include "oneflow/core/intrusive/mutexed_list.h"
 #include "oneflow/core/intrusive/object_pool.h"

diff --git a/oneflow/core/vm/vm_resource_desc.cpp b/oneflow/core/vm/vm_resource_desc.cpp
diff --git a/oneflow/core/vm/vm_resource_desc.h b/oneflow/core/vm/vm_resource_desc.h
diff --git a/python/oneflow/framework/config_util.py b/python/oneflow/framework/config_util.py
@@ -81,28 +81,11 @@ def machine_num(val):
     sess.config_proto.resource.machine_num = val
 
 
-def api_gpu_device_num(val: int) -> None:
-    """Set number of GPUs on each machine to run oneflow on.
-
-    Args:
-        val (int): number of GPUs. It is identical on every machine. In other words,
-        you can't specify different number of GPUs you would like to use on each machine.
-    """
-    if oneflow._oneflow_internal.flags.with_cuda():
-        return enable_if.unique([gpu_device_num, do_nothing])(val)
-    else:
-        print(
-            "INFO: for CPU-only OneFlow, oneflow.config.gpu_device_num is equivalent to oneflow.config.cpu_device_num"
-        )
-        print(traceback.format_stack()[-2])
-        return enable_if.unique([cpu_device_num, do_nothing])(val)
-
-
 @enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
 def gpu_device_num(val):
-    sess = session_ctx.GetDefaultSession()
-    assert type(val) is int
-    sess.config_proto.resource.gpu_device_num = val
+    print(
+        "'gpu_device_num' has been deprecated, has no effect and will be removed in the future."
+    )
 
 
 def api_cpu_device_num(val: int) -> None:

diff --git a/python/oneflow/serving/inference_session.py b/python/oneflow/serving/inference_session.py
@@ -149,10 +149,9 @@ def _make_config_proto(self):
             self.config_proto_ = config_proto
             # self.config_proto_ = session_util._GetDefaultConfigProto()
         if self.option_.device_tag == "cuda":
-            self.config_proto_.resource.gpu_device_num = self.option_.device_num
+            pass
         elif self.option_.device_tag == "cpu":
             self.config_proto_.resource.cpu_device_num = self.option_.device_num
-            self.config_proto_.resource.gpu_device_num = 0
         else:
             raise NotImplementedError(
                 "not supported device tag {}".format(self.option_.device_tag)