Skip to content

Commit

Permalink
Remove gpu_device_num (#8516)
Browse files Browse the repository at this point in the history
* Remove gpu_device_num

* fix
  • Loading branch information
liujuncheng authored Jun 29, 2022
1 parent 3654513 commit 327a19f
Show file tree
Hide file tree
Showing 10 changed files with 6 additions and 164 deletions.
1 change: 0 additions & 1 deletion oneflow/core/control/ctrl_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ EnvProto GetEnvProto(int port) {
Resource GetResource() {
Resource ret;
ret.set_machine_num(1);
ret.set_gpu_device_num(0);
ret.set_cpu_device_num(1);
ret.set_comm_net_worker_num(1);
return ret;
Expand Down
16 changes: 2 additions & 14 deletions oneflow/core/framework/multi_client_session_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,6 @@ namespace oneflow {

namespace {

// Returns the device count written by cudaGetDeviceCount when the build
// defines WITH_CUDA; builds without WITH_CUDA always return 0.
int32_t GetGpuDeviceNum() {
#ifdef WITH_CUDA
  // The status code returned by cudaGetDeviceCount is not inspected; the
  // counter is zero-initialized before the call.
  int visible_gpus = 0;
  cudaGetDeviceCount(&visible_gpus);
  return visible_gpus;
#else
  return 0;
#endif
}

// Returns std::thread::hardware_concurrency() converted to int32_t.
// Per the C++ standard that call is only a hint and may yield 0 when the
// value is not computable; the result is forwarded unchanged.
int32_t GetCpuDeviceNum() {
  const unsigned int hw_threads = std::thread::hardware_concurrency();
  return static_cast<int32_t>(hw_threads);
}

} // namespace
Expand All @@ -82,18 +72,16 @@ Maybe<void> MultiClientSessionContext::TryInit(const ConfigProto& config_proto)

{
// NOTE(chengcheng):
// In multi-client, user can NOT config gpu_device_num and cpu_device_num.
// In multi-client, user can NOT config cpu_device_num.
//
// cpu_device_num is a confusing name, it should be explained as:
// in current rank, assign CPU actor compute stream in this optional range.
// That is, the number of independent CPU devices that can be abstracted from
// this machine and this process.
// gpu_device_num is the number of visible GPUs on the current machine.
//
// NOTE: gpu_device_num and cpu_device_num are NOT necessarily equal to the number of processes
// NOTE: cpu_device_num is NOT necessarily equal to the number of processes
// on this machine.
resource.set_machine_num(GlobalProcessCtx::NodeSize());
resource.set_gpu_device_num(GetGpuDeviceNum());
resource.set_cpu_device_num(GetCpuDeviceNum());
}

Expand Down
11 changes: 0 additions & 11 deletions oneflow/core/job/env_global_objects_scope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,6 @@ void InitLogging(const CppLoggingConf& logging_conf) {

// Default CPU device count: the value reported by
// std::thread::hardware_concurrency(), converted to int32_t. That call may
// report 0 when the hint is unavailable; no fallback is applied here.
int32_t GetDefaultCpuDeviceNum() {
  const auto concurrency_hint = std::thread::hardware_concurrency();
  return static_cast<int32_t>(concurrency_hint);
}

// Default GPU device count: whatever cudaGetDeviceCount writes when the build
// defines WITH_CUDA, otherwise 0.
int32_t GetDefaultGpuDeviceNum() {
  int32_t gpu_count = 0;
#ifdef WITH_CUDA
  // The call's status code is not checked; `detected` starts at zero.
  int detected = 0;
  cudaGetDeviceCount(&detected);
  gpu_count = detected;
#endif
  return gpu_count;
}

Resource GetDefaultResource(const EnvProto& env_proto) {
Resource resource;
if (env_proto.has_ctrl_bootstrap_conf()) {
Expand All @@ -93,7 +83,6 @@ Resource GetDefaultResource(const EnvProto& env_proto) {
resource.set_machine_num(env_proto.machine_size());
}
resource.set_cpu_device_num(GetDefaultCpuDeviceNum());
resource.set_gpu_device_num(GetDefaultGpuDeviceNum());
return resource;
}

Expand Down
1 change: 0 additions & 1 deletion oneflow/core/job/id_manager_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ EnvProto GetEnvProto() {
Resource GetResource() {
Resource ret;
ret.set_machine_num(10);
ret.set_gpu_device_num(8);
ret.set_cpu_device_num(5);
ret.set_comm_net_worker_num(4);
return ret;
Expand Down
1 change: 0 additions & 1 deletion oneflow/core/job/resource.proto
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ message CudnnConfig {

message Resource {
optional int32 machine_num = 1 [default = 0];
optional int32 gpu_device_num = 4 [default = 0];
optional int32 cpu_device_num = 5 [default = 0];
optional int32 comm_net_worker_num = 6 [default = 4];
optional int32 max_mdsave_worker_num = 7 [default = 64];
Expand Down
1 change: 0 additions & 1 deletion oneflow/core/vm/virtual_machine_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ limitations under the License.
#include "oneflow/core/vm/stream.h"
#include "oneflow/core/vm/thread_ctx.h"
#include "oneflow/core/vm/vm_object.h"
#include "oneflow/core/vm/vm_resource_desc.h"
#include "oneflow/core/common/range.h"
#include "oneflow/core/intrusive/mutexed_list.h"
#include "oneflow/core/intrusive/object_pool.h"
Expand Down
47 changes: 0 additions & 47 deletions oneflow/core/vm/vm_resource_desc.cpp

This file was deleted.

66 changes: 0 additions & 66 deletions oneflow/core/vm/vm_resource_desc.h

This file was deleted.

23 changes: 3 additions & 20 deletions python/oneflow/framework/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,28 +81,11 @@ def machine_num(val):
sess.config_proto.resource.machine_num = val


def api_gpu_device_num(val: int) -> None:
    """Configure how many GPUs each machine uses to run oneflow.

    The same count applies to every machine; per-machine counts cannot be
    specified.

    When the build reports no CUDA support (``flags.with_cuda()`` is false),
    an INFO notice and the caller's stack entry are printed and the value is
    forwarded to ``cpu_device_num`` instead.

    Args:
        val (int): number of GPUs per machine.
    """
    # NOTE(review): enable_if.unique presumably picks the single candidate
    # whose condition currently holds -- confirm against enable_if.
    if not oneflow._oneflow_internal.flags.with_cuda():
        print(
            "INFO: for CPU-only OneFlow, oneflow.config.gpu_device_num is equivalent to oneflow.config.cpu_device_num"
        )
        # [-2] is the stack entry of whoever called this function.
        print(traceback.format_stack()[-2])
        cpu_setter = enable_if.unique([cpu_device_num, do_nothing])
        return cpu_setter(val)

    gpu_setter = enable_if.unique([gpu_device_num, do_nothing])
    return gpu_setter(val)


@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
def gpu_device_num(val):
sess = session_ctx.GetDefaultSession()
assert type(val) is int
sess.config_proto.resource.gpu_device_num = val
print(
"'gpu_device_num' has been deprecated, has no effect and will be removed in the future."
)


def api_cpu_device_num(val: int) -> None:
Expand Down
3 changes: 1 addition & 2 deletions python/oneflow/serving/inference_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,9 @@ def _make_config_proto(self):
self.config_proto_ = config_proto
# self.config_proto_ = session_util._GetDefaultConfigProto()
if self.option_.device_tag == "cuda":
self.config_proto_.resource.gpu_device_num = self.option_.device_num
pass
elif self.option_.device_tag == "cpu":
self.config_proto_.resource.cpu_device_num = self.option_.device_num
self.config_proto_.resource.gpu_device_num = 0
else:
raise NotImplementedError(
"not supported device tag {}".format(self.option_.device_tag)
Expand Down

0 comments on commit 327a19f

Please sign in to comment.