Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[ENV] update runtime setting default values #18987

Merged
merged 1 commit into from
Sep 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/resource.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,9 +96,9 @@ class ResourceManagerImpl : public ResourceManager {
cpu_temp_space_copy_ = dmlc::GetEnv("MXNET_CPU_TEMP_COPY", 4);
gpu_temp_space_copy_ = dmlc::GetEnv("MXNET_GPU_TEMP_COPY", 1);
cpu_native_rand_copy_ = dmlc::GetEnv("MXNET_CPU_PARALLEL_RAND_COPY", 1);
gpu_native_rand_copy_ = dmlc::GetEnv("MXNET_GPU_PARALLEL_RAND_COPY", 4);
gpu_native_rand_copy_ = dmlc::GetEnv("MXNET_GPU_PARALLEL_RAND_COPY", 1);
#if MXNET_USE_CUDNN == 1
gpu_cudnn_dropout_state_copy_ = dmlc::GetEnv("MXNET_GPU_CUDNN_DROPOUT_STATE_COPY", 4);
gpu_cudnn_dropout_state_copy_ = dmlc::GetEnv("MXNET_GPU_CUDNN_DROPOUT_STATE_COPY", 1);
szha marked this conversation as resolved.
Show resolved Hide resolved
#endif // MXNET_USE_CUDNN == 1
engine_ref_ = Engine::_GetSharedRef();
storage_ref_ = Storage::_GetSharedRef();
Expand Down
15 changes: 10 additions & 5 deletions src/storage/pooled_storage_manager.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,14 +178,19 @@ void PooledStorageManager<BucketingStrategy, StoringMethod>::Alloc(Storage::Hand
void *ret = nullptr;
auto e = contextHelper_->Malloc(&ret, roundSize);
if (e) {
const std::string err(
// retry in case of fragmentation
ReleaseAllNoLock(false);
e = contextHelper_->Malloc(&ret, roundSize);
if (e) {
const std::string err(
#if MXNET_USE_CUDA
dev_type_ == Context::kGPU?
cudaGetErrorString(static_cast<cudaError_t>(e)) :
dev_type_ == Context::kGPU?
cudaGetErrorString(static_cast<cudaError_t>(e)) :
#endif
std::strerror(errno));
std::strerror(errno));

LOG(FATAL) << "Memory allocation failed " << err;
LOG(FATAL) << "Memory allocation failed " << err;
}
}

UNSET_DEVICE(device_store);
Expand Down
3 changes: 2 additions & 1 deletion src/storage/storage.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,8 +67,9 @@ StorageManager *CreateStorageManager(const Context &ctx, const char *context,
int num_gpu_device, std::string *pStrategy) {
const auto env_var = env_var_name(context, pool_type);
const char *type = getenv(env_var.c_str());
if (type == nullptr)
if (type == nullptr) {
type = "Naive"; // default pool
}

*pStrategy = type;
StorageManager *ptr = nullptr;
Expand Down