change temp space allocation to dynamic size
tqchen committed Sep 16, 2015
1 parent bc8ffb2 commit d91a5fc
Showing 12 changed files with 184 additions and 105 deletions.
7 changes: 7 additions & 0 deletions include/mxnet/c_api.h
@@ -483,6 +483,13 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym,
 //--------------------------------------------
 // Part 4: Executor interface
 //--------------------------------------------
+/*!
+ * \brief Print the content of execution plan, used for debug.
+ * \param handle the executor.
+ * \param out_str pointer to hold the output string of the printing.
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXExecutorPrint(ExecutorHandle handle, const char **out_str);
 /*!
  * \brief Executor forward method
  *
22 changes: 13 additions & 9 deletions include/mxnet/resource.h
@@ -20,21 +20,19 @@ struct ResourceRequest {
   enum Type {
     /*! \brief mshadow::Random<xpu> object */
     kRandom,
-    /*! \brief Temporal space */
+    /*! \brief A dynamic temp space that can be arbitrary size */
     kTempSpace
   };
   /*! \brief type of resources */
   Type type;
-  /*! \brief size of space requested, in terms of number of reals */
-  size_t space_num_reals;
   /*! \brief default constructor */
   ResourceRequest() {}
   /*!
    * \brief constructor, allow implicit conversion
    * \param type type of resources
    */
-  ResourceRequest(Type type, size_t space_num_reals = 0) // NOLINT(*)
-      : type(type), space_num_reals(space_num_reals) {}
+  ResourceRequest(Type type) // NOLINT(*)
+      : type(type) {}
 };

@@ -48,11 +46,15 @@ struct Resource {
   ResourceRequest req;
   /*! \brief engine variable */
   engine::VarHandle var;
+  /*! \brief identifier of id information, used for debug purpose */
+  int32_t id;
   /*!
    * \brief pointer to the resource, do not use directly,
    *  access using member functions
    */
   void *ptr_;
+  /*! \brief default constructor */
+  Resource() : id(0) {}
   /*!
    * \brief Get random number generator.
    * \param The stream to use in the random number generator.
@@ -70,7 +72,8 @@ struct Resource {
   }
   /*!
    * \brief Get space requested as mshadow Tensor.
-   *  The resulting tensor must fit in space requsted.
+   *  The caller can request arbitrary size.
+   *
    * \param shape the Shape of returning tensor.
    * \param stream the stream of returning tensor.
    * \return the mshadow tensor requested.
@@ -81,9 +84,11 @@ struct Resource {
   inline mshadow::Tensor<xpu, ndim, real_t> get_space(
       mshadow::Shape<ndim> shape, mshadow::Stream<xpu> *stream) const {
     CHECK_EQ(req.type, ResourceRequest::kTempSpace);
-    CHECK_GE(req.space_num_reals, shape.Size());
+    mshadow::TensorContainer<xpu, 1, real_t> *space =
+        static_cast<mshadow::TensorContainer<xpu, 1, real_t>*>(ptr_);
+    space->Resize(mshadow::Shape1(shape.Size()));
     return mshadow::Tensor<xpu, ndim, real_t>(
-        static_cast<real_t*>(ptr_), shape, shape[ndim - 1], stream);
+        space->dptr_, shape, shape[ndim - 1], stream);
   }
 };

@@ -97,7 +102,6 @@ class ResourceManager {
    * \return the requested resource.
    * \note The returned resource's ownership is
    *       still held by the manager singleton.
-   *
    */
   virtual Resource Request(Context ctx, const ResourceRequest &req) = 0;
   /*!
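Editor's note (not part of the commit): with this change an operator no longer pre-declares a byte count; it returns a bare kTempSpace request and sizes the space at execution time, as the batch_norm and convolution hunks below show on the request side. A minimal, hypothetical sketch of the new get_space() usage, assuming mshadow is set up and `temp` is the Resource granted for a kTempSpace request:

// Sketch only: a made-up operator fragment using the dynamic temp space.
#include <mxnet/resource.h>

template<typename xpu>
void UseWorkspace(const mxnet::Resource &temp, mshadow::Stream<xpu> *s) {
  // Any shape may be requested; get_space() resizes the backing
  // TensorContainer instead of checking a pre-declared size.
  mshadow::Tensor<xpu, 2, mxnet::real_t> workspace =
      temp.get_space<xpu>(mshadow::Shape2(128, 256), s);
  // ... fill and consume `workspace` within this operator call ...
}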
5 changes: 5 additions & 0 deletions include/mxnet/symbolic.h
@@ -400,6 +400,11 @@ class Executor {
    * \param head_grads the gradient of head nodes to be backproped.
    */
  virtual void Backward(const std::vector<NDArray> &head_grads) = 0;
+  /*!
+   * \brief print the execution plan info to output stream.
+   * \param os the output stream we would like to print to.
+   */
+  virtual void Print(std::ostream &os) const {} // NOLINT(*)
   /*!
    * \brief get array of outputs in the executor.
    * \return array of outputs in the executor.
17 changes: 15 additions & 2 deletions python/mxnet/executor.py
@@ -5,8 +5,8 @@

 import ctypes
 from .base import _LIB
-from .base import c_array, mx_uint, NDArrayHandle, ExecutorHandle
-from .base import check_call
+from .base import mx_uint, NDArrayHandle, ExecutorHandle
+from .base import check_call, c_array, py_str
 from .ndarray import NDArray

 class Executor(object):

@@ -81,6 +81,19 @@ def backward(self, head_grads=None):
         ndarray = c_array(NDArrayHandle, [item.handle for item in head_grads])
         check_call(_LIB.MXExecutorBackward(self.handle, len(head_grads), ndarray))

+    def debug_str(self):
+        """Get a debug string about internal execution plan.
+
+        Returns
+        -------
+        debug_str : string
+            Debug string of the executor.
+        """
+        debug_str = ctypes.c_char_p()
+        check_call(_LIB.MXExecutorPrint(
+            self.handle, ctypes.byref(debug_str)))
+        return py_str(debug_str.value)
+
     @property
     def outputs(self):
         """list all heads' output ndarray
11 changes: 11 additions & 0 deletions src/c_api.cc
@@ -684,6 +684,17 @@ int MXSymbolInferShape(SymbolHandle sym,
   API_END();
 }

+int MXExecutorPrint(ExecutorHandle handle, const char **out_str) {
+  Executor *exec = static_cast<Executor*>(handle);
+  MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
+  API_BEGIN();
+  std::ostringstream os;
+  exec->Print(os);
+  ret->ret_str = os.str();
+  *out_str = (ret->ret_str).c_str();
+  API_END();
+}
+
 int MXExecutorForward(ExecutorHandle handle, bool is_train) {
   API_BEGIN();
   Executor *exec = static_cast<Executor*>(handle);
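Editor's note (not part of the commit): the Python debug_str() method above is a thin ctypes wrapper over this entry point. A sketch of a direct C++ caller follows; `exec_handle` is assumed to come from a prior MXExecutorBind call. Because the returned pointer aims at library-owned thread-local storage (ret->ret_str above), the string should be copied before the next C API call on the same thread.

#include <mxnet/c_api.h>
#include <iostream>
#include <string>

// Sketch: dump an executor's execution plan through the new C API.
void PrintPlan(ExecutorHandle exec_handle) {
  const char *out = nullptr;
  if (MXExecutorPrint(exec_handle, &out) == 0) {  // 0 means success
    std::string plan(out);  // copy out of the thread-local buffer
    std::cout << plan << std::endl;
  }
}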
3 changes: 2 additions & 1 deletion src/engine/stream_manager.h
@@ -75,8 +75,9 @@ template <std::size_t kNumGpus, std::size_t kStreams>
 RunContext StreamManager<kNumGpus, kStreams>::GetIORunContext(
     Context const& ctx) {
   RunContext ret;
+  ret.stream = nullptr;
   switch (ctx.dev_mask) {
-    case cpu::kDevMask: ret.stream = nullptr; break;
+    case cpu::kDevMask: break;
     case gpu::kDevMask: {
 #if MXNET_USE_CUDA
       CUDA_CALL(cudaSetDevice(ctx.dev_id));
4 changes: 1 addition & 3 deletions src/operator/batch_norm-inl.h
@@ -238,9 +238,7 @@ class BatchNormProp : public OperatorProperty {

   std::vector<ResourceRequest> BackwardResource(
       const std::vector<TShape> &in_shape) const override {
-    const TShape &dshape = in_shape[0];
-    size_t nspace = dshape[1] * 3;
-    return {{ResourceRequest::kTempSpace, nspace}};
+    return {ResourceRequest::kTempSpace};
   }

   int NumVisibleOutputs() const override {
4 changes: 2 additions & 2 deletions src/operator/convolution-inl.h
@@ -348,12 +348,12 @@ class ConvolutionProp : public OperatorProperty {

   virtual std::vector<ResourceRequest> ForwardResource(
       const std::vector<TShape> &in_shape) const {
-    return {{ResourceRequest::kTempSpace, param_.workspace}};
+    return {ResourceRequest::kTempSpace};
   }

   virtual std::vector<ResourceRequest> BackwardResource(
       const std::vector<TShape> &in_shape) const {
-    return {{ResourceRequest::kTempSpace, param_.workspace}};
+    return {ResourceRequest::kTempSpace};
   }

   Operator* CreateOperator(Context ctx) const;
103 changes: 85 additions & 18 deletions src/resource.cc
@@ -4,9 +4,12 @@
  * \brief Implementation of resource manager.
  */
 #include <dmlc/logging.h>
+#include <dmlc/parameter.h>
 #include <mxnet/base.h>
 #include <mxnet/engine.h>
 #include <mxnet/resource.h>
+#include <limits>
+#include <atomic>
 #include "./common/lazy_alloc_array.h"

 namespace mxnet {

@@ -15,10 +18,15 @@ namespace resource {
 // implements resource manager
 class ResourceManagerImpl : public ResourceManager {
  public:
-  ResourceManagerImpl() : global_seed_(0) {
+  ResourceManagerImpl() noexcept(false)
+      : global_seed_(0) {
+    cpu_temp_space_copy_ = dmlc::GetEnv("MXNET_CPU_TEMP_COPY", 16);
+    gpu_temp_space_copy_ = dmlc::GetEnv("MXNET_GPU_TEMP_COPY", 4);
     engine_ref_ = Engine::_GetSharedRef();
     cpu_rand_ = new ResourceRandom<cpu>(
         Context(cpu::kDevMask, 0), global_seed_);
+    cpu_space_ = new ResourceTempSpace<cpu>(
+        Context(cpu::kDevMask, 0), cpu_temp_space_copy_);
   }
   ~ResourceManagerImpl() {
     // need explicit delete, before engine get killed

@@ -32,21 +40,31 @@ class ResourceManagerImpl : public ResourceManager {

   // request resources
   Resource Request(Context ctx, const ResourceRequest &req) override {
-    if (req.type == ResourceRequest::kRandom) {
-      if (ctx.dev_mask == cpu::kDevMask) {
-        return cpu_rand_->resource;
-      } else {
-        CHECK_EQ(ctx.dev_mask, gpu::kDevMask);
+    if (ctx.dev_mask == cpu::kDevMask) {
+      switch (req.type) {
+        case ResourceRequest::kRandom: return cpu_rand_->resource;
+        case ResourceRequest::kTempSpace: return cpu_space_->GetNext();
+        default: LOG(FATAL) << "Unknown supported type " << req.type;
+      }
+    } else {
+      CHECK_EQ(ctx.dev_mask, gpu::kDevMask);
 #if MSHADOW_USE_CUDA
-        return gpu_rand_.Get(ctx.dev_id, [ctx, this]() {
-            return new ResourceRandom<gpu>(ctx, global_seed_);
-          })->resource;
+      switch (req.type) {
+        case ResourceRequest::kRandom: {
+          return gpu_rand_.Get(ctx.dev_id, [ctx, this]() {
+              return new ResourceRandom<gpu>(ctx, global_seed_);
+            })->resource;
+        }
+        case ResourceRequest::kTempSpace: {
+          return gpu_space_.Get(ctx.dev_id, [ctx, this]() {
+              return new ResourceTempSpace<gpu>(ctx, gpu_temp_space_copy_);
+            })->GetNext();
+        }
+        default: LOG(FATAL) << "Unknown supported type " << req.type;
+      }
 #else
-        LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
+      LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
 #endif
     }
-    } else {
-      LOG(FATAL) << "Unknown supported type " << req.type;
-    }
     Resource ret;
     return ret;

@@ -67,16 +85,13 @@ class ResourceManagerImpl : public ResourceManager {
   static constexpr std::size_t kMaxNumGPUs = 16;
   /*! \brief Random number magic number to seed different random numbers */
   static constexpr uint32_t kRandMagic = 127UL;
-  /*! \brief Reference to the engine */
-  std::shared_ptr<Engine> engine_ref_;
-
   // the random number resources
   template<typename xpu>
   struct ResourceRandom {
-    /*! \brief pointer to PRNG */
-    mshadow::Random<xpu> *prnd;
     /*! \brief the context of the PRNG */
     Context ctx;
+    /*! \brief pointer to PRNG */
+    mshadow::Random<xpu> *prnd;
     /*! \brief resource representation */
     Resource resource;
     /*! \brief constructor */

@@ -103,13 +118,65 @@ class ResourceManagerImpl : public ResourceManager {
         }, ctx, {}, {resource.var});
     }
   };
+  // temporary space resource.
+  template<typename xpu>
+  struct ResourceTempSpace {
+    /*! \brief the context of the device */
+    Context ctx;
+    /*! \brief the underlying space */
+    std::vector<mshadow::TensorContainer<xpu, 1, real_t>*> space;
+    /*! \brief resource representation */
+    std::vector<Resource> resource;
+    /*! \brief current pointer to the round robin allocator */
+    std::atomic<size_t> curr_ptr;
+    /*! \brief constructor */
+    explicit ResourceTempSpace(Context ctx, size_t ncopy)
+        : ctx(ctx), space(ncopy), resource(ncopy), curr_ptr(0) {
+      mshadow::SetDevice<xpu>(ctx.dev_id);
+      for (size_t i = 0; i < space.size(); ++i) {
+        space[i] = new mshadow::TensorContainer<xpu, 1, real_t>();
+        resource[i].var = Engine::Get()->NewVariable();
+        resource[i].id = static_cast<int32_t>(i);
+        resource[i].ptr_ = space[i];
+        resource[i].req = ResourceRequest(ResourceRequest::kTempSpace);
+      }
+    }
+    ~ResourceTempSpace() {
+      for (size_t i = 0; i < space.size(); ++i) {
+        mshadow::TensorContainer<xpu, 1, real_t>* r = space[i];
+        Engine::Get()->DeleteVariable(
+            [r](RunContext rctx){ delete r; }, ctx, resource[i].var);
+      }
+    }
+    // get next resource in round robin manner
+    inline Resource GetNext() {
+      const size_t kMaxDigit = std::numeric_limits<size_t>::max() / 2;
+      size_t ptr = ++curr_ptr;
+      // reset ptr to avoid undefined behavior during overflow
+      // usually this won't happen
+      if (ptr > kMaxDigit) {
+        curr_ptr.store((ptr + 1) % space.size());
+      }
+      return resource[ptr % space.size()];
+    }
+  };
+  /*! \brief number of copies in CPU temp space */
+  int cpu_temp_space_copy_;
+  /*! \brief number of copies in GPU temp space */
+  int gpu_temp_space_copy_;
+  /*! \brief Reference to the engine */
+  std::shared_ptr<Engine> engine_ref_;
   /*! \brief internal seed to the random number generator */
   uint32_t global_seed_;
   /*! \brief CPU random number resources */
   ResourceRandom<cpu> *cpu_rand_;
+  /*! \brief CPU temp space resources */
+  ResourceTempSpace<cpu> *cpu_space_;
 #if MXNET_USE_CUDA
   /*! \brief random number generator for GPU */
   common::LazyAllocArray<ResourceRandom<gpu> > gpu_rand_;
+  /*! \brief temp space for GPU */
+  common::LazyAllocArray<ResourceTempSpace<gpu> > gpu_space_;
 #endif
 };
 } // namespace resource
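Editor's note (not part of the commit): each of the ncopy temp-space copies owns its own engine variable, so operators that land on different copies can run concurrently instead of serializing on a single buffer's write dependency; MXNET_CPU_TEMP_COPY and MXNET_GPU_TEMP_COPY trade memory for that parallelism. A standalone sketch of the slot rotation GetNext() performs, with kCopies standing in for space.size():

#include <atomic>
#include <cstddef>
#include <limits>

constexpr std::size_t kCopies = 4;      // e.g. the GPU default above
std::atomic<std::size_t> curr_ptr(0);

std::size_t NextSlot() {
  std::size_t ptr = ++curr_ptr;         // one atomic bump per request
  // Reset long before the unsigned counter wraps; (ptr + 1) % kCopies
  // keeps the rotation phase for the next caller.
  if (ptr > std::numeric_limits<std::size_t>::max() / 2) {
    curr_ptr.store((ptr + 1) % kCopies);
  }
  return ptr % kCopies;                 // index of the copy to hand out
}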
(Diffs for the remaining 3 changed files were not loaded.)
