[Relay] Fix invalid shape function for "copy" operator
The 'script' form of the shape function was ill-formed,
resulting in a TIR shape function which did not assign
to its output, which in turn caused either OOMs or
assertion failures as uninitialized dimensions worked
their way downstream. The fix is in python/tvm/relay/op/tensor.py.

Everything else is for testing and debugging as I tracked
this down.

Special thanks to Lily for helping me with the scalar vs
tensor switch in the copy shape function.

[This is CORE-112 in OctoML JIRA.]
mbs-octoml committed Dec 17, 2021
1 parent bd61d18 commit e10ae1a
Showing 10 changed files with 364 additions and 113 deletions.
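For context, a minimal reproducer sketch (hypothetical, not part of this commit) that exercises the "copy" shape function: compiling a dynamically shaped input for the VM forces the lowered TIR shape function to run at runtime, which is where the unassigned output previously surfaced as a bad shape, OOM, or assert.

# Hypothetical reproducer sketch (assumed program, not from the commit).
import numpy as np
import tvm
from tvm import relay

# A dynamically shaped input means the output shape of "copy" must be
# computed at runtime by the lowered shape function.
x = relay.var("x", shape=(relay.Any(),), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], relay.copy(x)))

with tvm.transform.PassContext(opt_level=3):
    vm_exe = relay.vm.compile(mod, target="llvm")

vm = tvm.runtime.vm.VirtualMachine(vm_exe, tvm.cpu())
result = vm.invoke("main", tvm.nd.array(np.ones((5,), dtype="float32")))
print(result.shape)  # expected: (5,)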
54 changes: 54 additions & 0 deletions include/tvm/runtime/debug.h
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/runtime/debug.h
* \brief Helpers for debugging at runtime.
*/
#ifndef TVM_RUNTIME_DEBUG_H_
#define TVM_RUNTIME_DEBUG_H_

#include <tvm/runtime/container/adt.h>
#include <tvm/runtime/ndarray.h>

#include <ostream>
#include <string>

namespace tvm {
namespace runtime {

/*!
* \brief Helpers to describe runtime objects in human-friendly form. For \p nd_arrays we show their
* shapes and dtypes, but also their contents if 'small' and on the \p host_device (mostly so that
* we can see dynamic shapes as they are computed). For \p adts we show the ADT fields. For
* \p objects we dispatch to one of the above as appropriate.
*/
void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device,
                   bool show_content = true);
void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device,
               bool show_content = true);
void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device,
                         bool show_content = true);
std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device,
                                 bool show_content = true);

} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_DEBUG_H_
19 changes: 16 additions & 3 deletions python/tvm/relay/op/tensor.py
@@ -1178,16 +1178,29 @@ def copy(data):


@script
def _copy_shape_func(data_shape):
    return data_shape
def _copy_shape_func_tensor(data_shape):
    ndim = data_shape.shape[0]
    out = output_tensor((ndim,), "int64")
    for i in const_range(ndim):
        out[i] = data_shape[i]
    return out


@script
def _copy_shape_func_scalar(data_shape):
    out = output_tensor((), "int64")
    return out


@reg.register_shape_func("copy", False)
def copy_shape_func(attrs, inputs, _):
    """
    Shape function for copy op.
    """
    return [_copy_shape_func(inputs[0])]
    input = inputs[0]
    if len(input.shape) == 0:
        return [_copy_shape_func_scalar(input)]
    return [_copy_shape_func_tensor(input)]


def device_copy(data, src_device, dst_device):
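In short, a hybrid-script shape function must allocate its output with output_tensor and write every element; the old "return data_shape" form never assigned the output, and a rank-0 input additionally needs the zero-dimensional variant. A small NumPy model of what the fixed shape function computes (an illustration, not code from the commit):

# NumPy model (illustrative, not from the commit) of the fixed behaviour:
# given the input's shape as a 1-D int64 tensor, copy each dimension into a
# freshly allocated output instead of leaving it unwritten.
import numpy as np

def copy_shape_reference(data_shape: np.ndarray) -> np.ndarray:
    if data_shape.ndim == 0:
        # Scalar input: the output shape is itself rank-0, nothing to write.
        return np.empty((), dtype="int64")
    out = np.empty((data_shape.shape[0],), dtype="int64")
    for i in range(data_shape.shape[0]):
        out[i] = data_shape[i]
    return out

assert copy_shape_reference(np.array([5, 7], dtype="int64")).tolist() == [5, 7]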
2 changes: 1 addition & 1 deletion src/relay/backend/te_compiler.cc
@@ -350,7 +350,7 @@ class TECompilerImpl : public TECompilerNode {

// implement lowered shape func
CCacheValue LowerShapeFuncInternal(const CCacheKey& key) {
VLOG(1) << "lowering dynamic shape function:" << std::endl
VLOG(1) << "lowering dynamic shape function for:" << std::endl
<< PrettyPrint(key->source_func) << std::endl
<< "for target:" << std::endl
<< key->target->ToDebugString();
31 changes: 16 additions & 15 deletions src/relay/backend/te_compiler_cache.cc
@@ -145,7 +145,7 @@ class ScheduleBuilder : public backend::MemoizedExprTranslator<Array<te::Tensor>
candidate_name = truncated_name.str();
}

// TODO(mbs): This should be the definititive global by which the PrimFunc is known and
// TODO(mbs): This should be the definitive global by which the PrimFunc is known and
// no other GlobalVar ctors should appear inside the lowering machinery.
auto prim_fn_var = GlobalVar(renamer(candidate_name));
prim_fn_var->checked_type_ = relay_func->checked_type();
@@ -371,6 +371,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>

CachedFunc Create(const Function& prim_func, const Target& target,
std::function<std::string(std::string)> renamer) {
VLOG_CONTEXT << "MakeShapeFunc";
TShapeDataDependent shape_func_param_states;

for (auto param : prim_func->params) {
@@ -399,11 +400,12 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
// Setup the name;
readable_name_stream_ << "shape_func";

// Create the `te::Tensor`s which represent the output.
auto outputs = VisitExpr(prim_func->body);
// Create the tensor expressions representing the output shapes.
Array<te::Tensor> outputs = VisitExpr(prim_func->body);

// Generate a name.
auto candidate_name = readable_name_stream_.str();

constexpr static size_t kMaxFuncNameLength = 80;
// WARNING: Please make sure to also update TVM_CRT_MAX_STRLEN_FUNCTION_NAME
// whenever the value of kMaxFuncNameLength changes
@@ -463,7 +465,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
for (auto t : outputs) {
out_ops.push_back(t->op);
}
auto schedule = te::create_schedule(out_ops);
te::Schedule schedule = te::create_schedule(out_ops);
tvm::te::AutoInlineInjective(schedule);
for (const auto& scalar : scalars_) {
auto scalar_op = scalar->op;
@@ -589,12 +591,15 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
}

Array<te::Tensor> VisitExpr_(const CallNode* call_node) final {
VLOG(1) << "considering call:" << std::endl << PrettyPrint(GetRef<Call>(call_node));
if (auto* func = call_node->op.as<FunctionNode>()) {
VLOG(1) << "user function";
for (size_t i = 0; i < func->params.size(); ++i) {
param_arg_map_[func->params[i]] = call_node->args[i];
}
return VisitExpr(func->body);
}

static auto fshape_func = Op::GetAttrMap<FShapeFunc>("FShapeFunc");
static auto tshape_data_dependent = Op::GetAttrMap<TShapeDataDependent>("TShapeDataDependent");
ICHECK(call_node->op.as<OpNode>()) << "Primitive function only allows call into primitive ops";
@@ -635,20 +640,16 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator<Array<te::Tensor>>
// Get output ndims
auto ret_type = call_node->checked_type();
Array<IndexExpr> out_ndims;
if (const auto* ttype = ret_type.as<TensorTypeNode>()) {
for (const auto& ttype : FlattenTupleType(ret_type)) {
out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size()));
} else {
auto rtype = ret_type.as<TupleTypeNode>();
// TODO(@icemelon): Allow recursive tuple
ICHECK(rtype);
for (size_t i = 0; i < rtype->fields.size(); ++i) {
auto ttype = rtype->fields[i].as<TensorTypeNode>();
ICHECK(ttype);
out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size()));
}
}

// Call shape function
auto outputs = fshape_func[op](call_node->attrs, inputs, out_ndims);
Array<te::Tensor> outputs = fshape_func[op](call_node->attrs, inputs, out_ndims);
VLOG(1) << "shape function for '" << op->name << "' with inputs:" << std::endl
<< inputs << std::endl
<< "yielded outputs:" << std::endl
<< outputs;
readable_name_stream_ << "_" << op->name;
return outputs;
}
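The old two-branch code above handled only a plain tensor or a flat tuple of tensors; the replacement flattens the return type, so nested tuples also contribute their leaf tensor ranks. A small Python model of that flattening (an illustration using toy types, not the TVM API):

# Python model (illustrative, not TVM API) of the FlattenTupleType change:
# recursively flatten a return type into its leaf tensor ranks, covering the
# nested tuples the old two-branch code rejected.
def collect_out_ndims(ret_type):
    """ret_type is ("tensor", rank) or ("tuple", [ret_type, ...])."""
    kind, payload = ret_type
    if kind == "tensor":
        return [payload]
    ndims = []
    for field in payload:
        ndims.extend(collect_out_ndims(field))  # recurse into nested tuples
    return ndims

assert collect_out_ndims(("tensor", 2)) == [2]
assert collect_out_ndims(("tuple", [("tensor", 1), ("tuple", [("tensor", 0)])])) == [1, 0]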
128 changes: 128 additions & 0 deletions src/runtime/debug.cc
@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/runtime/debug.cc
* \brief Helpers for debugging at runtime.
*/

#include <tvm/runtime/debug.h>

namespace tvm {
namespace runtime {

template <typename T>
void AppendMembers(std::ostream& os, const NDArray& nd_array, int64_t dim0) {
  os << "=[";
  for (int64_t i = 0; i < dim0; ++i) {
    if (i > 0) {
      os << ",";
    }
    os << reinterpret_cast<T*>(nd_array->data)[i];
  }
  os << "]";
}

void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device,
                   bool show_contents) {
  os << "NDArray[";
  os << "(";
  for (int dim = 0; dim < nd_array->ndim; ++dim) {
    if (dim > 0) {
      os << ",";
    }
    os << nd_array->shape[dim];
  }
  std::string basic_type = DLDataType2String(nd_array->dtype);
  os << ")," << basic_type;
  os << ",(" << nd_array->device.device_type;
  os << "," << nd_array->device.device_id;
  os << ")]";
  if (show_contents && nd_array->device.device_type == host_device.device_type &&
      nd_array->device.device_id == host_device.device_id) {
    int64_t dim0;
    if (nd_array->ndim == 0) {
      dim0 = 1;
    } else if (nd_array->ndim == 1) {
      dim0 = nd_array->shape[0];
      if (dim0 > 10) {
        // Too large.
        dim0 = 0;
      }
    } else {
      // Not rank-1.
      dim0 = 0;
    }
    if (dim0 > 0) {
      if (basic_type == "bool") {
        AppendMembers<bool>(os, nd_array, dim0);
      } else if (basic_type == "int8") {
        AppendMembers<int8_t>(os, nd_array, dim0);
      } else if (basic_type == "int16") {
        AppendMembers<int16_t>(os, nd_array, dim0);
      } else if (basic_type == "int32") {
        AppendMembers<int32_t>(os, nd_array, dim0);
      } else if (basic_type == "int64") {
        AppendMembers<int64_t>(os, nd_array, dim0);
      } else if (basic_type == "uint8") {
        AppendMembers<uint8_t>(os, nd_array, dim0);
      } else if (basic_type == "uint16") {
        AppendMembers<uint16_t>(os, nd_array, dim0);
      } else if (basic_type == "uint32") {
        AppendMembers<uint32_t>(os, nd_array, dim0);
      } else if (basic_type == "uint64") {
        AppendMembers<uint64_t>(os, nd_array, dim0);
      } else if (basic_type == "float32") {
        AppendMembers<float>(os, nd_array, dim0);
      } else if (basic_type == "float64") {
        AppendMembers<double>(os, nd_array, dim0);
      }
    }
  }
}

void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device, bool show_contents) {
  os << "ADT(" << adt->tag;
  for (size_t i = 0; i < adt->size; ++i) {
    os << ",";
    AppendRuntimeObject(os, adt[i], host_device, show_contents);
  }
  os << ")";
}

void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device,
                         bool show_contents) {
  if (const auto* adt_obj = object.as<ADTObj>()) {
    AppendADT(os, GetRef<ADT>(adt_obj), host_device, show_contents);
  } else if (const auto* nd_array_cont = object.as<NDArray::Container>()) {
    AppendNDArray(os, GetRef<NDArray>(nd_array_cont), host_device, show_contents);
  } else {
    os << "?";
  }
}

std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device,
                                 bool show_contents) {
  std::ostringstream os;
  AppendRuntimeObject(os, object, host_device, show_contents);
  return os.str();
}

} // namespace runtime
} // namespace tvm
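For a sense of the strings these helpers produce, a small Python model (an assumption based on the code above, not an existing TVM API) of what AppendNDArray writes for a host-resident array:

# Python model (assumption, mirroring AppendNDArray above) of the debug string:
# NDArray[(shape),dtype,(device_type,device_id)], plus "=[...]" contents when
# the array is rank-0 or rank-1 with at most 10 elements and is on the host.
def describe_ndarray(shape, dtype, device, values, host_device):
    s = "NDArray[(" + ",".join(str(d) for d in shape) + ")," + dtype
    s += ",(%d,%d)]" % device
    small = len(shape) == 0 or (len(shape) == 1 and shape[0] <= 10)
    if device == host_device and small:
        s += "=[" + ",".join(str(v) for v in values) + "]"
    return s

print(describe_ndarray((3,), "int64", (1, 0), [5, 7, 9], (1, 0)))
# -> NDArray[(3),int64,(1,0)]=[5,7,9]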
19 changes: 3 additions & 16 deletions src/runtime/vm/executable.cc
@@ -24,6 +24,7 @@

#include <dmlc/memory_io.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/debug.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/vm/executable.h>
#include <tvm/runtime/vm/vm.h>
@@ -171,27 +172,13 @@ std::string Executable::GetBytecode() const {
return oss.str();
}

namespace {
String ShapeString(const ShapeTuple& shape_tuple, DLDataType dtype) {
  std::stringstream sizes;
  sizes << DLDataType2String(dtype) << "[";
  for (size_t i = 0; i < shape_tuple.size(); i++) {
    if (i != 0) {
      sizes << ", ";
    }
    sizes << shape_tuple.data()[i];
  }
  sizes << "]";
  return String(sizes.str());
}
} // namespace

std::string Executable::GetConstants() const {
  std::ostringstream oss;
  for (size_t i = 0; i < constants.size(); ++i) {
    const auto& constant = constants[i];
    auto ndarray = Downcast<NDArray>(constant);
    oss << "VM Const[" << i << "]: has shape " << ShapeString(ndarray.Shape(), ndarray->dtype)
    oss << "VM Const[" << i
        << "]: " << RuntimeObject2String(ndarray, virtual_devices[host_device_index])
        << " on device index " << const_device_indexes[i] << std::endl;
  }
  return oss.str();