From 76d6411f505cc39bdb6f0a159bc311f603152cc8 Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Thu, 21 Jul 2022 10:04:28 +0000 Subject: [PATCH] [ETHOSN] Get buffer sizes from the compiled network The NPU support library compiler sometimes adds padding to input tensors which means the buffer sizes calculated at runtime can sometimes be smaller than necessary. Instead, buffer sizes are now collected at compile time and passed to the runtime so that they match the sizes expected by the compiled network. This was seen when running a fully connected operation with an input that is not a multiple of 1024, so testing has been added to cover this case. Additionally changed the fully connected test case to use pytest parameterization as part of a general cleanup, and fixed the fully connected testing to support output channels > 1. Change-Id: Iad319d75326b9ac41950de982603660a084dc27b --- src/relay/backend/contrib/ethosn/codegen.cc | 17 ++++ .../backend/contrib/ethosn/codegen_ethosn.h | 13 +++ src/runtime/contrib/ethosn/ethosn_device.cc | 38 ++++---- src/runtime/contrib/ethosn/ethosn_device.h | 8 +- src/runtime/contrib/ethosn/ethosn_runtime.cc | 14 ++- src/runtime/contrib/ethosn/ethosn_runtime.h | 4 + .../test_ethosn/test_fullyconnected.py | 95 ++++++++++--------- .../contrib/test_ethosn/test_networks.py | 10 +- 8 files changed, 126 insertions(+), 73 deletions(-) diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc index 67ae1d20e3d0e..587c338205611 100644 --- a/src/relay/backend/contrib/ethosn/codegen.cc +++ b/src/relay/backend/contrib/ethosn/codegen.cc @@ -629,6 +629,7 @@ runtime::ethosn::OrderedCompiledNetwork EthosnCompiler::CompileEthosnFunc(const // Determine the order that the inputs/outputs are in and how that corresponds to the // order that the TVM runtime will expect them in auto input_output_order = GetInputOutputOrder(network_with_ids, compiled_network); + auto io_sizes = GetIOSizes(compiled_network); // Use the order information to create an 'ordered' network with includes how to map // the inputs/outputs from the TVM runtime to the inputs/outputs of the compiled network runtime::ethosn::OrderedCompiledNetwork ordered_network; @@ -636,6 +637,8 @@ runtime::ethosn::OrderedCompiledNetwork EthosnCompiler::CompileEthosnFunc(const ordered_network.compiled_cmm = std::move(compiled_network); ordered_network.inputs = input_output_order.first; ordered_network.outputs = input_output_order.second; + ordered_network.input_sizes = io_sizes.first; + ordered_network.output_sizes = io_sizes.second; return ordered_network; } @@ -686,6 +689,20 @@ std::pair, std::vector> EthosnCompiler::GetInput return std::make_pair(input_order, output_order); } +std::pair, std::vector> EthosnCompiler::GetIOSizes( + const std::unique_ptr& compiled_network) { + std::vector input_sizes; + std::vector output_sizes; + for (const sl::InputBufferInfo info : compiled_network->GetInputBufferInfos()) { + input_sizes.push_back(info.m_Size); + } + for (const sl::OutputBufferInfo info : compiled_network->GetOutputBufferInfos()) { + output_sizes.push_back(info.m_Size); + } + + return std::make_pair(input_sizes, output_sizes); +} + std::unique_ptr EthosnCompiler::m_Queries; EthosnError EthosnCompiler::SupportedSetup() { diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h index 9da4e5b18bd5d..f3c9c77826e98 100644 --- a/src/relay/backend/contrib/ethosn/codegen_ethosn.h +++ b/src/relay/backend/contrib/ethosn/codegen_ethosn.h @@ -350,6 +350,19 @@ class EthosnCompiler { static std::pair, std::vector> GetInputOutputOrder( NetworkWithIDs network, const std::unique_ptr& compiled_network); + /*! + * \brief Determine the input and output sizes of a compiled network. + * + * These need to be queried from the compiled network as the compiler can choose + * to add additional padding on the input/output in certain cases. + * + * \param compiled_network The network compiled by the NPU compiler. + * \return Pair of vectors of buffer sizes for both the inputs and outputs of the + * network. + */ + static std::pair, std::vector> GetIOSizes( + const std::unique_ptr& compiled_network); + /*! * \brief Query interface used to determine if the Ethos-N hardware supports an operation * with the supplied parameters. diff --git a/src/runtime/contrib/ethosn/ethosn_device.cc b/src/runtime/contrib/ethosn/ethosn_device.cc index 98717036385da..5f657da1787ad 100644 --- a/src/runtime/contrib/ethosn/ethosn_device.cc +++ b/src/runtime/contrib/ethosn/ethosn_device.cc @@ -95,28 +95,28 @@ void CopyOutput(dl::Buffer* source_buffers[], std::vector* outputs) { } void CreateBuffers(std::vector >* fm, - const std::vector& tensors, bool input) { - int index = 0; - for (auto buffer : tensors) { - auto* data = static_cast(buffer->data); - // The NPU only needs the size of the tensor * uint8_t. - auto data_size = static_cast(GetDataSize(*buffer)); + const std::vector& tensors, const std::vector& tensor_sizes, + bool input) { + for (size_t i = 0; i < tensors.size(); i++) { + auto* data = static_cast(tensors[i]->data); if (input) { - (*fm)[index++] = std::make_shared(data, data_size, dl::DataFormat::NHWC); + (*fm)[i] = std::make_shared(data, tensor_sizes[i], dl::DataFormat::NHWC); } else { - (*fm)[index++] = std::make_shared(data_size, dl::DataFormat::NHWC); + (*fm)[i] = std::make_shared(tensor_sizes[i], dl::DataFormat::NHWC); } } } #if _ETHOSN_API_VERSION_ <= 2102 bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, - const std::vector& input_order, - const std::vector& output_order) { + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, + const std::vector& output_sizes) { #else bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu, - const std::vector& input_order, - const std::vector& output_order) { + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, + const std::vector& output_sizes) { #endif // Unpack parameters uint8_t argc = 0; @@ -133,11 +133,11 @@ bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu, // Set up input buffers std::vector > ifm(inputs.size()); - CreateBuffers(&ifm, inputs, true); + CreateBuffers(&ifm, inputs, input_sizes, true); // Set up output buffers std::vector > ofm(outputs.size()); - CreateBuffers(&ofm, outputs, false); + CreateBuffers(&ofm, outputs, output_sizes, false); // Raw pointers for the inference dl::Buffer* ifm_raw[inputs.size()]; @@ -222,12 +222,14 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result") // Allow the ethos-n support code to be tested without a device #if _ETHOSN_API_VERSION_ <= 2102 bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, - const std::vector& input_order, - const std::vector& output_order) { + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, + const std::vector& output_sizes) { #else bool Inference(tvm::runtime::TVMArgs args, dl::Network* /* npu */, - const std::vector& input_order, - const std::vector& output_order) { + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, + const std::vector& output_sizes) { #endif std::vector outputs; for (int argc = input_order.size(); argc < args.size(); argc++) { diff --git a/src/runtime/contrib/ethosn/ethosn_device.h b/src/runtime/contrib/ethosn/ethosn_device.h index d69be62aa603c..2d1e536ef8e78 100644 --- a/src/runtime/contrib/ethosn/ethosn_device.h +++ b/src/runtime/contrib/ethosn/ethosn_device.h @@ -41,10 +41,12 @@ using tvm::runtime::TVMArgs; #if _ETHOSN_API_VERSION_ <= 2102 bool Inference(TVMArgs args, sl::CompiledNetwork* npu, const std::vector& input_order, - const std::vector& output_order); + const std::vector& output_order, const std::vector& input_sizes, + const std::vector& output_sizes); #else -bool Inference(TVMArgs args, dl::Network* npu, const std::vector& input_order, - const std::vector& output_order); +bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu, + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, const std::vector& output_sizes); #endif } // namespace ethosn diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.cc b/src/runtime/contrib/ethosn/ethosn_runtime.cc index 962d4db47eb93..295ff537b3795 100644 --- a/src/runtime/contrib/ethosn/ethosn_runtime.cc +++ b/src/runtime/contrib/ethosn/ethosn_runtime.cc @@ -60,6 +60,8 @@ EthosnModule::EthosnModule(std::vector* cmms) { #endif network_map_[it.name].inputs = it.inputs; network_map_[it.name].outputs = it.outputs; + network_map_[it.name].input_sizes = it.input_sizes; + network_map_[it.name].output_sizes = it.output_sizes; } } @@ -69,10 +71,12 @@ PackedFunc EthosnModule::GetFunction(const std::string& name, return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { #if _ETHOSN_API_VERSION_ <= 2102 *rv = Inference(args, network_map_[name].compiled_cmm.get(), network_map_[name].inputs, - network_map_[name].outputs); + network_map_[name].outputs, network_map_[name].input_sizes, + network_map_[name].output_sizes); #else *rv = Inference(args, network_map_[name].runtime_cmm.get(), network_map_[name].inputs, - network_map_[name].outputs); + network_map_[name].outputs, network_map_[name].input_sizes, + network_map_[name].output_sizes); #endif }); } else { @@ -90,8 +94,10 @@ void EthosnModule::SaveToBinary(dmlc::Stream* stream) { stream->Write(ss.str()); stream->Write(it.second.inputs.size()); stream->Write(&it.second.inputs[0], sizeof(uint32_t) * it.second.inputs.size()); + stream->Write(&it.second.input_sizes[0], sizeof(uint32_t) * it.second.input_sizes.size()); stream->Write(it.second.outputs.size()); stream->Write(&it.second.outputs[0], sizeof(uint32_t) * it.second.outputs.size()); + stream->Write(&it.second.output_sizes[0], sizeof(uint32_t) * it.second.output_sizes.size()); } } @@ -128,12 +134,16 @@ Module EthosnModule::LoadFromBinary(void* strm) { compiled.inputs.resize(size); // Read the order of inputs stream->Read(&compiled.inputs[0], sizeof(uint32_t) * size); + compiled.input_sizes.resize(size); + stream->Read(&compiled.input_sizes[0], sizeof(uint32_t) * size); // Read the number of outputs stream->Read(&output_size); size = static_cast(output_size); compiled.outputs.resize(size); // Read the order of outputs stream->Read(&compiled.outputs[0], sizeof(uint32_t) * size); + compiled.output_sizes.resize(size); + stream->Read(&compiled.output_sizes[0], sizeof(uint32_t) * size); } auto n = make_object(&cmms); return Module(n); diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.h b/src/runtime/contrib/ethosn/ethosn_runtime.h index ed5d04143e8e5..b60250754b31e 100644 --- a/src/runtime/contrib/ethosn/ethosn_runtime.h +++ b/src/runtime/contrib/ethosn/ethosn_runtime.h @@ -52,6 +52,8 @@ struct OrderedCompiledNetwork { std::string name; std::vector inputs; std::vector outputs; + std::vector input_sizes; + std::vector output_sizes; }; class EthosnModule : public ModuleNode { @@ -88,8 +90,10 @@ class EthosnModule : public ModuleNode { * std::string : serialized command stream * size_t : number of inputs * std::vector : order of inputs + * std::vector : buffer sizes for inputs * size_t : number of outputs * std::vector : order of outputs + * std::vector : buffer sizes for outputs * ] * number of functions */ static Module LoadFromBinary(void* strm); diff --git a/tests/python/contrib/test_ethosn/test_fullyconnected.py b/tests/python/contrib/test_ethosn/test_fullyconnected.py index 7a636561f9cef..5e15e5a7c3423 100644 --- a/tests/python/contrib/test_ethosn/test_fullyconnected.py +++ b/tests/python/contrib/test_ethosn/test_fullyconnected.py @@ -42,9 +42,9 @@ def _get_model( units=weight_shape[0], out_dtype="int32", ) - b = tvm.nd.array(np.random.randint(0, high=255, size=(shape[0],), dtype="int32")) + b = tvm.nd.array(np.random.randint(0, high=255, size=(weight_shape[0],), dtype="int32")) biasc = relay.const(b, "int32") - bias = relay.nn.bias_add(fc, biasc, axis=0) + bias = relay.nn.bias_add(fc, biasc) req = relay.qnn.op.requantize( bias, relay.const(input_sc * kernel_sc, "float32"), # input zero scale @@ -58,55 +58,60 @@ def _get_model( @requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8"]) -def test_fullyconnected(dtype): - zp_min = np.iinfo(dtype).min - zp_max = np.iinfo(dtype).max - trials = [ - ((1, 1024), zp_min + 71, 0.580, zp_max - 176, 1.498), - ((1, 4096), zp_min + 166, 1.724, zp_max - 138, 0.180), - ((1, 16384), zp_min + 101, 1.372, zp_max - 234, 1.346), - ] +@pytest.mark.parametrize( + "shape,out_channels", + [ + ((1, 1024), 64), + ((1, 16384), 1), + ((1, 1280), 1000), + ], +) +@pytest.mark.parametrize( + "dtype,input_zp,input_sc,kernel_zp,kernel_sc", + [ + ("uint8", 71, 0.580, 176, 1.498), + ("uint8", 166, 1.724, 138, 0.180), + ("int8", 71, 0.580, 0, 1.498), + ("int8", 120, 1.724, 0, 0.180), + ], +) +def test_fullyconnected(shape, out_channels, dtype, input_zp, input_sc, kernel_zp, kernel_sc): + """ + Test fully connected offloading. + """ np.random.seed(0) - for shape, input_zp, input_sc, kernel_zp, kernel_sc in trials: - kernel_zp = ( - 0 - if dtype == "int8" - else np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max) + 1 - ) - inputs = { - "a": tvm.nd.array( - np.random.randint( - np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype - ) - ), - } - outputs = [] - output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, + inputs = { + "a": tvm.nd.array( + np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) + ), + } + + outputs = [] + output_zp, output_sc = tei.get_conv2d_qnn_params( + dtype, + input_zp, + input_sc, + kernel_zp, + kernel_sc, + shape[0], + shape[1], + 1, + ) + for npu in [False, True]: + model, params = _get_model( + shape, + (out_channels, shape[1]), input_zp, input_sc, kernel_zp, kernel_sc, - shape[0], - shape[1], - 1, + output_zp, + output_sc, + dtype, ) - for npu in [False, True]: - model, params = _get_model( - shape, - shape, - input_zp, - input_sc, # input zp, sc - kernel_zp, - kernel_sc, # kernel - output_zp, - output_sc, # output - dtype, - ) - mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) - tei.verify(outputs, dtype, 1) + mod = tei.make_module(model, params) + outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) + tei.verify(outputs, dtype, 1) @requires_ethosn diff --git a/tests/python/contrib/test_ethosn/test_networks.py b/tests/python/contrib/test_ethosn/test_networks.py index 143ec0b88dfe4..3ab609afd9fb5 100644 --- a/tests/python/contrib/test_ethosn/test_networks.py +++ b/tests/python/contrib/test_ethosn/test_networks.py @@ -125,7 +125,7 @@ def test_mobilenet_v1(): # on hardware that isn't available in CI. _compile_hash = {"393a19dfb980345cdd3bbeddbc36424d"} if tei.get_ethosn_api_version() == 2111: - _compile_hash = {"5d1c6a6bd4df8963866cc90405bf92dd"} + _compile_hash = {"c523c3c2bb9add1fee508217eb73af1a"} if tei.get_ethosn_api_version() == 2102: _compile_hash = {"46ccafc840633633aca441645e41b444"} if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO": @@ -152,7 +152,7 @@ def test_resnet_50_int8(): # on hardware that isn't available in CI. if tei.get_ethosn_api_version() > 2011: if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO": - _compile_hash = {"c0a01c547ed1b2e3308094508fa1bfea", "434f0c65c41e24d5482142c88b3438fe"} + _compile_hash = {"60404ad60fc2bfbb68464d8a14cc0452", "5b9d72b9accfea7ed89eb09ca0aa5487"} _test_image_network( model_url="https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/" "models/Quantized/resnet_50_quantized.tflite", @@ -174,7 +174,7 @@ def test_inception_v3(): # on hardware that isn't available in CI. _compile_hash = {"2c7ff5487e1a21e62b3b42eec624fed4"} if tei.get_ethosn_api_version() == 2111: - _compile_hash = {"e6abe33a7bc4a4170da53eefa6577bba"} + _compile_hash = {"88db2c7928240be9833c1b5ef367de28"} if tei.get_ethosn_api_version() == 2102: _compile_hash = {"43dc2097127eb224c0191b1a15f8acca"} if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO": @@ -200,7 +200,7 @@ def test_inception_v4(): # on hardware that isn't available in CI. _compile_hash = {"4245dbd02e1432dc261a67fc8e632a00"} if tei.get_ethosn_api_version() == 2111: - _compile_hash = {"42e43c323ed8202f7b720ba9029bbcb7"} + _compile_hash = {"37648682f97cbbcecdc13945b7f2212f"} if tei.get_ethosn_api_version() == 2102: _compile_hash = {"fab6c2297502f95d33079c6ce1a737f9"} if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO": @@ -227,7 +227,7 @@ def test_ssd_mobilenet_v1(): _compile_hash = {"5ee8ed6af9a7f31fc14957b51a8e7423", "e6a91ccc47ba4c6b4614fcd676bd726f"} if tei.get_ethosn_api_version() == 2111: # TODO(Leo-arm): review split operator - _compile_hash = {"a37f900601b9493bd142e8aed16205e5", "afb68ca8f452d1f4a674b457b5e30f59"} + _compile_hash = {"6b699f94795785d31b39940a5cf84a81", "7b8b0a3ad7cfe1695dee187f21f03785"} if tei.get_ethosn_api_version() == 2102: _compile_hash = {"7795b6c67178da9d1f9b98063bad75b1", "10826406ae724e52f360a06c35ced09d"} if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO":