[microNPU] Fix bug in channels extraction in the matcher (apache#11335)

* [microNPU] Fix bug in channels extraction in the matcher. If the input tensor layout is NHCWB16, we were passing the W value instead of the channels to get_valid_block_configs. * Add test for conv2d
driazati · May 18, 2022 · 2b1e5ce · 2b1e5ce
1 parent f88a10f
commit 2b1e5ce
Show file tree

Hide file tree

Showing 4 changed files with 90 additions and 9 deletions.
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -287,7 +287,9 @@ def match_ethosu_conv2d(output_tensor, device_config):
     ifm_dtype = input_tensors[0].dtype
     ofm_dtype = output_tensor.dtype
 
-    ifm_channels = int(input_tensors[0].shape[3])
+    # Use channels from the weights tensor since its shape doesn't change during layout
+    # conversion
+    ifm_channels = int(input_tensors[1].shape[3])
     ofm_channels, kernel_height, kernel_width = (int(axis) for axis in input_tensors[1].shape[0:3])
     kernel_elements = kernel_height * kernel_width
 

diff --git a/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py b/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
@@ -279,8 +279,7 @@ def match_ethosu_depthwise_conv2d(output_tensor, device_config):
     ifm_dtype = input_tensors[0].dtype
     ofm_dtype = output_tensor.dtype
 
-    ifm_channels = int(input_tensors[0].shape[3])
-    ofm_channels, kernel_height, kernel_width = (int(axis) for axis in input_tensors[1].shape[0:3])
+    channels, kernel_height, kernel_width = (int(axis) for axis in input_tensors[1].shape[0:3])
 
     subkernels = len(
         device_config.get_kernel_steps(depthwise2d.op.name, kernel_height, kernel_width, ifm_dtype)
@@ -294,8 +293,8 @@ def match_ethosu_depthwise_conv2d(output_tensor, device_config):
         propagators[0],
         depthwise2d.op.attrs,
         output_tensor.shape,
-        ofm_channels,
-        ifm_channels,
+        channels,
+        channels,
         output_layout,
         input_layout,
         ifm_dtype,

diff --git a/python/tvm/relay/backend/contrib/ethosu/te/pooling.py b/python/tvm/relay/backend/contrib/ethosu/te/pooling.py
@@ -239,8 +239,8 @@ def match_ethosu_pooling(output_tensor, device_config):
     ifm_dtype = input_tensors[0].dtype
     ofm_dtype = output_tensor.dtype
 
-    ifm_channels = int(input_tensors[0].shape[3])
-    ofm_channels = ifm_channels
+    # Use channels from a stage of TE graph where the IFM is always NHWC
+    channels = int(pool2d.shape[3])
     pool_shape_h = int(pool2d.op.attrs["pool_shape_h"])
     pool_shape_w = int(pool2d.op.attrs["pool_shape_w"])
 
@@ -256,8 +256,8 @@ def match_ethosu_pooling(output_tensor, device_config):
         propagators[0],
         pool2d.op.attrs,
         output_tensor.shape,
-        ofm_channels,
-        ifm_channels,
+        channels,
+        channels,
         output_layout,
         input_layout,
         ifm_dtype,

diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py
@@ -98,5 +98,85 @@ def test_ethosu_conv2d_matcher(
     assert part.propagators[2].offset == scale_bias_offset
 
 
+@pytest.mark.parametrize(
+    "ifm_layout, ofm_layout, ifm_channels, expected_cycles",
+    [
+        ("NHWC", "NHWC", 24, 2304),
+        ("NHCWB16", "NHWC", 12, 2352),
+        ("NHWC", "NHCWB16", 38, 7056),
+        ("NHCWB16", "NHCWB16", 55, 4608),
+    ],
+)
+def test_ethosu_conv2d_block_config_from_matcher(
+    ifm_layout, ofm_layout, ifm_channels, expected_cycles
+):
+    ofm_channels = 10
+    ifm_height = 123
+    ifm_width = 155
+
+    ifm_shape = (
+        (1, ifm_height, ifm_width, ifm_channels)
+        if ifm_layout == "NHWC"
+        else (1, ifm_height, 1 + ((ifm_channels - 1) // 16), ifm_width, 16)
+    )
+    weight_shape = (ofm_channels, 3, 3, ifm_channels)
+    scale_bias_shape = (ofm_channels, 10)
+
+    ifm = te.placeholder(ifm_shape, dtype="int8")
+    weight = te.placeholder(weight_shape, dtype="int8")
+    scale_bias = te.placeholder(scale_bias_shape, dtype="uint8")
+    lut = te.placeholder((), dtype="uint8")
+    out = conv2d_compute(
+        ifm=ifm,
+        weight=weight,
+        scale_bias=scale_bias,
+        lut=lut,
+        ifm_scale=1,
+        ifm_zero_point=0,
+        ofm_scale=1,
+        ofm_zero_point=0,
+        weight_zero_point=0,
+        strides=(1, 1),
+        padding=(0, 0, 0, 0),
+        dilation=(1, 1),
+        activation="NONE",
+        clip_min=0,
+        clip_max=0,
+        upscale="NONE",
+        rounding_mode="TFL",
+        ifm_layout=ifm_layout,
+        ofm_layout=ofm_layout,
+    )
+
+    device_config = cs.EthosuDeviceConfig("ethos-u55-256")
+    part = match_ethosu_conv2d(out, device_config)
+
+    ofm_shape = [int(i) for i in part.subgraph.output_tensor.shape]
+
+    # Add inputs and outputs to the part
+    input_tensor = cs.Tensor(ifm_shape, "int8")
+    part.set_input(0, input_tensor)
+    weight_tensor = cs.Tensor(weight_shape, "int8")
+    part.set_input(1, weight_tensor)
+    scale_bias_tensor = cs.Tensor(scale_bias_shape, "int8")
+    part.set_input(2, scale_bias_tensor)
+    output_tensor = cs.Tensor(ofm_shape, "int8")
+    part.set_output(output_tensor)
+
+    # Create a stripe of a size of the output tensor
+    order = [1, 2, 3, 4] if ofm_layout == "NHWC" else [1, 2, 4, 3, 0]
+    stripes = [1] * len(order)
+    offset = [0] * len(order)
+
+    stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset)
+
+    block = part.get_block_config(stripe_config)
+
+    # Since we don't know the values of the variables we passed to get_valid_block_configs in the
+    # matcher, the best we can do is to verify the compute cycle count, since the channels have a
+    # significant effect on it
+    assert block.compute_cycles == expected_cycles
+
+
 if __name__ == "__main__":
     pytest.main([__file__])