#14049: making this branch up to date with conv.cpp
shwetankTT committed Nov 28, 2024
1 parent 06293b4 commit 4ce0d84
Showing 34 changed files with 618 additions and 476 deletions.
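
Every call-site change below follows the same pattern: the keyword flags return_output_size and return_prepared_device_weights are dropped, and the result of ttnn.conv2d is unpacked as a five-element tuple of output tensor, output height, output width, prepared device weight tensor, and prepared device bias tensor. The sketch below contrasts the two calling conventions as they can be read off this diff; conv_kwargs stands in for the usual keyword arguments seen at these call sites (input_tensor, weight_tensor, in_channels, conv_config, conv_op_cache, groups, and so on) and is not itself part of the API.

    import ttnn

    def conv_block(conv_kwargs):
        # Before this commit, call sites passed
        #     return_output_size=True, return_prepared_device_weights=True
        # and unpacked a shorter tuple, e.g.
        #     out, weight_dev, bias_dev = ttnn.conv2d(**conv_kwargs)
        # After this commit the flags are gone; the output size and the
        # prepared device-side weight/bias tensors are always returned.
        out, out_h, out_w, weight_dev, bias_dev = ttnn.conv2d(**conv_kwargs)
        return out, (out_h, out_w), weight_dev, bias_dev

Call sites that do not need the spatial size simply discard it, which is why the ResNet blocks below unpack the result as out, _, _, weight, bias.
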
4 changes: 0 additions & 4 deletions models/demos/convnet_mnist/tt/convnet_mnist.py
@@ -50,8 +50,6 @@ def convnet_mnist(
conv_op_cache={},
debug=True,
groups=1,
- return_output_size=True,
- return_prepared_device_weights=True,
)
x = ttnn.relu(x)

@@ -95,8 +93,6 @@ def convnet_mnist(
conv_op_cache={},
debug=False,
groups=1,
- return_output_size=True,
- return_prepared_device_weights=True,
)

x = ttnn.relu(x)
2 changes: 0 additions & 2 deletions models/demos/segformer/tt/common.py
@@ -72,8 +72,6 @@ def __call__(self, device, input_tensor):
input_width=input_tensor.shape[2],
conv_config=conv_config,
groups=self.groups,
- return_output_size=True,
- return_prepared_device_weights=True,
)

return output_tensor, _out_height, _out_width
Changes to an additional file (file path not captured in this view)
@@ -167,7 +167,7 @@ def run_downsample_if_req(
shard_layout = (
ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
)
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -190,7 +190,6 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -233,14 +232,12 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -301,19 +298,17 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
- out, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
+ out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -330,13 +325,12 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)

if not self.run_downsample_before_conv2:
@@ -575,8 +569,6 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

@@ -888,8 +880,6 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

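
One detail of these ResNet call sites worth spelling out: the prepared device weight and bias tensors returned by ttnn.conv2d are assigned back onto the module (self.ds_conv_weight_tensor, self.conv3_weight_tensor, and so on), so the first call prepares host weights on device and later calls reuse the prepared tensors. The minimal sketch below illustrates that caching idea; ConvCache and run are hypothetical names, and passing bias_tensor alongside the keyword arguments shown in the diff is an assumption about the ttnn.conv2d signature.

    import ttnn

    class ConvCache:
        # Holds whatever ttnn.conv2d hands back as the prepared device-side
        # weight/bias, mirroring the self.*_weight_tensor reassignment above.
        def __init__(self, weight_tensor, bias_tensor):
            self.weight_tensor = weight_tensor  # host tensors on the first call
            self.bias_tensor = bias_tensor

        def run(self, **conv_kwargs):
            # Output height/width are discarded here, exactly as the downsample
            # and conv3 call sites in this diff do with the `_, _` pattern.
            out, _, _, self.weight_tensor, self.bias_tensor = ttnn.conv2d(
                weight_tensor=self.weight_tensor,
                bias_tensor=self.bias_tensor,
                **conv_kwargs,
            )
            return out
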
Changes to an additional file (file path not captured in this view)
@@ -160,7 +160,7 @@ def run_downsample_if_req(
):
if self.downsample:
logger.debug(f"Running downsample")
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -177,11 +177,9 @@ def run_downsample_if_req(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED
- if height_sharding
- else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=not (is_wormhole_b0() and batch_size == 16),
reshard_if_not_optimal=reshard_if_not_optimal,
@@ -197,7 +195,6 @@ def run_downsample_if_req(
enable_subblock_padding=enable_subblock_padding,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -247,16 +244,14 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -333,9 +328,9 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
@@ -345,8 +340,6 @@ def __call__(
enable_subblock_padding=enable_subblock_padding,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

logger.debug(
@@ -382,16 +375,14 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

if not run_downsample_before_conv2:
@@ -743,8 +734,6 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
input_width=self.conv1_input_width,
conv_config=self.conv1_config,
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1
if self.batch_size == 20:
Changes to an additional file (file path not captured in this view)
@@ -162,7 +162,7 @@ def run_downsample_if_req(
height_sharding=None,
):
if self.downsample:
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -179,17 +179,14 @@ def run_downsample_if_req(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED
- if height_sharding
- else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -230,14 +227,12 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -296,19 +291,17 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
- out, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
+ out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -325,13 +318,12 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)

if not self.run_downsample_before_conv2:
@@ -547,8 +539,6 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

@@ -852,8 +842,6 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

(Diffs for the remaining changed files are not shown in this view.)
