#14049: making this branch up to date with conv.cpp
shwetankTT committed Nov 28, 2024
1 parent 06293b4 commit 4ce0d84
Showing 34 changed files with 618 additions and 476 deletions.
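
Every call-site change below follows the same pattern: the keyword flags return_output_size and return_prepared_device_weights are dropped, and the result of ttnn.conv2d is unpacked as a five-element tuple of output tensor, output height, output width, prepared device weight tensor, and prepared device bias tensor. The sketch below contrasts the two calling conventions as they can be read off this diff; conv_kwargs stands in for the usual keyword arguments seen at these call sites (input_tensor, weight_tensor, in_channels, conv_config, conv_op_cache, groups, and so on) and is not itself part of the API.

    import ttnn

    def conv_block(conv_kwargs):
        # Before this commit, call sites passed
        #     return_output_size=True, return_prepared_device_weights=True
        # and unpacked a shorter tuple, e.g.
        #     out, weight_dev, bias_dev = ttnn.conv2d(**conv_kwargs)
        # After this commit the flags are gone; the output size and the
        # prepared device-side weight/bias tensors are always returned.
        out, out_h, out_w, weight_dev, bias_dev = ttnn.conv2d(**conv_kwargs)
        return out, (out_h, out_w), weight_dev, bias_dev

Call sites that do not need the spatial size simply discard it, which is why the ResNet blocks below unpack the result as out, _, _, weight, bias.
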
4 changes: 0 additions & 4 deletions models/demos/convnet_mnist/tt/convnet_mnist.py
@@ -50,8 +50,6 @@ def convnet_mnist(
conv_op_cache={},
debug=True,
groups=1,
- return_output_size=True,
- return_prepared_device_weights=True,
)
x = ttnn.relu(x)

@@ -95,8 +93,6 @@ def convnet_mnist(
conv_op_cache={},
debug=False,
groups=1,
- return_output_size=True,
- return_prepared_device_weights=True,
)

x = ttnn.relu(x)
2 changes: 0 additions & 2 deletions models/demos/segformer/tt/common.py
@@ -72,8 +72,6 @@ def __call__(self, device, input_tensor):
input_width=input_tensor.shape[2],
conv_config=conv_config,
groups=self.groups,
- return_output_size=True,
- return_prepared_device_weights=True,
)

return output_tensor, _out_height, _out_width
Changes to an additional file (file path not captured in this view)
@@ -167,7 +167,7 @@ def run_downsample_if_req(
shard_layout = (
ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
)
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -190,7 +190,6 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -233,14 +232,12 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -301,19 +298,17 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
- out, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
+ out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -330,13 +325,12 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)

if not self.run_downsample_before_conv2:
@@ -575,8 +569,6 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

@@ -888,8 +880,6 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

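
One detail of these ResNet call sites worth spelling out: the prepared device weight and bias tensors returned by ttnn.conv2d are assigned back onto the module (self.ds_conv_weight_tensor, self.conv3_weight_tensor, and so on), so the first call prepares host weights on device and later calls reuse the prepared tensors. The minimal sketch below illustrates that caching idea; ConvCache and run are hypothetical names, and passing bias_tensor alongside the keyword arguments shown in the diff is an assumption about the ttnn.conv2d signature.

    import ttnn

    class ConvCache:
        # Holds whatever ttnn.conv2d hands back as the prepared device-side
        # weight/bias, mirroring the self.*_weight_tensor reassignment above.
        def __init__(self, weight_tensor, bias_tensor):
            self.weight_tensor = weight_tensor  # host tensors on the first call
            self.bias_tensor = bias_tensor

        def run(self, **conv_kwargs):
            # Output height/width are discarded here, exactly as the downsample
            # and conv3 call sites in this diff do with the `_, _` pattern.
            out, _, _, self.weight_tensor, self.bias_tensor = ttnn.conv2d(
                weight_tensor=self.weight_tensor,
                bias_tensor=self.bias_tensor,
                **conv_kwargs,
            )
            return out
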
Changes to an additional file (file path not captured in this view)
@@ -160,7 +160,7 @@ def run_downsample_if_req(
):
if self.downsample:
logger.debug(f"Running downsample")
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -177,11 +177,9 @@ def run_downsample_if_req(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED
- if height_sharding
- else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=not (is_wormhole_b0() and batch_size == 16),
reshard_if_not_optimal=reshard_if_not_optimal,
@@ -197,7 +195,6 @@ def run_downsample_if_req(
enable_subblock_padding=enable_subblock_padding,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -247,16 +244,14 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -333,9 +328,9 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
@@ -345,8 +340,6 @@ def __call__(
enable_subblock_padding=enable_subblock_padding,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

logger.debug(
@@ -382,16 +375,14 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=transpose_shards,
packer_l1_accum_enabled=packer_l1_acc,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

if not run_downsample_before_conv2:
@@ -743,8 +734,6 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
input_width=self.conv1_input_width,
conv_config=self.conv1_config,
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1
if self.batch_size == 20:
Changes to an additional file (file path not captured in this view)
@@ -162,7 +162,7 @@ def run_downsample_if_req(
height_sharding=None,
):
if self.downsample:
- ds_out, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
+ ds_out, _, _, self.ds_conv_weight_tensor, self.ds_conv_bias_tensor = ttnn.conv2d(
input_tensor=x,
weight_tensor=self.ds_conv_weight_tensor,
in_channels=self.ds_conv_input_channels,
@@ -179,17 +179,14 @@ def run_downsample_if_req(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED
- if height_sharding
- else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)
ttnn.deallocate(x)
ds_out = ttnn.reallocate(ds_out)
@@ -230,14 +227,12 @@ def __call__(
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

act_block_h_override = 0
@@ -296,19 +291,17 @@ def __call__(
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
act_block_h_override=act_block_h_override,
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)

# conv3 is 1x1 conv
# print("Running conv3")
- out, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
+ out, _, _, self.conv3_weight_tensor, self.conv3_bias_tensor = ttnn.conv2d(
input_tensor=out,
weight_tensor=self.conv3_weight_tensor,
in_channels=self.conv3_input_channels,
@@ -325,13 +318,12 @@ def __call__(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
- shard_layout=(
- ttnn.TensorMemoryLayout.HEIGHT_SHARDED if height_sharding else ttnn.TensorMemoryLayout.BLOCK_SHARDED
- ),
+ shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
+ if height_sharding
+ else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
conv_op_cache=conv_op_cache,
- return_prepared_device_weights=True,
)

if not self.run_downsample_before_conv2:
@@ -547,8 +539,6 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

@@ -852,8 +842,6 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
act_block_h_override=act_block_h_override,
),
conv_op_cache=conv_op_cache,
- return_output_size=True,
- return_prepared_device_weights=True,
)
# Relu is fused with conv1

(Diffs for the remaining changed files are not shown in this view.)
