Skip to content

Commit

Permalink
Restored previous state.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergey Shtin committed May 25, 2022
1 parent 08563ad commit 8cf733a
Showing 1 changed file with 15 additions and 33 deletions.
48 changes: 15 additions & 33 deletions python/tvm/topi/x86/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
from ..utils import get_const_int, const_vector


def _concat(a_tuple, axis=0):
"""Join a sequence of arrays along an existing axis.
def concatenate(data: tvm.te.Tensor, axis: Optional[int] = 0):
"""Join a sequence of arrays along an existing axis. Optimized for CPU execution.
Parameters
----------
a_tuple : tuple of tvm.te.Tensor
data : tuple of tvm.te.Tensor
The arrays to concatenate
axis : int, optional
Expand All @@ -45,7 +45,7 @@ def gen_ir_1d(data_bufs, in_outers_tensor, in_cumsum_tensor, out_buf):
out_buf = i_b.buffer_ptr(out_buf)
outers = i_b.buffer_ptr(in_outers_tensor)
cumsum = i_b.buffer_ptr(in_cumsum_tensor)
for i in range(len(a_tuple)):
for i in range(len(data)):
with i_b.for_range(0, outers[i], name="j") as j:
out_buf[cumsum[i] + j] = data_bufs1[i][j]
return i_b.get()
Expand All @@ -60,39 +60,39 @@ def gen_ir(data_bufs, in_outers_tensor, in_cumsum_tensor, out_buf, inner, outer)
if inner > 1:
with i_b.for_range(0, inner, name="inn", kind="parallel") as inn:
pos = inn * outer
for i in range(len(a_tuple)):
for i in range(len(data)):
offset = inn * outers[i]
with i_b.for_range(0, outers[i], name="j") as j:
out_buf[pos + cumsum[i] + j] = data_bufs1[i][offset + j]
else:
for i in range(len(a_tuple)):
for i in range(len(data)):
with i_b.for_range(0, outers[i], name="j", kind="parallel") as j:
out_buf[cumsum[i] + j] = data_bufs1[i][j]
return i_b.get()

if axis < 0:
axis += len(a_tuple[0].shape)
concat_axis_sizes = [int(t.shape[axis]) for t in a_tuple]
axis += len(data[0].shape)
concat_axis_sizes = [int(t.shape[axis]) for t in data]
join_size = int(np.sum(concat_axis_sizes))
in_outers = [int(np.prod(i.shape[axis:])) for i in a_tuple]
in_outers = [int(np.prod(i.shape[axis:])) for i in data]
in_outers_cumsum = [0, *np.cumsum(in_outers, dtype="int64")[0:-1]]
dtype = a_tuple[0].dtype
out_shape = a_tuple[0].shape[:axis] + [join_size] + a_tuple[0].shape[axis + 1 :]
dtype = data[0].dtype
out_shape = data[0].shape[:axis] + [join_size] + data[0].shape[axis + 1 :]
in_outers_tensor = const_vector(in_outers)
in_cumsum_tensor = const_vector(in_outers_cumsum, name="cumsum")
right_val = np.prod(out_shape[axis:])
left_val = np.prod(out_shape[:axis])

if (
len(a_tuple[0].shape) == 1
len(data[0].shape) == 1
or right_val == 1
or (left_val == 1 and axis == len(a_tuple[0].shape) - 1)
or (left_val == 1 and axis == len(data[0].shape) - 1)
or (left_val == 1 and right_val == 1)
):
# badly parallelized case
return te.extern(
[out_shape],
list(a_tuple) + [in_outers_tensor, in_cumsum_tensor],
list(data) + [in_outers_tensor, in_cumsum_tensor],
lambda ins, outs: gen_ir_1d(ins, ins[-2], ins[-1], outs[0]),
dtype=dtype,
name="concatenate_ext",
Expand All @@ -102,26 +102,8 @@ def gen_ir(data_bufs, in_outers_tensor, in_cumsum_tensor, out_buf, inner, outer)
outer = get_const_int(int(right_val))
return te.extern(
[out_shape],
list(a_tuple) + [in_outers_tensor, in_cumsum_tensor],
list(data) + [in_outers_tensor, in_cumsum_tensor],
lambda ins, outs: gen_ir(ins, ins[-2], ins[-1], outs[0], inner, outer),
dtype=dtype,
name="concatenate_ext",
)


def concatenate(data: tvm.te.Tensor, axis: Optional[int] = 0):
    """Concatenate a sequence of tensors along an existing axis (CPU-optimized).

    Thin public wrapper that forwards to the internal ``_concat``
    implementation in this module.

    Parameters
    ----------
    data : tuple of tvm.te.Tensor
        The tensors to concatenate.
    axis : int, optional
        Axis along which the tensors are joined; defaults to 0.

    Returns
    -------
    ret : tvm.te.Tensor
    """
    result = _concat(data, axis=axis)
    return result

0 comments on commit 8cf733a

Please sign in to comment.