[RELAY][OP] Dynamic conv2d batch size for cuda #6598

Merged 1 commit on Oct 1, 2020
include/tvm/tir/expr.h (6 changes: 4 additions & 2 deletions)

@@ -1085,9 +1085,11 @@ class Reduce : public PrimExpr {
 /*! \brief Any shape. */
 class AnyNode : public PrimExprNode {
  public:
-  void VisitAttrs(AttrVisitor* v) {}
+  void VisitAttrs(AttrVisitor* v) { v->Visit("dtype", &dtype); }
 
-  bool SEqualReduce(const AnyNode* other, SEqualReducer equal) const { return true; }
+  bool SEqualReduce(const AnyNode* other, SEqualReducer equal) const {
+    return equal(dtype, other->dtype);
+  }
 
   void SHashReduce(SHashReducer hash_reduce) const {}

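As a quick illustration of the new semantics (a minimal sketch, assuming the Python bindings on this branch expose `tvm.tir.Any` and `tvm.ir.structural_equal` as in mainline TVM):

```python
import tvm

# Structural equality of Any nodes now reduces over their dtypes
# instead of returning true unconditionally.
a = tvm.tir.Any()
b = tvm.tir.Any()
assert tvm.ir.structural_equal(a, b)  # both carry the default int32 dtype
```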
python/tvm/topi/cuda/conv2d_direct.py (4 changes: 3 additions & 1 deletion)

@@ -117,4 +117,6 @@ def schedule_direct_cuda(cfg, s, conv):

     N, CO, OH, OW = get_const_tuple(output.shape)
     _, KH, KW, CI = get_const_tuple(kernel.shape)
-    cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)
+
+    if isinstance(N, int):
+        cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)
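The guard matters because `get_const_tuple` leaves a dynamic batch dimension as a symbolic `tvm.tir.Any` rather than a Python int, so the FLOP product could not be recorded as a number. A minimal sketch with made-up static dims:

```python
import tvm

N = tvm.tir.Any()  # what a dynamic batch dim looks like after get_const_tuple
CO, OH, OW, CI, KH, KW = 64, 56, 56, 64, 3, 3  # assumed static dims

if isinstance(N, int):
    flops = 2 * N * OH * OW * CO * CI * KH * KW  # a plain int, safe to record
else:
    flops = None  # symbolic batch: skip FLOP accounting entirely
```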
python/tvm/topi/cuda/conv2d_nhwc_winograd.py (14 changes: 12 additions & 2 deletions)

@@ -302,6 +302,15 @@ def nhwc_winograd_cuda(
     tile_size = _infer_tile_size(data, kernel)
     N, H, W, CI = get_const_tuple(data.shape)
 
+    if isinstance(N, tvm.tir.Any):
+        N = tvm.te.size_var("n")
+
+    if not isinstance(H, int) or not isinstance(W, int):
+        raise RuntimeError(
+            "cuda winograd nhwc conv2d doesn't support dynamic "
+            "input height or width."
+        )
+
     if isinstance(dilation, int):
         dilation_h = dilation_w = dilation
     else:
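The normalization above in isolation (a hedged sketch with assumed shapes): a dynamic batch arrives as `tvm.tir.Any` and is swapped for a named `te.size_var`, so the rest of the compute can treat it as an ordinary non-negative symbolic dimension:

```python
import tvm
from tvm import te

N = tvm.tir.Any()             # dynamic batch as returned by get_const_tuple
if isinstance(N, tvm.tir.Any):
    N = te.size_var("n")      # named, non-negative symbolic dimension

data = te.placeholder((N, 56, 56, 64), "float32", name="data")
print(data.shape)             # [n, 56, 56, 64]
```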
@@ -330,7 +339,7 @@ def nhwc_winograd_cuda(
     H = (H + pt + pb - KH) // HSTR + 1
     W = (W + pl + pr - KW) // WSTR + 1
     nH, nW = (H + m - 1) // m, (W + m - 1) // m
-    P = N * nH * nW
+    P = N * nH * nW if isinstance(N, int) else nH * nW
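For context, `P` is the number of input tiles fed to the winograd batched GEMM. A small worked example with assumed numbers: with a static batch, every image's tiles are flattened into `P`; with a symbolic batch, `P` counts tiles per image and the batch dimension is carried separately:

```python
m = 2                    # output tile size (assumed; chosen by _infer_tile_size)
H_out, W_out = 56, 56    # output extent after padding (assumed)
nH, nW = (H_out + m - 1) // m, (W_out + m - 1) // m  # 28, 28

N = 8                    # a static batch
P_static = N * nH * nW   # 8 * 28 * 28 = 6272 tiles across the whole batch
P_dynamic = nH * nW      # 784 tiles per image when N is symbolic
```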

     # Determine whether the shape is available with tensorcore
     shape_judge = (
@@ -432,7 +441,8 @@ def nhwc_winograd_cuda(
name="output",
tag="conv2d_nhwc_winograd",
)
cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
if isinstance(N, int):
cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
zhiics (Member, Author) commented:

@kevinthesun @icemelon9 @comaniac is this okay for AutoTVM?

Contributor commented:

It's okay in terms of functionality, but the output message will look odd. Since the AutoTVM progress bar shows throughput rather than latency, users will always see 0 GFLOPS during the tuning process (https://github.com/apache/incubator-tvm/blob/master/python/tvm/autotvm/tuner/callback.py#L159).

Maybe we can still report the FLOPS with N=1 and print a message saying we are tuning the kernel with N=1, but that it can be used with any batch size?

zhiics (Member, Author) commented on Sep 30, 2020:

Yeah, I thought about using 1 as well, but the actual batch size may not be 1.

kevinthesun (Contributor) commented on Oct 1, 2020:

I think it's fine, since AutoTVM generally can't be used for dynamic-shape ops. Users won't see any FLOPS info when N is symbolic.
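A sketch of the behavior discussed above (simplified; the real logic is in the linked callback.py): the progress bar divides the recorded FLOP count by the measured latency, so a task that never called `cfg.add_flop` reports 0 GFLOPS no matter how fast it runs:

```python
flop = 0             # nothing recorded via cfg.add_flop for a symbolic batch
latency = 1.2e-3     # seconds; an assumed measurement
gflops = flop / latency / 1e9
print(f"{gflops:.2f} GFLOPS")  # 0.00, regardless of actual speed
```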

     return output


python/tvm/topi/cuda/conv2d_winograd.py (16 changes: 14 additions & 2 deletions)

@@ -44,6 +44,15 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_

     N, CI, H, W = get_const_tuple(data.shape)
 
+    if isinstance(N, tvm.tir.Any):
+        N = tvm.te.size_var("n")
+
+    if not isinstance(H, int) or not isinstance(W, int):
+        raise RuntimeError(
+            "cuda winograd conv2d doesn't support dynamic "
+            "input height or width."
+        )
+
     if isinstance(dilation, int):
         dilation_h = dilation_w = dilation
     else:
@@ -73,7 +82,8 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_
     H = (H + pt + pb - KH) // HSTR + 1
     W = (W + pl + pr - KW) // WSTR + 1
     nH, nW = (H + m - 1) // m, (W + m - 1) // m
-    P = N * nH * nW
+
+    P = N * nH * nW if isinstance(N, int) else nH * nW

     # transform kernel
     if not pre_computed:
@@ -141,7 +151,9 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_
name="output",
tag="conv2d_nchw_winograd",
)
cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)

if isinstance(N, int):
cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)

return output

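Both winograd variants accept only a dynamic batch; a symbolic height or width fails fast, since the tile grid `(nH, nW)` requires static spatial extents. A minimal sketch of the rejected case:

```python
import tvm

# Height is symbolic here, so the guard fires before any scheduling happens.
N, CI, H, W = tvm.tir.Any(), 64, tvm.tir.Any(), 224
if not isinstance(H, int) or not isinstance(W, int):
    raise RuntimeError("cuda winograd conv2d doesn't support dynamic input height or width.")
```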
src/tir/ir/expr.cc (6 changes: 5 additions & 1 deletion)

@@ -908,7 +908,11 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
 });
 
 // Any
-Any::Any() { data_ = make_object<AnyNode>(); }
+Any::Any() {
+  auto n = make_object<AnyNode>();
+  n->dtype = DataType::Int(32);
+  data_ = std::move(n);
+}
 
 TVM_REGISTER_GLOBAL("tir.Any").set_body_typed([]() { return Any(); });

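From the Python side, the effect is that a fresh `Any` now carries a concrete `int32` dtype, so it composes with ordinary integer shape arithmetic (a minimal sketch, assuming mainline bindings):

```python
import tvm

n = tvm.tir.Any()
assert n.dtype == "int32"  # set by the new Any::Any() constructor
extent = n * 28 * 28       # a well-typed int32 PrimExpr
print(extent)
```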
tests/python/relay/test_any.py (6 changes: 3 additions & 3 deletions)

@@ -444,7 +444,7 @@ def verify_any_conv2d(


 # TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
 def test_any_conv2d():
     verify_any_conv2d(
         (relay.Any(), 64, 224, 224),
@@ -501,7 +501,7 @@ def verify_any_conv2d_NCHWc(


 # TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
 def test_any_conv2d_NCHWc():
     verify_any_conv2d_NCHWc(
         (relay.Any(), 8, 224, 224, 8),
@@ -563,7 +563,7 @@ def verify_any_conv2d_transpose_nchw(


 # TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
 def test_any_conv2d_transpose_nchw():
     verify_any_conv2d_transpose_nchw(
         (relay.Any(), 64, 224, 224),
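For reference, a compressed sketch of what these tests exercise once the GPU path is enabled. This follows the current relay API names (`device=`, `tvm.device`); the 2020 codebase spelled them `ctx=` and `tvm.gpu`. The block below is an assumption-laden stand-in for `verify_any_conv2d`, not its actual body:

```python
import numpy as np
import tvm
from tvm import relay

# conv2d whose batch dimension is relay.Any(): compiled once, run at any batch.
data = relay.var("data", shape=(relay.Any(), 64, 224, 224), dtype="float32")
weight = relay.var("weight", shape=(64, 64, 3, 3), dtype="float32")
out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))

dev = tvm.device("cuda", 0)
fn = relay.create_executor("vm", mod=mod, device=dev, target="cuda").evaluate()
for batch in (1, 4):
    x = np.random.uniform(size=(batch, 64, 224, 224)).astype("float32")
    w = np.random.uniform(size=(64, 64, 3, 3)).astype("float32")
    print(fn(x, w).shape)  # (batch, 64, 224, 224)
```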