Skip to content

Commit

Permalink
initialization bug fixed in cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed May 29, 2021
1 parent 5623e3f commit c5718e2
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions python/tvm/topi/cuda/nms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,18 @@ def _collect_selected_indices_and_scores_ir(
ib.scope_attr(by, "thread_extent", nthread_by)
zero = cast(0, "int64")

+ with ib.new_scope():
+ idx = bx * nthread_tx + tx
+ idy = cast(by, "int64")
+ batch_id = idy // num_class
+ class_id = idy % num_class
+
+ with ib.if_scope(idx < num_boxes):
+ offset = idx + class_id * num_boxes
+ collected_indices[batch_id, offset, 0] = zero
+ collected_indices[batch_id, offset, 1] = zero
+ collected_scores[batch_id, offset] = -1.0
+
with ib.new_scope():
idx = bx * nthread_tx + tx
idy = cast(by, "int64")
Expand All @@ -1033,11 +1045,6 @@ def _collect_selected_indices_and_scores_ir(
collected_indices[batch_id, offset, 0] = class_id
collected_indices[batch_id, offset, 1] = cast(selected_indices[idy, idx], "int64")
collected_scores[batch_id, offset] = selected_scores[idy, idx]
- with ib.else_scope():
- with ib.if_scope(idx < num_boxes):
- collected_indices[batch_id, offset, 0] = zero
- collected_indices[batch_id, offset, 1] = zero
- collected_scores[batch_id, offset] = -1.0

return ib.get()

Expand Down

0 comments on commit c5718e2

Please sign in to comment.