From 6dba68488b5a1ef399740e1415f75ade4361c918 Mon Sep 17 00:00:00 2001 From: laugh12321 Date: Wed, 18 Dec 2024 09:27:56 +0800 Subject: [PATCH] fix(custom plugin): Resolved duplicate oriented bounding boxes in RotatedNMS plugin under FP16 precision (#71) --- .../efficientRotatedNMSInference.cu | 12 +- .../efficientRotatedNMSInference.cuh | 417 ++---------------- 2 files changed, 47 insertions(+), 382 deletions(-) diff --git a/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cu b/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cu index 33fa486..68e5d3c 100644 --- a/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cu +++ b/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cu @@ -36,17 +36,7 @@ __device__ float IOU(EfficientRotatedNMSParameters param, RotatedBoxCorner bo RotatedBoxCorner b2 = box2; b1.reorder(); b2.reorder(); - float intersectArea = RotatedBoxCorner::intersect_area(b1, b2); - if (intersectArea <= 0.f) - { - return 0.f; - } - float unionArea = b1.area() + b2.area() - intersectArea; - if (unionArea <= 0.f) - { - return 0.f; - } - return intersectArea / unionArea; + return RotatedBoxCorner::probiou(b1, b2); } template diff --git a/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cuh b/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cuh index ca61a3a..f700bf5 100644 --- a/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cuh +++ b/plugin/efficientRotatedNMSPlugin/efficientRotatedNMSInference.cuh @@ -58,18 +58,6 @@ bool __device__ __inline__ gte_mp(const float a, const float b) { return a >= b; } -float __device__ __inline__ abs_mp(const float a) -{ - return fabsf(a); -} -float __device__ __inline__ cos_mp(const float a) -{ - return __cosf(a); -} -float __device__ __inline__ sin_mp(const float a) -{ - return __sinf(a); -} #if __CUDA_ARCH__ >= 530 @@ -111,18 +99,6 @@ bool __device__ __inline__ gte_mp(const __half a, const __half b) { return __hge(a, b); } -__half 
__device__ __inline__ abs_mp(const __half a) -{ - return __habs(a); -} -__half __device__ __inline__ cos_mp(const __half a) -{ - return hcos(a); -} -__half __device__ __inline__ sin_mp(const __half a) -{ - return hsin(a); -} #else @@ -164,18 +140,6 @@ bool __device__ __inline__ gte_mp(const __half a, const __half b) { return __float2half(gte_mp(__half2float(a), __half2float(b))); } -__half __device__ __inline__ abs_mp(const __half a) -{ - return __float2half(fabsf(__half2float(a))); -} -__half __device__ __inline__ cos_mp(const __half a) -{ - return __float2half(cos_mp(__half2float(a))); -} -__half __device__ __inline__ sin_mp(const __half a) -{ - return __float2half(sin_mp(__half2float(a))); -} #endif @@ -185,317 +149,28 @@ struct __align__(1 * sizeof(T)) RotatedBoxCorner; template struct __align__(1 * sizeof(T)) RotatedBoxCenterSize; -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -struct Point -{ - T x, y; - __device__ __inline__ Point(const T &px = 0, const T &py = 0) : x(px), y(py) {} - __device__ __inline__ Point operator+(const Point &p) const - { - return Point(add_mp(x, p.x), add_mp(y, p.y)); - } - __device__ __inline__ Point &operator+=(const Point &p) - { - x = add_mp(x, p.x); - y = add_mp(y, p.y); - return *this; - } - __device__ __inline__ Point operator-(const Point &p) const - { - return Point(sub_mp(x, p.x), sub_mp(y, p.y)); - } - __device__ __inline__ Point operator*(const T coeff) const - { - return Point(mul_mp(x, coeff), mul_mp(y, coeff)); - } +struct CovarianceMatrix{ + float a, b, c; }; -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu template -__device__ __inline__ T dot_2d(const Point &A, const Point &B) -{ - return add_mp(mul_mp(A.x, B.x), mul_mp(A.y, B.y)); -} +__device__ __inline__ void get_covariance_matrix(const RotatedBoxCenterSize& 
box, CovarianceMatrix &matrix) { + float w = float(box.w); + float h = float(box.h); + float r = float(box.r); -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ T cross_2d(const Point &A, const Point &B) -{ - return sub_mp(mul_mp(A.x, B.y), mul_mp(B.x, A.y)); -} - -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ void get_rotated_vertices(const RotatedBoxCenterSize &box, Point (&pts)[4]) -{ - // M_PI / 180. == 0.01745329251 - // double theta = box.a * 0.01745329251; - // MODIFIED - T cosTheta2 = mul_mp(cos_mp(box.r), T(0.5f)); - T sinTheta2 = mul_mp(sin_mp(box.r), T(0.5f)); - - // y: top --> down; x: left --> right - pts[0].x = sub_mp(box.x, add_mp(mul_mp(sinTheta2, box.h), mul_mp(cosTheta2, box.w))); - pts[0].y = add_mp(box.y, sub_mp(mul_mp(cosTheta2, box.h), mul_mp(sinTheta2, box.w))); - pts[1].x = add_mp(box.x, sub_mp(mul_mp(sinTheta2, box.h), mul_mp(cosTheta2, box.w))); - pts[1].y = sub_mp(box.y, add_mp(mul_mp(cosTheta2, box.h), mul_mp(sinTheta2, box.w))); - pts[2].x = sub_mp(mul_mp(T(2), box.x), pts[0].x); - pts[2].y = sub_mp(mul_mp(T(2), box.y), pts[0].y); - pts[3].x = sub_mp(mul_mp(T(2), box.x), pts[1].x); - pts[3].y = sub_mp(mul_mp(T(2), box.y), pts[1].y); -} - -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ int get_intersection_points(const Point (&pts1)[4], const Point (&pts2)[4], Point (&intersections)[24]) -{ - // Line vector - // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] - Point vec1[4], vec2[4]; - #pragma unroll 4 - for (int i = 0; i < 4; i++) - { - vec1[i] = pts1[(i + 1) % 4] - pts1[i]; - vec2[i] = pts2[(i + 1) % 4] - pts2[i]; - } - - // Line test - test all 
line combos for intersection - int num = 0; // number of intersections - #pragma unroll 4 - for (int i = 0; i < 4; i++) - { - #pragma unroll 4 - for (int j = 0; j < 4; j++) - { - // Solve for 2x2 Ax=b - T det = cross_2d(vec2[j], vec1[i]); - - // This takes care of parallel lines - if (lte_mp(abs_mp(det), T(1e-14))) - { - continue; - } + float a = w * w * 0.08333333333333333f; + float b = h * h * 0.08333333333333333f; - auto vec12 = pts2[j] - pts1[i]; + float cos = __cosf(r); + float sin = __sinf(r); - T t1 = cross_2d(vec2[j], vec12) / det; - T t2 = cross_2d(vec1[i], vec12) / det; + float cos2 = cos * cos; + float sin2 = sin * sin; - if (gte_mp(t1, T(0.0)) && lte_mp(t1, T(1.0)) && gte_mp(t2, T(0.0)) && lte_mp(t2, T(1.0))) - { - intersections[num++] = pts1[i] + vec1[i] * t1; - } - } - } - - // Check for vertices of rect1 inside rect2 - { - const auto &AB = vec2[0]; - const auto &DA = vec2[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - #pragma unroll 4 - for (int i = 0; i < 4; i++) - { - // assume ABCD is the rectangle, and P is the point to be judged - // P is inside ABCD iff. 
P's projection on AB lies within AB - // and P's projection on AD lies within AD - - auto AP = pts1[i] - pts2[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if (gte_mp(APdotAB, T(0.0)) && gte_mp(APdotAD, T(0.0)) && lte_mp(APdotAB, ABdotAB) && lte_mp(APdotAD, ADdotAD)) - { - intersections[num++] = pts1[i]; - } - } - } - - // Reverse the check - check for vertices of rect2 inside rect1 - { - const auto &AB = vec1[0]; - const auto &DA = vec1[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - #pragma unroll 4 - for (int i = 0; i < 4; i++) - { - auto AP = pts2[i] - pts1[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if (gte_mp(APdotAB, T(0.0)) && gte_mp(APdotAD, T(0.0)) && lte_mp(APdotAB, ABdotAB) && lte_mp(APdotAD, ADdotAD)) - { - intersections[num++] = pts2[i]; - } - } - } - - return num; -} - -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ int convex_hull_graham(const Point (&p)[24], const int &num_in, Point (&q)[24], bool shift_to_zero = false) -{ - assert(num_in >= 2); - - // Step 1: - // Find point with minimum y - // if more than 1 points have the same minimum y, - // pick the one with the minimum x. 
- int t = 0; - for (int i = 1; i < num_in; i++) - { - if (lt_mp(p[i].y, p[t].y) || (lte_mp(p[i].y, p[t].y) && lt_mp(p[i].x, p[t].x))) - { - t = i; - } - } - auto &start = p[t]; // starting point - - // Step 2: - // Subtract starting point from every points (for sorting in the next step) - for (int i = 0; i < num_in; i++) - { - q[i] = p[i] - start; - } - - // Swap the starting point to position 0 - auto tmp = q[0]; - q[0] = q[t]; - q[t] = tmp; - - // Step 3: - // Sort point 1 ~ num_in according to their relative cross-product values - // (essentially sorting according to angles) - // If the angles are the same, sort according to their distance to origin - T dist[24]; - for (int i = 0; i < num_in; i++) - { - dist[i] = dot_2d(q[i], q[i]); - } - - for (int i = 1; i < num_in - 1; i++) - { - for (int j = i + 1; j < num_in; j++) - { - T crossProduct = cross_2d(q[i], q[j]); - if (lt_mp(crossProduct, T(-1e-6)) || (lt_mp(abs_mp(crossProduct), T(1e-6)) && gt_mp(dist[i], dist[j]))) - { - auto q_tmp = q[i]; - q[i] = q[j]; - q[j] = q_tmp; - auto dist_tmp = dist[i]; - dist[i] = dist[j]; - dist[j] = dist_tmp; - } - } - } - - // Step 4: - // Make sure there are at least 2 points (that don't overlap with each other) - // in the stack - int k; // index of the non-overlapped second point - for (k = 1; k < num_in; k++) - { - if (gt_mp(dist[k], T(1e-8))) - { - break; - } - } - if (k == num_in) - { - // We reach the end, which means the convex hull is just one point - q[0] = p[t]; - return 1; - } - q[1] = q[k]; - int m = 2; // 2 points in the stack - // Step 5: - // Finally we can start the scanning process. 
- // When a non-convex relationship between the 3 points is found - // (either concave shape or duplicated points), - // we pop the previous point from the stack - // until the 3-point relationship is convex again, or - // until the stack only contains two points - for (int i = k + 1; i < num_in; i++) - { - while (m > 1 && gte_mp(cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]), T(0))) - { - m--; - } - q[m++] = q[i]; - } - - // Step 6 (Optional): - // In general sense we need the original coordinates, so we - // need to shift the points back (reverting Step 2) - // But if we're only interested in getting the area/perimeter of the shape - // We can simply return. - if (!shift_to_zero) - { - for (int i = 0; i < m; i++) - { - q[i] += start; - } - } - - return m; -} - -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ T polygon_area(const Point (&q)[24], const int &m) -{ - if (m <= 2) - { - return 0; - } - - T area = 0; - for (int i = 1; i < m - 1; i++) - { - area = add_mp(area, abs_mp(cross_2d(q[i] - q[0], q[i + 1] - q[0]))); - } - - return mul_mp(area, T(0.5)); -} - -// modified from -// https://github.com/open-mmlab/mmdeploy/blob/v1.3.1/csrc/mmdeploy/backend_ops/tensorrt/common_impl/nms/allClassRotatedNMS.cu -template -__device__ __inline__ T rotated_boxes_intersection(const RotatedBoxCenterSize& box1, const RotatedBoxCenterSize& box2) { - // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned - // from rotated_rect_intersection_pts - Point intersectPts[24], orderedPts[24]; - - Point pts1[4]; - Point pts2[4]; - get_rotated_vertices(box1, pts1); - get_rotated_vertices(box2, pts2); - - int num = get_intersection_points(pts1, pts2, intersectPts); - - if (num <= 2) { - return 0.0; - } - - // Convex Hull to order the intersection points in clockwise order and find - // the contour area. 
- int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); - return polygon_area(orderedPts, num_convex); + matrix.a = a * cos2 + b * sin2; + matrix.b = a * sin2 + b * cos2; + matrix.c = (a - b) * cos * sin; } template @@ -558,25 +233,13 @@ struct __align__(1 * sizeof(T)) RotatedBoxCorner return RotatedBoxCenterSize{add_mp(y1, mul_mp((T) 0.5, h)), add_mp(x1, mul_mp((T) 0.5, w)), h, w, r}; } - __device__ static float intersect_area(RotatedBoxCorner a, RotatedBoxCorner b) + // Calculate probabilistic IoU between oriented bounding boxes. + // Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf. + __device__ static float probiou(RotatedBoxCorner a, RotatedBoxCorner b) { RotatedBoxCenterSize box1(a), box2(b); - auto center_shift_x = mul_mp(add_mp(box1.x, box2.x), T(0.5)); - auto center_shift_y = mul_mp(add_mp(box1.y, box2.y), T(0.5)); - box1.x = sub_mp(box1.x, center_shift_x); - box1.y = sub_mp(box1.y, center_shift_y); - - box2.x = sub_mp(box2.x, center_shift_x); - box2.y = sub_mp(box2.y, center_shift_y); - - if (lt_mp(box1.area(), 1e-14) || lt_mp(box2.area(), 1e-14)) - { - return 0.f; - } - - const T intersection_area = rotated_boxes_intersection(box1, box2); - return float(intersection_area); + return RotatedBoxCenterSize::probiou(box1, box2); } }; @@ -618,25 +281,37 @@ struct __align__(1 * sizeof(T)) RotatedBoxCenterSize T w2 = mul_mp(w, (T) 0.5); return RotatedBoxCorner{sub_mp(y, h2), sub_mp(x, w2), add_mp(y, h2), add_mp(x, w2), r}; } - __device__ static float intersect_area(RotatedBoxCenterSize a, RotatedBoxCenterSize b) - { - RotatedBoxCenterSize box1(a), box2(b); - auto center_shift_x = mul_mp(add_mp(box1.x, box2.x), T(0.5)); - auto center_shift_y = mul_mp(add_mp(box1.y, box2.y), T(0.5)); - box1.x = sub_mp(box1.x, center_shift_x); - box1.y = sub_mp(box1.y, center_shift_y); + // Calculate probabilistic IoU between oriented bounding boxes. + // Implements the algorithm from https://arxiv.org/pdf/2106.06072v1.pdf. 
+    __device__ static float probiou(RotatedBoxCenterSize<T>& a, RotatedBoxCenterSize<T>& b) {
+        CovarianceMatrix matrix1, matrix2;  // covariance (a = xx, b = yy, c = xy) of each box
-        box2.x = sub_mp(box2.x, center_shift_x);
-        box2.y = sub_mp(box2.y, center_shift_y);
+        get_covariance_matrix<T>(a, matrix1);
+        get_covariance_matrix<T>(b, matrix2);
-        if (lt_mp(box1.area(), 1e-14) || lt_mp(box2.area(), 1e-14))
-        {
-            return 0.f;
-        }
+        float add_a1_a2 = matrix1.a + matrix2.a;  // S_xx, where S = Sigma1 + Sigma2
+        float add_b1_b2 = matrix1.b + matrix2.b;  // S_yy
+        float add_c1_c2 = matrix1.c + matrix2.c;  // S_xy
+        float sub_x1_x2 = float(a.x) - float(b.x);  // convert before subtracting so T = __half does not round the offset
+        float sub_y1_y2 = float(a.y) - float(b.y);
+        float sub_data = (add_a1_a2 * add_b1_b2) - (add_c1_c2 * add_c1_c2);  // det(S)
+        sub_data = fmaxf(sub_data, 1e-7f);  // keeps the divisions below finite
+        // Bhattacharyya distance = 0.25 * d^T S^-1 d + 0.5 * ln(det(S) / (4 * sqrt(det(Sigma1) * det(Sigma2)))).
+        float t1 = 0.25f * (
+            (add_a1_a2 * sub_y1_y2 * sub_y1_y2) +
+            (add_b1_b2 * sub_x1_x2 * sub_x1_x2)) / sub_data;
+        // Off-diagonal part of d^T S^-1 d is -2 * S_xy * dx * dy, hence the negative sign and the 0.5 factor.
+        float t2 = -0.5f *
+            (add_c1_c2 * sub_x1_x2 * sub_y1_y2) / sub_data;
+        // Log-determinant term; the 1e-7f offsets keep the quotient and the log finite when a box has zero area.
+        float t3 = 0.5f *
+            logf(sub_data / (4.0f * sqrtf(fmaxf(0.0f, matrix1.a * matrix1.b - matrix1.c * matrix1.c) *
+                fmaxf(0.0f, matrix2.a * matrix2.b - matrix2.c * matrix2.c)) + 1e-7f) + 1e-7f);
-        const T intersection_area = rotated_boxes_intersection(box1, box2);
-        return float(intersection_area);
+        float bd = fmaxf(1e-7f, fminf(t1 + t2 + t3, 100.0f));  // clamp the distance to [1e-7, 100]
+        float hd = sqrtf(1.0f - expf(-bd));  // Hellinger distance, 0 for identical boxes
+        return 1.0f - hd;  // similarity score: higher means more overlap
     }
 };