From 7da383d327911f0441be1f98785b0f727e86b34c Mon Sep 17 00:00:00 2001
From: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>
Date: Wed, 10 Jul 2024 13:42:44 +0900
Subject: [PATCH 1/2] feat(lidar_transfusion): intensity as uint8 and tests
 (#7745)

* feat(lidar_transfusion): intensity as uint8 and tests

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* style(pre-commit): autofix

* fix(lidar_transfusion): headers include

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* feat(lidar_transfusion): use autoware_point_types for validation & refactor

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* style(lidar_transfusion): remove deprecated function

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* test(lidar_transfusion): point_step validation is not required

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* docs(lidar_transfusion): update assumptions / known limits

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* refactor(lidar_transfusion): redundant points structure

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* docs(lidar_transfusion): update assumptions / known limits

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

* test(lidar_transfusion): temporary disable CUDA tests

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>

---------

Signed-off-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kenzo Lobos Tsunekawa <kenzo.lobos@tier4.jp>
---
 perception/lidar_transfusion/CMakeLists.txt   |  52 +++
 perception/lidar_transfusion/README.md        |  18 ++
 .../preprocess/pointcloud_densification.hpp   |   2 +-
 .../preprocess/preprocess_kernel.hpp          |   6 +-
 .../preprocess/voxel_generator.hpp            |   8 +-
 .../include/lidar_transfusion/ros_utils.hpp   |  68 ++++
 .../include/lidar_transfusion/utils.hpp       |  46 ---
 .../preprocess/pointcloud_densification.cpp   |   9 +-
 .../lib/preprocess/preprocess_kernel.cu       |  61 ++--
 .../lib/preprocess/voxel_generator.cpp        |  23 +-
 perception/lidar_transfusion/package.xml      |   1 +
 .../test/test_detection_class_remapper.cpp    |  92 ++++++
 .../lidar_transfusion/test/test_nms.cpp       | 121 +++++++
 .../test/test_postprocess_kernel.cpp          | 299 ++++++++++++++++++
 .../test/test_postprocess_kernel.hpp          |  48 +++
 .../test/test_preprocess_kernel.cpp           | 265 ++++++++++++++++
 .../test/test_preprocess_kernel.hpp           |  50 +++
 .../lidar_transfusion/test/test_ros_utils.cpp | 104 ++++++
 .../test/test_voxel_generator.cpp             | 284 +++++++++++++++++
 .../test/test_voxel_generator.hpp             |  65 ++++
 20 files changed, 1517 insertions(+), 105 deletions(-)
 create mode 100644 perception/lidar_transfusion/test/test_detection_class_remapper.cpp
 create mode 100644 perception/lidar_transfusion/test/test_nms.cpp
 create mode 100644 perception/lidar_transfusion/test/test_postprocess_kernel.cpp
 create mode 100644 perception/lidar_transfusion/test/test_postprocess_kernel.hpp
 create mode 100644 perception/lidar_transfusion/test/test_preprocess_kernel.cpp
 create mode 100644 perception/lidar_transfusion/test/test_preprocess_kernel.hpp
 create mode 100644 perception/lidar_transfusion/test/test_ros_utils.cpp
 create mode 100644 perception/lidar_transfusion/test/test_voxel_generator.cpp
 create mode 100644 perception/lidar_transfusion/test/test_voxel_generator.hpp

diff --git a/perception/lidar_transfusion/CMakeLists.txt b/perception/lidar_transfusion/CMakeLists.txt
index c0e2d85f27e62..677a835e5b04e 100644
--- a/perception/lidar_transfusion/CMakeLists.txt
+++ b/perception/lidar_transfusion/CMakeLists.txt
@@ -137,6 +137,58 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
   if(BUILD_TESTING)
     find_package(ament_lint_auto REQUIRED)
     ament_lint_auto_find_test_dependencies()
+
+    find_package(ament_cmake_gtest REQUIRED)
+    ament_auto_add_gtest(test_detection_class_remapper
+      test/test_detection_class_remapper.cpp
+    )
+    ament_auto_add_gtest(test_ros_utils
+      test/test_ros_utils.cpp
+    )
+    ament_auto_add_gtest(test_nms
+      test/test_nms.cpp
+    )
+
+    # Temporary disabled, tracked by:
+    # https://github.com/autowarefoundation/autoware.universe/issues/7724
+    # ament_auto_add_gtest(test_voxel_generator
+    #   test/test_voxel_generator.cpp
+    # )
+
+    # # preprocess kernel test
+    # add_executable(test_preprocess_kernel
+    #   test/test_preprocess_kernel.cpp
+    # )
+    # target_include_directories(test_preprocess_kernel PUBLIC
+    #   ${test_preprocess_kernel_SOURCE_DIR}
+    # )
+    # target_link_libraries(test_preprocess_kernel
+    #   transfusion_cuda_lib
+    #   gtest
+    #   gtest_main
+    # )
+    # ament_add_test(test_preprocess_kernel
+    #   GENERATE_RESULT_FOR_RETURN_CODE_ZERO
+    #   COMMAND "$<TARGET_FILE:test_preprocess_kernel>"
+    # )
+
+    # # postprocess kernel test
+    # add_executable(test_postprocess_kernel
+    #   test/test_postprocess_kernel.cpp
+    # )
+    # target_include_directories(test_postprocess_kernel PUBLIC
+    #   ${test_postprocess_kernel_SOURCE_DIR}
+    # )
+    # target_link_libraries(test_postprocess_kernel
+    #   transfusion_cuda_lib
+    #   gtest
+    #   gtest_main
+    # )
+    # ament_add_test(test_postprocess_kernel
+    #   GENERATE_RESULT_FOR_RETURN_CODE_ZERO
+    #   COMMAND "$<TARGET_FILE:test_postprocess_kernel>"
+    # )
+
   endif()
 
   ament_auto_package(
diff --git a/perception/lidar_transfusion/README.md b/perception/lidar_transfusion/README.md
index 7974714720c32..e30403141e8ad 100644
--- a/perception/lidar_transfusion/README.md
+++ b/perception/lidar_transfusion/README.md
@@ -59,6 +59,24 @@ ros2 launch lidar_transfusion lidar_transfusion.launch.xml log_level:=debug
 
 ## Assumptions / Known limits
 
+This library operates on raw cloud data (bytes). It is assumed that the input pointcloud message has following format:
+
+```python
+[
+  sensor_msgs.msg.PointField(name='x', offset=0, datatype=7, count=1),
+  sensor_msgs.msg.PointField(name='y', offset=4, datatype=7, count=1),
+  sensor_msgs.msg.PointField(name='z', offset=8, datatype=7, count=1),
+  sensor_msgs.msg.PointField(name='intensity', offset=12, datatype=2, count=1)
+]
+```
+
+This input may consist of other fields as well - shown format is required minimum.
+For debug purposes, you can validate your pointcloud topic using simple command:
+
+```bash
+ros2 topic echo <input_topic> --field fields
+```
+
 ## Trained Models
 
 You can download the onnx format of trained models by clicking on the links below.
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
index 25095f4dc9c0b..6ac0a6544389f 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
@@ -51,7 +51,7 @@ class DensificationParam
 
 struct PointCloudWithTransform
 {
-  cuda::unique_ptr<float[]> points_d{nullptr};
+  cuda::unique_ptr<uint8_t[]> data_d{nullptr};
   std_msgs::msg::Header header;
   size_t num_points{0};
   Eigen::Affine3f affine_past2world;
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
index c571990d84b51..592f09c2d288a 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
@@ -56,12 +56,8 @@ class PreprocessCuda
     unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
     unsigned int * voxel_num, unsigned int * voxel_idxs);
 
-  // cudaError_t generateVoxelsInput_launch(
-  //   uint8_t * cloud_data, CloudInfo & cloud_info, unsigned int points_agg, unsigned int
-  //   points_size, float time_lag, float * affine_transform, float * points);
-
   cudaError_t generateSweepPoints_launch(
-    const float * input_points, size_t points_size, int input_point_step, float time_lag,
+    const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
     const float * transform, float * output_points);
 
 private:
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
index 3e3660e238473..f0d253ee28755 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
@@ -18,8 +18,8 @@
 #include "lidar_transfusion/cuda_utils.hpp"
 #include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
 #include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "lidar_transfusion/ros_utils.hpp"
 #include "lidar_transfusion/transfusion_config.hpp"
-#include "lidar_transfusion/utils.hpp"
 
 #ifdef ROS_DISTRO_GALACTIC
 #include <tf2_eigen/tf2_eigen.h>
@@ -27,6 +27,8 @@
 #include <tf2_eigen/tf2_eigen.hpp>
 #endif
 
+#include <autoware_point_types/types.hpp>
+
 #include <sensor_msgs/msg/point_cloud2.hpp>
 
 #include <memory>
@@ -36,8 +38,8 @@
 
 namespace lidar_transfusion
 {
-constexpr size_t AFF_MAT_SIZE = 16;         // 4x4 matrix
-constexpr size_t MAX_CLOUD_STEP_SIZE = 35;  // PointXYZIRCADT
+constexpr size_t AFF_MAT_SIZE = 16;  // 4x4 matrix
+constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);
 
 class VoxelGenerator
 {
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
index f013a02404a29..cbfc4e87b1610 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
@@ -17,16 +17,84 @@
 
 #include "lidar_transfusion/utils.hpp"
 
+#include <autoware_point_types/types.hpp>
+
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/detected_objects.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
+#include <sensor_msgs/msg/point_field.hpp>
 
+#include <cstdint>
 #include <string>
 #include <vector>
 
+#define CHECK_OFFSET(offset, structure, field) \
+  static_assert(                               \
+    offsetof(structure, field) == offset,      \
+    "Offset of " #field " in " #structure " does not match expected offset.")
+#define CHECK_TYPE(type, structure, field)                 \
+  static_assert(                                           \
+    std::is_same<decltype(structure::field), type>::value, \
+    "Type of " #field " in " #structure " does not match expected type.")
+#define CHECK_FIELD(offset, type, structure, field) \
+  CHECK_OFFSET(offset, structure, field);           \
+  CHECK_TYPE(type, structure, field)
+
 namespace lidar_transfusion
 {
+using sensor_msgs::msg::PointField;
+
+CHECK_FIELD(0, float, autoware_point_types::PointXYZIRCAEDT, x);
+CHECK_FIELD(4, float, autoware_point_types::PointXYZIRCAEDT, y);
+CHECK_FIELD(8, float, autoware_point_types::PointXYZIRCAEDT, z);
+CHECK_FIELD(12, uint8_t, autoware_point_types::PointXYZIRCAEDT, intensity);
+
+struct CloudInfo
+{
+  uint32_t x_offset{0};
+  uint32_t y_offset{sizeof(float)};
+  uint32_t z_offset{sizeof(float) * 2};
+  uint32_t intensity_offset{sizeof(float) * 3};
+  uint8_t x_datatype{PointField::FLOAT32};
+  uint8_t y_datatype{PointField::FLOAT32};
+  uint8_t z_datatype{PointField::FLOAT32};
+  uint8_t intensity_datatype{PointField::UINT8};
+  uint8_t x_count{1};
+  uint8_t y_count{1};
+  uint8_t z_count{1};
+  uint8_t intensity_count{1};
+  uint32_t point_step{sizeof(autoware_point_types::PointXYZIRCAEDT)};
+  bool is_bigendian{false};
+
+  bool operator!=(const CloudInfo & rhs) const
+  {
+    return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
+           intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
+           y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
+           intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
+           y_count != rhs.y_count || z_count != rhs.z_count ||
+           intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
+  }
+
+  friend std::ostream & operator<<(std::ostream & os, const CloudInfo & info)
+  {
+    os << "x_offset: " << static_cast<int>(info.x_offset) << std::endl;
+    os << "y_offset: " << static_cast<int>(info.y_offset) << std::endl;
+    os << "z_offset: " << static_cast<int>(info.z_offset) << std::endl;
+    os << "intensity_offset: " << static_cast<int>(info.intensity_offset) << std::endl;
+    os << "x_datatype: " << static_cast<int>(info.x_datatype) << std::endl;
+    os << "y_datatype: " << static_cast<int>(info.y_datatype) << std::endl;
+    os << "z_datatype: " << static_cast<int>(info.z_datatype) << std::endl;
+    os << "intensity_datatype: " << static_cast<int>(info.intensity_datatype) << std::endl;
+    os << "x_count: " << static_cast<int>(info.x_count) << std::endl;
+    os << "y_count: " << static_cast<int>(info.y_count) << std::endl;
+    os << "z_count: " << static_cast<int>(info.z_count) << std::endl;
+    os << "intensity_count: " << static_cast<int>(info.intensity_count) << std::endl;
+    os << "is_bigendian: " << static_cast<int>(info.is_bigendian) << std::endl;
+    return os;
+  }
+};
 
 void box3DToDetectedObject(
   const Box3D & box3d, const std::vector<std::string> & class_names,
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
index e73fe7f055953..cc40e55851738 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
@@ -36,52 +36,6 @@ struct Box3D
   float yaw;
 };
 
-struct CloudInfo
-{
-  uint32_t x_offset;
-  uint32_t y_offset;
-  uint32_t z_offset;
-  uint32_t intensity_offset;
-  uint8_t x_datatype;
-  uint8_t y_datatype;
-  uint8_t z_datatype;
-  uint8_t intensity_datatype;
-  uint8_t x_count;
-  uint8_t y_count;
-  uint8_t z_count;
-  uint8_t intensity_count;
-  uint32_t point_step;
-  bool is_bigendian;
-
-  CloudInfo()
-  : x_offset(0),
-    y_offset(4),
-    z_offset(8),
-    intensity_offset(12),
-    x_datatype(7),
-    y_datatype(7),
-    z_datatype(7),
-    intensity_datatype(7),
-    x_count(1),
-    y_count(1),
-    z_count(1),
-    intensity_count(1),
-    point_step(16),
-    is_bigendian(false)
-  {
-  }
-
-  bool operator!=(const CloudInfo & rhs) const
-  {
-    return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
-           intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
-           y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
-           intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
-           y_count != rhs.y_count || z_count != rhs.z_count ||
-           intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
-  }
-};
-
 enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE };
 
 // cspell: ignore divup
diff --git a/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
index 774b3a05d71c1..c13423f2d24d8 100644
--- a/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
+++ b/perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
@@ -93,16 +93,15 @@ void PointCloudDensification::enqueue(
   affine_world2current_ = affine_world2current;
   current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
 
-  assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 1);
-  auto points_d = cuda::make_unique<float[]>(
-    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float));
+  auto data_d = cuda::make_unique<uint8_t[]>(
+    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
 
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
-    points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
+    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
     cudaMemcpyHostToDevice, stream_));
 
   PointCloudWithTransform pointcloud = {
-    std::move(points_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
 
   pointcloud_cache_.push_front(std::move(pointcloud));
 }
diff --git a/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
index b8f9ae998fd4f..48bb4eb9332a8 100644
--- a/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
+++ b/perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
@@ -31,6 +31,8 @@
 #include "lidar_transfusion/cuda_utils.hpp"
 #include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
 
+#include <cstdint>
+
 namespace lidar_transfusion
 {
 
@@ -99,9 +101,12 @@ __global__ void generateVoxels_random_kernel(
 cudaError_t PreprocessCuda::generateVoxels_random_launch(
   float * points, unsigned int points_size, unsigned int * mask, float * voxels)
 {
-  int threadNum = config_.threads_for_voxel_;
-  dim3 blocks((points_size + threadNum - 1) / threadNum);
-  dim3 threads(threadNum);
+  if (points_size == 0) {
+    return cudaGetLastError();
+  }
+  dim3 blocks(divup(points_size, config_.threads_for_voxel_));
+  dim3 threads(config_.threads_for_voxel_);
+
   generateVoxels_random_kernel<<<blocks, threads, 0, stream_>>>(
     points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_,
     config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_,
@@ -165,40 +170,48 @@ cudaError_t PreprocessCuda::generateBaseFeatures_launch(
 }
 
 __global__ void generateSweepPoints_kernel(
-  const float * input_points, size_t points_size, int input_point_step, float time_lag,
+  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
   const float * transform_array, int num_features, float * output_points)
 {
   int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (point_idx >= points_size) return;
 
-  const float input_x = input_points[point_idx * input_point_step + 0];
-  const float input_y = input_points[point_idx * input_point_step + 1];
-  const float input_z = input_points[point_idx * input_point_step + 2];
-  const float intensity = input_points[point_idx * input_point_step + 3];
-
-  output_points[point_idx * num_features] = transform_array[0] * input_x +
-                                            transform_array[4] * input_y +
-                                            transform_array[8] * input_z + transform_array[12];
-  output_points[point_idx * num_features + 1] = transform_array[1] * input_x +
-                                                transform_array[5] * input_y +
-                                                transform_array[9] * input_z + transform_array[13];
-  output_points[point_idx * num_features + 2] = transform_array[2] * input_x +
-                                                transform_array[6] * input_y +
-                                                transform_array[10] * input_z + transform_array[14];
-  output_points[point_idx * num_features + 3] = intensity;
+  union {
+    uint32_t raw{0};
+    float value;
+  } input_x, input_y, input_z;
+
+#pragma unroll
+  for (int i = 0; i < 4; i++) {  // 4 bytes for float32
+    input_x.raw |= input_data[point_idx * input_point_step + i] << i * 8;
+    input_y.raw |= input_data[point_idx * input_point_step + i + 4] << i * 8;
+    input_z.raw |= input_data[point_idx * input_point_step + i + 8] << i * 8;
+  }
+
+  float input_intensity = static_cast<float>(input_data[point_idx * input_point_step + 12]);
+
+  output_points[point_idx * num_features] =
+    transform_array[0] * input_x.value + transform_array[4] * input_y.value +
+    transform_array[8] * input_z.value + transform_array[12];
+  output_points[point_idx * num_features + 1] =
+    transform_array[1] * input_x.value + transform_array[5] * input_y.value +
+    transform_array[9] * input_z.value + transform_array[13];
+  output_points[point_idx * num_features + 2] =
+    transform_array[2] * input_x.value + transform_array[6] * input_y.value +
+    transform_array[10] * input_z.value + transform_array[14];
+  output_points[point_idx * num_features + 3] = input_intensity;
   output_points[point_idx * num_features + 4] = time_lag;
 }
 
 cudaError_t PreprocessCuda::generateSweepPoints_launch(
-  const float * input_points, size_t points_size, int input_point_step, float time_lag,
+  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
   const float * transform_array, float * output_points)
 {
-  int threadNum = config_.threads_for_voxel_;
-  dim3 blocks((points_size + threadNum - 1) / threadNum);
-  dim3 threads(threadNum);
+  dim3 blocks(divup(points_size, config_.threads_for_voxel_));
+  dim3 threads(config_.threads_for_voxel_);
 
   generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
-    input_points, points_size, input_point_step, time_lag, transform_array,
+    input_data, points_size, input_point_step, time_lag, transform_array,
     config_.num_point_feature_size_, output_points);
 
   cudaError_t err = cudaGetLastError();
diff --git a/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp
index 7072e8491af66..cb8bac984aef3 100644
--- a/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp
+++ b/perception/lidar_transfusion/lib/preprocess/voxel_generator.cpp
@@ -23,25 +23,6 @@
 namespace lidar_transfusion
 {
 
-std::ostream & operator<<(std::ostream & os, const CloudInfo & info)
-{
-  os << "x_offset: " << static_cast<int>(info.x_offset) << std::endl;
-  os << "y_offset: " << static_cast<int>(info.y_offset) << std::endl;
-  os << "z_offset: " << static_cast<int>(info.z_offset) << std::endl;
-  os << "intensity_offset: " << static_cast<int>(info.intensity_offset) << std::endl;
-  os << "x_datatype: " << static_cast<int>(info.x_datatype) << std::endl;
-  os << "y_datatype: " << static_cast<int>(info.y_datatype) << std::endl;
-  os << "z_datatype: " << static_cast<int>(info.z_datatype) << std::endl;
-  os << "intensity_datatype: " << static_cast<int>(info.intensity_datatype) << std::endl;
-  os << "x_count: " << static_cast<int>(info.x_count) << std::endl;
-  os << "y_count: " << static_cast<int>(info.y_count) << std::endl;
-  os << "z_count: " << static_cast<int>(info.z_count) << std::endl;
-  os << "intensity_count: " << static_cast<int>(info.intensity_count) << std::endl;
-  os << "point_step: " << static_cast<int>(info.point_step) << std::endl;
-  os << "is_bigendian: " << static_cast<int>(info.is_bigendian) << std::endl;
-  return os;
-}
-
 VoxelGenerator::VoxelGenerator(
   const DensificationParam & densification_param, const TransfusionConfig & config,
   cudaStream_t & stream)
@@ -106,8 +87,8 @@ std::size_t VoxelGenerator::generateSweepPoints(
     CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
 
     pre_ptr_->generateSweepPoints_launch(
-      pc_cache_iter->points_d.get(), sweep_num_points, cloud_info_.point_step / sizeof(float),
-      time_lag, affine_past2current_d_.get(), points_d.get() + shift);
+      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
+      affine_past2current_d_.get(), points_d.get() + shift);
     point_counter += sweep_num_points;
   }
 
diff --git a/perception/lidar_transfusion/package.xml b/perception/lidar_transfusion/package.xml
index ffbe091b998e5..3b495025b1c34 100644
--- a/perception/lidar_transfusion/package.xml
+++ b/perception/lidar_transfusion/package.xml
@@ -13,6 +13,7 @@
   <buildtool_depend>autoware_cmake</buildtool_depend>
 
   <depend>autoware_perception_msgs</depend>
+  <depend>autoware_point_types</depend>
   <depend>autoware_universe_utils</depend>
   <depend>launch_ros</depend>
   <depend>object_recognition_utils</depend>
diff --git a/perception/lidar_transfusion/test/test_detection_class_remapper.cpp b/perception/lidar_transfusion/test/test_detection_class_remapper.cpp
new file mode 100644
index 0000000000000..6e7f896e44d2c
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_detection_class_remapper.cpp
@@ -0,0 +1,92 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <lidar_transfusion/detection_class_remapper.hpp>
+
+#include <gtest/gtest.h>
+
+TEST(DetectionClassRemapperTest, MapClasses)
+{
+  lidar_transfusion::DetectionClassRemapper remapper;
+
+  // Set up the parameters for the remapper
+  // Labels: CAR, TRUCK, TRAILER
+  std::vector<int64_t> allow_remapping_by_area_matrix = {
+    0, 0, 0,  // CAR cannot be remapped
+    0, 0, 1,  // TRUCK can be remapped to TRAILER
+    0, 1, 0   // TRAILER can be remapped to TRUCK
+  };
+  std::vector<double> min_area_matrix = {0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0};
+  std::vector<double> max_area_matrix = {0.0, 0.0, 0.0, 0.0, 0.0, 999.0, 0.0, 10.0, 0.0};
+
+  remapper.setParameters(allow_remapping_by_area_matrix, min_area_matrix, max_area_matrix);
+
+  // Create a DetectedObjects message with some objects
+  autoware_perception_msgs::msg::DetectedObjects msg;
+
+  // CAR with area 4.0, which is out of the range for remapping
+  autoware_perception_msgs::msg::DetectedObject obj1;
+  autoware_perception_msgs::msg::ObjectClassification obj1_classification;
+  obj1.shape.dimensions.x = 2.0;
+  obj1.shape.dimensions.y = 2.0;
+  obj1_classification.label = 0;
+  obj1_classification.probability = 1.0;
+  obj1.classification = {obj1_classification};
+  msg.objects.push_back(obj1);
+
+  // TRUCK with area 16.0, which is in the range for remapping to TRAILER
+  autoware_perception_msgs::msg::DetectedObject obj2;
+  autoware_perception_msgs::msg::ObjectClassification obj2_classification;
+  obj2.shape.dimensions.x = 8.0;
+  obj2.shape.dimensions.y = 2.0;
+  obj2_classification.label = 1;
+  obj2_classification.probability = 1.0;
+  obj2.classification = {obj2_classification};
+  msg.objects.push_back(obj2);
+
+  // TRAILER with area 9.0, which is in the range for remapping to TRUCK
+  autoware_perception_msgs::msg::DetectedObject obj3;
+  autoware_perception_msgs::msg::ObjectClassification obj3_classification;
+  obj3.shape.dimensions.x = 3.0;
+  obj3.shape.dimensions.y = 3.0;
+  obj3_classification.label = 2;
+  obj3_classification.probability = 1.0;
+  obj3.classification = {obj3_classification};
+  msg.objects.push_back(obj3);
+
+  // TRAILER with area 12.0, which is out of the range for remapping
+  autoware_perception_msgs::msg::DetectedObject obj4;
+  autoware_perception_msgs::msg::ObjectClassification obj4_classification;
+  obj4.shape.dimensions.x = 4.0;
+  obj4.shape.dimensions.y = 3.0;
+  obj4_classification.label = 2;
+  obj4_classification.probability = 1.0;
+  obj4.classification = {obj4_classification};
+  msg.objects.push_back(obj4);
+
+  // Call the mapClasses function
+  remapper.mapClasses(msg);
+
+  // Check the remapped labels
+  EXPECT_EQ(msg.objects[0].classification[0].label, 0);
+  EXPECT_EQ(msg.objects[1].classification[0].label, 2);
+  EXPECT_EQ(msg.objects[2].classification[0].label, 1);
+  EXPECT_EQ(msg.objects[3].classification[0].label, 2);
+}
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_nms.cpp b/perception/lidar_transfusion/test/test_nms.cpp
new file mode 100644
index 0000000000000..b6f0ec8c31684
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_nms.cpp
@@ -0,0 +1,121 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/postprocess/non_maximum_suppression.hpp"
+
+#include <gtest/gtest.h>
+
+TEST(NonMaximumSuppressionTest, Apply)
+{
+  lidar_transfusion::NonMaximumSuppression nms;
+  lidar_transfusion::NMSParams params;
+  params.search_distance_2d_ = 1.0;
+  params.iou_threshold_ = 0.2;
+  params.nms_type_ = lidar_transfusion::NMS_TYPE::IoU_BEV;
+  params.target_class_names_ = {"CAR"};
+  nms.setParameters(params);
+
+  std::vector<lidar_transfusion::DetectedObject> input_objects(4);
+
+  // Object 1
+  autoware_perception_msgs::msg::ObjectClassification obj1_classification;
+  obj1_classification.label = 0;  // Assuming "car" has label 0
+  obj1_classification.probability = 1.0;
+  input_objects[0].classification = {obj1_classification};  // Assuming "car" has label 0
+  input_objects[0].kinematics.pose_with_covariance.pose.position.x = 0.0;
+  input_objects[0].kinematics.pose_with_covariance.pose.position.y = 0.0;
+  input_objects[0].kinematics.pose_with_covariance.pose.orientation.x = 0.0;
+  input_objects[0].kinematics.pose_with_covariance.pose.orientation.y = 0.0;
+  input_objects[0].kinematics.pose_with_covariance.pose.orientation.z = 0.0;
+  input_objects[0].kinematics.pose_with_covariance.pose.orientation.w = 1.0;
+  input_objects[0].shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+  input_objects[0].shape.dimensions.x = 4.0;
+  input_objects[0].shape.dimensions.y = 2.0;
+
+  // Object 2 (overlaps with Object 1)
+  autoware_perception_msgs::msg::ObjectClassification obj2_classification;
+  obj2_classification.label = 0;  // Assuming "car" has label 0
+  obj2_classification.probability = 1.0;
+  input_objects[1].classification = {obj2_classification};  // Assuming "car" has label 0
+  input_objects[1].kinematics.pose_with_covariance.pose.position.x = 0.5;
+  input_objects[1].kinematics.pose_with_covariance.pose.position.y = 0.5;
+  input_objects[1].kinematics.pose_with_covariance.pose.orientation.x = 0.0;
+  input_objects[1].kinematics.pose_with_covariance.pose.orientation.y = 0.0;
+  input_objects[1].kinematics.pose_with_covariance.pose.orientation.z = 0.0;
+  input_objects[1].kinematics.pose_with_covariance.pose.orientation.w = 1.0;
+  input_objects[1].shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+  input_objects[1].shape.dimensions.x = 4.0;
+  input_objects[1].shape.dimensions.y = 2.0;
+
+  // Object 3
+  autoware_perception_msgs::msg::ObjectClassification obj3_classification;
+  obj3_classification.label = 0;  // Assuming "car" has label 0
+  obj3_classification.probability = 1.0;
+  input_objects[2].classification = {obj3_classification};  // Assuming "car" has label 0
+  input_objects[2].kinematics.pose_with_covariance.pose.position.x = 5.0;
+  input_objects[2].kinematics.pose_with_covariance.pose.position.y = 5.0;
+  input_objects[2].kinematics.pose_with_covariance.pose.orientation.x = 0.0;
+  input_objects[2].kinematics.pose_with_covariance.pose.orientation.y = 0.0;
+  input_objects[2].kinematics.pose_with_covariance.pose.orientation.z = 0.0;
+  input_objects[2].kinematics.pose_with_covariance.pose.orientation.w = 1.0;
+  input_objects[2].shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+  input_objects[2].shape.dimensions.x = 4.0;
+  input_objects[2].shape.dimensions.y = 2.0;
+
+  // Object 4 (different class)
+  autoware_perception_msgs::msg::ObjectClassification obj4_classification;
+  obj4_classification.label = 1;  // Assuming "pedestrian" has label 1
+  obj4_classification.probability = 1.0;
+  input_objects[3].classification = {obj4_classification};  // Assuming "pedestrian" has label 1
+  input_objects[3].kinematics.pose_with_covariance.pose.position.x = 0.0;
+  input_objects[3].kinematics.pose_with_covariance.pose.position.y = 0.0;
+  input_objects[3].kinematics.pose_with_covariance.pose.orientation.x = 0.0;
+  input_objects[3].kinematics.pose_with_covariance.pose.orientation.y = 0.0;
+  input_objects[3].kinematics.pose_with_covariance.pose.orientation.z = 0.0;
+  input_objects[3].kinematics.pose_with_covariance.pose.orientation.w = 1.0;
+  input_objects[3].shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+  input_objects[3].shape.dimensions.x = 0.5;
+  input_objects[3].shape.dimensions.y = 0.5;
+
+  std::vector<lidar_transfusion::DetectedObject> output_objects = nms.apply(input_objects);
+
+  // Assert the expected number of output objects
+  EXPECT_EQ(output_objects.size(), 3);
+
+  // Assert that input_objects[2] is included in the output_objects
+  bool is_input_object_2_included = false;
+  for (const auto & output_object : output_objects) {
+    if (output_object == input_objects[2]) {
+      is_input_object_2_included = true;
+      break;
+    }
+  }
+  EXPECT_TRUE(is_input_object_2_included);
+
+  // Assert that input_objects[3] is included in the output_objects
+  bool is_input_object_3_included = false;
+  for (const auto & output_object : output_objects) {
+    if (output_object == input_objects[3]) {
+      is_input_object_3_included = true;
+      break;
+    }
+  }
+  EXPECT_TRUE(is_input_object_3_included);
+}
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_postprocess_kernel.cpp b/perception/lidar_transfusion/test/test_postprocess_kernel.cpp
new file mode 100644
index 0000000000000..1e01db5a8b3d7
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_postprocess_kernel.cpp
@@ -0,0 +1,299 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_postprocess_kernel.hpp"
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+void PostprocessKernelTest::SetUp()
+{
+  cudaStreamCreate(&stream_);
+
+  auto voxels_num = std::vector<int64_t>{1, 3, 5};
+  auto point_cloud_range = std::vector<double>{-76.8, -76.8, -3.0, 76.8, 76.8, 5.0};
+  auto voxel_size = std::vector<double>{0.3, 0.3, 8.0};
+  auto score_threshold = 0.2f;
+  auto circle_nms_dist_threshold = 0.5f;
+  auto yaw_norm_thresholds = std::vector<double>{0.5, 0.5, 0.5};
+  config_ptr_ = std::make_unique<TransfusionConfig>(
+    voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds,
+    score_threshold);
+  post_ptr_ = std::make_unique<PostprocessCuda>(*config_ptr_, stream_);
+
+  cls_size_ = config_ptr_->num_proposals_ * config_ptr_->num_classes_;
+  box_size_ = config_ptr_->num_proposals_ * config_ptr_->num_box_values_;
+  dir_cls_size_ = config_ptr_->num_proposals_ * 2;  // x, y
+
+  cls_output_d_ = cuda::make_unique<float[]>(cls_size_);
+  box_output_d_ = cuda::make_unique<float[]>(box_size_);
+  dir_cls_output_d_ = cuda::make_unique<float[]>(dir_cls_size_);
+
+  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+}
+
+void PostprocessKernelTest::TearDown()
+{
+}
+
+TEST_F(PostprocessKernelTest, EmptyTensorTest)
+{
+  std::vector<Box3D> det_boxes3d;
+
+  CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_));
+
+  EXPECT_EQ(0, det_boxes3d.size());
+}
+
+TEST_F(PostprocessKernelTest, SingleDetectionTest)
+{
+  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  std::vector<Box3D> det_boxes3d;
+  unsigned int det_idx = 42;
+  unsigned int cls_idx = 2;
+
+  constexpr float detection_score = 1.f;
+  constexpr float detection_x = 50.f;
+  constexpr float detection_y = -38.4f;
+  constexpr float detection_z = 1.0;
+  const float detection_raw_x = (detection_x - config_ptr_->min_x_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_x_size_);
+  const float detection_raw_y = (detection_y - config_ptr_->min_y_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_y_size_);
+  const float detection_raw_z = detection_z;
+
+  constexpr float detection_w = 3.2f;
+  constexpr float detection_l = 9.3f;
+  constexpr float detection_h = 1.7f;
+  constexpr float detection_log_w = std::log(detection_w);
+  constexpr float detection_log_l = std::log(detection_l);
+  constexpr float detection_log_h = std::log(detection_h);
+
+  constexpr float detection_yaw = M_PI_4;
+  constexpr float detection_yaw_sin = std::sin(detection_yaw);
+  constexpr float detection_yaw_cos = std::cos(detection_yaw);
+
+  // Set the values in the cuda tensor
+  cudaMemcpy(
+    &cls_output_d_[cls_idx * config_ptr_->num_proposals_ + det_idx], &detection_score,
+    1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &dir_cls_output_d_[det_idx], &detection_yaw_sin, 1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &dir_cls_output_d_[det_idx + config_ptr_->num_proposals_], &detection_yaw_cos,
+    1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(&box_output_d_[det_idx], &detection_raw_x, 1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + config_ptr_->num_proposals_], &detection_raw_y, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 2 * config_ptr_->num_proposals_], &detection_raw_z, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 3 * config_ptr_->num_proposals_], &detection_log_w, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 4 * config_ptr_->num_proposals_], &detection_log_l, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 5 * config_ptr_->num_proposals_], &detection_log_h, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  auto code1 = cudaGetLastError();
+  ASSERT_EQ(cudaSuccess, code1);
+
+  // Extract the boxes
+  auto code2 = post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  ASSERT_EQ(cudaSuccess, code2);
+  ASSERT_EQ(1, det_boxes3d.size());
+
+  const auto det_box3d = det_boxes3d[0];
+  EXPECT_EQ(cls_idx, det_box3d.label);
+  EXPECT_EQ(detection_score, det_box3d.score);
+  EXPECT_EQ(detection_x, det_box3d.x);
+  EXPECT_EQ(detection_y, det_box3d.y);
+  EXPECT_EQ(detection_z, det_box3d.z);
+  EXPECT_NEAR(detection_w, det_box3d.width, 1e-6);
+  EXPECT_NEAR(detection_l, det_box3d.length, 1e-6);
+  EXPECT_NEAR(detection_h, det_box3d.height, 1e-6);
+  EXPECT_EQ(detection_yaw, det_box3d.yaw);
+}
+
+TEST_F(PostprocessKernelTest, InvalidYawTest)
+{
+  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  std::vector<Box3D> det_boxes3d;
+  unsigned int det_idx = 42;
+  unsigned int cls_idx = 2;
+
+  constexpr float detection_score = 1.f;
+  constexpr float detection_x = 50.f;
+  constexpr float detection_y = -38.4f;
+  constexpr float detection_z = 1.0;
+  const float detection_raw_x = (detection_x - config_ptr_->min_x_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_x_size_);
+  const float detection_raw_y = (detection_y - config_ptr_->min_y_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_y_size_);
+  const float detection_raw_z = detection_z;
+
+  constexpr float detection_w = 3.2f;
+  constexpr float detection_l = 9.3f;
+  constexpr float detection_h = 1.7f;
+  constexpr float detection_log_w = std::log(detection_w);
+  constexpr float detection_log_l = std::log(detection_l);
+  constexpr float detection_log_h = std::log(detection_h);
+
+  constexpr float detection_yaw_sin = 0.f;
+  constexpr float detection_yaw_cos = 0.2f;
+
+  // Set the values in the cuda tensor
+  cudaMemcpy(
+    &cls_output_d_[cls_idx * config_ptr_->num_proposals_ + det_idx], &detection_score,
+    1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &dir_cls_output_d_[det_idx], &detection_yaw_sin, 1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &dir_cls_output_d_[det_idx + config_ptr_->num_proposals_], &detection_yaw_cos,
+    1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(&box_output_d_[det_idx], &detection_raw_x, 1 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + config_ptr_->num_proposals_], &detection_raw_y, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 2 * config_ptr_->num_proposals_], &detection_raw_z, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 3 * config_ptr_->num_proposals_], &detection_log_w, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 4 * config_ptr_->num_proposals_], &detection_log_l, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+  cudaMemcpy(
+    &box_output_d_[det_idx + 5 * config_ptr_->num_proposals_], &detection_log_h, 1 * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  auto code1 = cudaGetLastError();
+  ASSERT_EQ(cudaSuccess, code1);
+
+  // Extract the boxes
+  auto code2 = post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  ASSERT_EQ(cudaSuccess, code2);
+  EXPECT_EQ(0, det_boxes3d.size());
+}
+
+TEST_F(PostprocessKernelTest, CircleNMSTest)
+{
+  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  std::vector<Box3D> det_boxes3d;
+  unsigned int det_idx = 42;
+  unsigned int det_num = 2;
+  unsigned int cls_idx = 2;
+
+  constexpr float detection_score = 1.f;
+  constexpr float detection_x = 50.f;
+  constexpr float detection_y = -38.4f;
+  constexpr float detection_z = 1.0;
+  const float detection_raw_x = (detection_x - config_ptr_->min_x_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_x_size_);
+  const float detection_raw_y = (detection_y - config_ptr_->min_y_range_) /
+                                (config_ptr_->num_point_values_ * config_ptr_->voxel_y_size_);
+  const float detection_raw_z = detection_z;
+
+  constexpr float detection_w = 3.2f;
+  constexpr float detection_l = 9.3f;
+  constexpr float detection_h = 1.7f;
+  constexpr float detection_log_w = std::log(detection_w);
+  constexpr float detection_log_l = std::log(detection_l);
+  constexpr float detection_log_h = std::log(detection_h);
+
+  constexpr float detection_yaw = M_PI_4;
+  constexpr float detection_yaw_sin = std::sin(detection_yaw);
+  constexpr float detection_yaw_cos = std::cos(detection_yaw);
+
+  // Set the values in the cuda tensor for 2 detections
+  for (std::size_t i = 0; i < det_num; ++i) {
+    cudaMemcpy(
+      &cls_output_d_[cls_idx * config_ptr_->num_proposals_ + det_idx + i], &detection_score,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &dir_cls_output_d_[det_idx + i], &detection_yaw_sin, 1 * sizeof(float),
+      cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &dir_cls_output_d_[det_idx + i + config_ptr_->num_proposals_], &detection_yaw_cos,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i], &detection_raw_x, 1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i + config_ptr_->num_proposals_], &detection_raw_y,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i + 2 * config_ptr_->num_proposals_], &detection_raw_z,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i + 3 * config_ptr_->num_proposals_], &detection_log_w,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i + 4 * config_ptr_->num_proposals_], &detection_log_l,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(
+      &box_output_d_[det_idx + i + 5 * config_ptr_->num_proposals_], &detection_log_h,
+      1 * sizeof(float), cudaMemcpyHostToDevice);
+  }
+
+  auto code1 = cudaGetLastError();
+  ASSERT_EQ(cudaSuccess, code1);
+
+  // Extract the boxes
+  auto code2 = post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  ASSERT_EQ(cudaSuccess, code2);
+  EXPECT_EQ(1, det_boxes3d.size());
+}
+
+}  // namespace lidar_transfusion
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_postprocess_kernel.hpp b/perception/lidar_transfusion/test/test_postprocess_kernel.hpp
new file mode 100644
index 0000000000000..60fa7882ecc11
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_postprocess_kernel.hpp
@@ -0,0 +1,48 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TEST_POSTPROCESS_KERNEL_HPP_
+#define TEST_POSTPROCESS_KERNEL_HPP_
+
+#include <lidar_transfusion/cuda_utils.hpp>
+#include <lidar_transfusion/postprocess/postprocess_kernel.hpp>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+namespace lidar_transfusion
+{
+
+class PostprocessKernelTest : public testing::Test
+{
+public:
+  void SetUp() override;
+  void TearDown() override;
+
+  std::unique_ptr<PostprocessCuda> post_ptr_{nullptr};
+  std::unique_ptr<TransfusionConfig> config_ptr_{};
+  cudaStream_t stream_{};
+
+  unsigned int cls_size_{0};
+  unsigned int box_size_{0};
+  unsigned int dir_cls_size_{0};
+  cuda::unique_ptr<float[]> cls_output_d_{nullptr};
+  cuda::unique_ptr<float[]> box_output_d_{nullptr};
+  cuda::unique_ptr<float[]> dir_cls_output_d_{nullptr};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // TEST_POSTPROCESS_KERNEL_HPP_
diff --git a/perception/lidar_transfusion/test/test_preprocess_kernel.cpp b/perception/lidar_transfusion/test/test_preprocess_kernel.cpp
new file mode 100644
index 0000000000000..c1c2a824f0f53
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_preprocess_kernel.cpp
@@ -0,0 +1,265 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_preprocess_kernel.hpp"
+
+#include "lidar_transfusion/cuda_utils.hpp"
+
+#include <lidar_transfusion/preprocess/preprocess_kernel.hpp>
+
+#include <gtest/gtest.h>
+#include <thrust/host_vector.h>
+#include <thrust/reduce.h>
+
+#include <cmath>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+void PreprocessKernelTest::SetUp()
+{
+  cudaStreamCreate(&stream_);
+
+  auto voxels_num = std::vector<int64_t>{1, 3, 5};
+  auto point_cloud_range = std::vector<double>{-76.8, -76.8, -3.0, 76.8, 76.8, 5.0};
+  auto voxel_size = std::vector<double>{0.3, 0.3, 8.0};
+  auto score_threshold = 0.2f;
+  auto circle_nms_dist_threshold = 0.5f;
+  auto yaw_norm_thresholds = std::vector<double>{0.5, 0.5, 0.5};
+  config_ptr_ = std::make_unique<TransfusionConfig>(
+    voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds,
+    score_threshold);
+  pre_ptr_ = std::make_unique<PreprocessCuda>(*config_ptr_, stream_);
+
+  voxel_features_size_ = config_ptr_->max_voxels_ * config_ptr_->max_num_points_per_pillar_ *
+                         config_ptr_->num_point_feature_size_;
+  voxel_num_size_ = config_ptr_->max_voxels_;
+  voxel_idxs_size_ = config_ptr_->max_voxels_ * config_ptr_->num_point_values_;
+
+  params_input_d_ = cuda::make_unique<unsigned int>();
+  voxel_features_d_ = cuda::make_unique<float[]>(voxel_features_size_);
+  voxel_num_d_ = cuda::make_unique<unsigned int[]>(voxel_num_size_);
+  voxel_idxs_d_ = cuda::make_unique<unsigned int[]>(voxel_idxs_size_);
+  points_d_ =
+    cuda::make_unique<float[]>(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+
+  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+  cuda::clear_async(params_input_d_.get(), 1, stream_);
+  cuda::clear_async(
+    points_d_.get(), config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+}
+
+void PreprocessKernelTest::TearDown()
+{
+}
+
+TEST_F(PreprocessKernelTest, EmptyVoxelTest)
+{
+  std::vector<float> points{};
+  std::size_t count = 0;
+
+  pre_ptr_->generateVoxels(
+    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+    voxel_idxs_d_.get());
+  unsigned int params_input;
+  auto code1 = cudaStreamSynchronize(stream_);
+
+  auto code2 = cudaMemcpyAsync(
+    &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  auto code3 = cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  ASSERT_EQ(cudaSuccess, code1);
+  ASSERT_EQ(cudaSuccess, code2);
+  ASSERT_EQ(cudaSuccess, code3);
+
+  EXPECT_EQ(0, params_input);
+}
+
+TEST_F(PreprocessKernelTest, BasicTest)
+{
+  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+  cuda::clear_async(params_input_d_.get(), 1, stream_);
+  cuda::clear_async(
+    points_d_.get(), config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_, stream_);
+  cudaStreamSynchronize(stream_);
+
+  std::vector<float> point1{25.f, -61.1f, 1.8f, 42.0, 0.1f};
+  std::vector<float> point2{-31.6f, 1.1f, 2.8f, 77.0, 0.1f};
+  std::vector<float> point3{42.6f, 15.1f, -0.1f, 3.0, 0.1f};
+
+  std::vector<float> points;
+  points.reserve(point1.size() + point2.size() + point3.size());
+  points.insert(points.end(), point1.begin(), point1.end());
+  points.insert(points.end(), point2.begin(), point2.end());
+  points.insert(points.end(), point3.begin(), point3.end());
+  std::size_t count = 3;
+
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  // Compute the total amount of voxels filled
+  pre_ptr_->generateVoxels(
+    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+    voxel_idxs_d_.get());
+
+  unsigned int params_input;
+  CHECK_CUDA_ERROR(
+    cudaMemcpy(&params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost));
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  ASSERT_EQ(count, params_input);
+
+  // Check if voxels were filled
+  unsigned int voxel_num;
+
+  for (std::size_t i = 0; i < count; ++i) {
+    CHECK_CUDA_ERROR(
+      cudaMemcpy(&voxel_num, voxel_num_d_.get() + i, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+    EXPECT_EQ(1, voxel_num);
+  }
+
+  // Check grid indices
+  thrust::host_vector<unsigned int> voxel_idxs(config_ptr_->num_point_values_ * count, 0);
+  unsigned int voxel_idx_gt;
+  unsigned int voxel_idy_gt;
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    voxel_idxs.data(), voxel_idxs_d_.get(),
+    config_ptr_->num_point_values_ * count * sizeof(unsigned int), cudaMemcpyDeviceToHost));
+
+  for (std::size_t i = 0; i < count; ++i) {
+    voxel_idx_gt = std::floor(
+      (points[config_ptr_->num_point_feature_size_ * i + 0] - config_ptr_->min_x_range_) /
+      config_ptr_->voxel_x_size_);
+    voxel_idy_gt = std::floor(
+      (points[config_ptr_->num_point_feature_size_ * i + 1] - config_ptr_->min_y_range_) /
+      config_ptr_->voxel_y_size_);
+
+    EXPECT_EQ(
+      voxel_idx_gt,
+      voxel_idxs[config_ptr_->num_point_values_ * i + 3]);  // {0, 0, voxel_idy, VOXEL_IDX}
+    EXPECT_EQ(
+      voxel_idy_gt,
+      voxel_idxs[config_ptr_->num_point_values_ * i + 2]);  // {0, 0, VOXEL_IDY, voxel_idx}
+  }
+
+  // Check voxel features
+  thrust::host_vector<float> voxel_features(
+    count * config_ptr_->max_num_points_per_pillar_ * config_ptr_->num_point_feature_size_, 0.f);
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    voxel_features.data(), voxel_features_d_.get(),
+    count * config_ptr_->max_num_points_per_pillar_ * config_ptr_->num_point_feature_size_ *
+      sizeof(float),
+    cudaMemcpyDeviceToHost));
+
+  for (std::size_t i = 0; i < count; ++i) {
+    for (std::size_t j = 0; j < config_ptr_->num_point_feature_size_; ++j) {
+      EXPECT_EQ(
+        points[config_ptr_->num_point_feature_size_ * i + j],
+        voxel_features
+          [i * config_ptr_->max_num_points_per_pillar_ * config_ptr_->num_point_feature_size_ + j]);
+    }
+  }
+}
+
+TEST_F(PreprocessKernelTest, OutOfRangeTest)
+{
+  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+  cuda::clear_async(params_input_d_.get(), 1, stream_);
+  cuda::clear_async(
+    points_d_.get(), config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_, stream_);
+  cudaStreamSynchronize(stream_);
+
+  std::vector<float> points{25.f, config_ptr_->max_y_range_ + 0.1f, 2.1f, 55.f, 0.1f};
+  std::size_t count = 0;
+
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  pre_ptr_->generateVoxels(
+    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+    voxel_idxs_d_.get());
+
+  unsigned int params_input;
+  CHECK_CUDA_ERROR(
+    cudaMemcpy(&params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost));
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  EXPECT_EQ(count, params_input);
+}
+
+TEST_F(PreprocessKernelTest, VoxelOverflowTest)
+{
+  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+  cuda::clear_async(params_input_d_.get(), 1, stream_);
+  cuda::clear_async(
+    points_d_.get(), config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_, stream_);
+  cudaStreamSynchronize(stream_);
+
+  std::vector<float> point{config_ptr_->min_x_range_, -61.1f, 1.8f, 42.f, 0.1f};
+  std::size_t count = config_ptr_->max_voxels_ + 1;
+  std::vector<float> points{};
+  points.reserve(count * config_ptr_->num_point_feature_size_);
+
+  for (std::size_t i = 0; i < count; ++i) {
+    points.insert(points.end(), point.begin(), point.end());
+    point[0] += config_ptr_->voxel_x_size_;
+  }
+
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  pre_ptr_->generateVoxels(
+    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+    voxel_idxs_d_.get());
+
+  ASSERT_EQ(cudaSuccess, cudaGetLastError());
+
+  unsigned int params_input;
+  CHECK_CUDA_ERROR(
+    cudaMemcpy(&params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost));
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    points_d_.get(), points.data(), count * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  EXPECT_EQ(config_ptr_->max_voxels_, params_input);
+}
+
+}  // namespace lidar_transfusion
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_preprocess_kernel.hpp b/perception/lidar_transfusion/test/test_preprocess_kernel.hpp
new file mode 100644
index 0000000000000..024fae5eae4b1
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_preprocess_kernel.hpp
@@ -0,0 +1,50 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TEST_PREPROCESS_KERNEL_HPP_
+#define TEST_PREPROCESS_KERNEL_HPP_
+
+#include <lidar_transfusion/cuda_utils.hpp>
+#include <lidar_transfusion/preprocess/preprocess_kernel.hpp>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+namespace lidar_transfusion
+{
+
+class PreprocessKernelTest : public testing::Test
+{
+public:
+  void SetUp() override;
+  void TearDown() override;
+
+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};
+  std::unique_ptr<TransfusionConfig> config_ptr_{};
+  cudaStream_t stream_{};
+
+  unsigned int voxel_features_size_{0};
+  unsigned int voxel_num_size_{0};
+  unsigned int voxel_idxs_size_{0};
+  cuda::unique_ptr<float[]> points_d_{nullptr};
+  cuda::unique_ptr<unsigned int> params_input_d_{nullptr};
+  cuda::unique_ptr<float[]> voxel_features_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_num_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_idxs_d_{nullptr};
+};
+
+}  // namespace lidar_transfusion
+
+#endif  // TEST_PREPROCESS_KERNEL_HPP_
diff --git a/perception/lidar_transfusion/test/test_ros_utils.cpp b/perception/lidar_transfusion/test/test_ros_utils.cpp
new file mode 100644
index 0000000000000..15541e6353b42
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_ros_utils.cpp
@@ -0,0 +1,104 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_transfusion/ros_utils.hpp"
+
+#include <gtest/gtest.h>
+
+TEST(TestSuite, box3DToDetectedObject)
+{
+  std::vector<std::string> class_names = {"CAR",     "TRUCK",      "BUS",       "TRAILER",
+                                          "BICYCLE", "MOTORCYCLE", "PEDESTRIAN"};
+
+  // Test case 1: Test with valid label
+  {
+    lidar_transfusion::Box3D box3d;
+    box3d.label = 0;  // CAR
+    box3d.score = 0.8f;
+    box3d.x = 1.0;
+    box3d.y = 2.0;
+    box3d.z = 3.0;
+    box3d.width = 2.0;
+    box3d.height = 1.5;
+    box3d.length = 4.0;
+    box3d.yaw = 0.5;
+
+    autoware_perception_msgs::msg::DetectedObject obj;
+    lidar_transfusion::box3DToDetectedObject(box3d, class_names, obj);
+
+    EXPECT_FLOAT_EQ(obj.existence_probability, 0.8f);
+    EXPECT_EQ(
+      obj.classification[0].label, autoware_perception_msgs::msg::ObjectClassification::CAR);
+    EXPECT_FLOAT_EQ(obj.kinematics.pose_with_covariance.pose.position.x, 1.0);
+    EXPECT_FLOAT_EQ(obj.kinematics.pose_with_covariance.pose.position.y, 2.0);
+    EXPECT_FLOAT_EQ(obj.kinematics.pose_with_covariance.pose.position.z, 3.0);
+    EXPECT_FLOAT_EQ(obj.shape.dimensions.x, 4.0);
+    EXPECT_FLOAT_EQ(obj.shape.dimensions.y, 2.0);
+    EXPECT_FLOAT_EQ(obj.shape.dimensions.z, 1.5);
+    EXPECT_FALSE(obj.kinematics.has_position_covariance);
+    EXPECT_FALSE(obj.kinematics.has_twist);
+    EXPECT_FALSE(obj.kinematics.has_twist_covariance);
+  }
+
+  // Test case 2: Test with invalid label
+  {
+    lidar_transfusion::Box3D box3d;
+    box3d.score = 0.5f;
+    box3d.label = 10;  // Invalid
+
+    autoware_perception_msgs::msg::DetectedObject obj;
+    lidar_transfusion::box3DToDetectedObject(box3d, class_names, obj);
+
+    EXPECT_FLOAT_EQ(obj.existence_probability, 0.5f);
+    EXPECT_EQ(
+      obj.classification[0].label, autoware_perception_msgs::msg::ObjectClassification::UNKNOWN);
+    EXPECT_FALSE(obj.kinematics.has_position_covariance);
+    EXPECT_FALSE(obj.kinematics.has_twist);
+    EXPECT_FALSE(obj.kinematics.has_twist_covariance);
+  }
+}
+
+TEST(TestSuite, getSemanticType)
+{
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("CAR"),
+    autoware_perception_msgs::msg::ObjectClassification::CAR);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("TRUCK"),
+    autoware_perception_msgs::msg::ObjectClassification::TRUCK);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("BUS"),
+    autoware_perception_msgs::msg::ObjectClassification::BUS);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("TRAILER"),
+    autoware_perception_msgs::msg::ObjectClassification::TRAILER);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("MOTORCYCLE"),
+    autoware_perception_msgs::msg::ObjectClassification::MOTORCYCLE);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("BICYCLE"),
+    autoware_perception_msgs::msg::ObjectClassification::BICYCLE);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("PEDESTRIAN"),
+    autoware_perception_msgs::msg::ObjectClassification::PEDESTRIAN);
+  EXPECT_EQ(
+    lidar_transfusion::getSemanticType("UNKNOWN"),
+    autoware_perception_msgs::msg::ObjectClassification::UNKNOWN);
+}
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_voxel_generator.cpp b/perception/lidar_transfusion/test/test_voxel_generator.cpp
new file mode 100644
index 0000000000000..2673a341b3721
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_voxel_generator.cpp
@@ -0,0 +1,284 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_voxel_generator.hpp"
+
+#include "gtest/gtest.h"
+#include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
+
+#include <autoware_point_types/types.hpp>
+
+#include "sensor_msgs/point_cloud2_iterator.hpp"
+
+namespace lidar_transfusion
+{
+
+void VoxelGeneratorTest::SetUp()
+{
+  // Setup things that should occur before every test instance should go here
+  node_ = std::make_shared<rclcpp::Node>("voxel_generator_test_node");
+
+  world_frame_ = "map";
+  lidar_frame_ = "lidar";
+  delta_pointcloud_x_ = 1.0;
+  points_per_pointcloud_ = 300;
+
+  voxels_num_ = std::vector<int64_t>{5000, 30000, 60000};
+  point_cloud_range_ = std::vector<double>{-76.8, -76.8, -3.0, 76.8, 76.8, 5.0};
+  voxel_size_ = std::vector<double>{0.3, 0.3, 8.0};
+  score_threshold_ = 0.2f;
+  circle_nms_dist_threshold_ = 0.5f;
+  yaw_norm_thresholds_ = std::vector<double>{0.5, 0.5, 0.5};
+  config_ptr_ = std::make_unique<TransfusionConfig>(
+    voxels_num_, point_cloud_range_, voxel_size_, circle_nms_dist_threshold_, yaw_norm_thresholds_,
+    score_threshold_);
+
+  cloud1_ = std::make_unique<sensor_msgs::msg::PointCloud2>();
+  cloud2_ = std::make_unique<sensor_msgs::msg::PointCloud2>();
+
+  // Set up the fields
+  point_cloud_msg_wrapper::PointCloud2Modifier<
+    autoware_point_types::PointXYZIRCAEDT, autoware_point_types::PointXYZIRCAEDTGenerator>
+    modifier{*cloud1_, lidar_frame_};
+
+  // Resize the cloud to hold points_per_pointcloud_ points
+  modifier.resize(points_per_pointcloud_);
+
+  // Create an iterator for desired fields
+  sensor_msgs::PointCloud2Iterator<float> iter_x(*cloud1_, "x");
+  sensor_msgs::PointCloud2Iterator<float> iter_y(*cloud1_, "y");
+  sensor_msgs::PointCloud2Iterator<float> iter_z(*cloud1_, "z");
+  sensor_msgs::PointCloud2Iterator<uint8_t> iter_i(*cloud1_, "intensity");
+
+  // Populate the point cloud
+  for (size_t i = 0; i < modifier.size(); ++i, ++iter_x, ++iter_y, ++iter_z) {
+    *iter_x = static_cast<float>(i);
+    *iter_y = static_cast<float>(i);
+    *iter_z = static_cast<float>(i);
+  }
+  for (size_t i = 0; i < modifier.size(); ++i, ++iter_i) {
+    *iter_i = static_cast<uint8_t>(i % 256);
+  }
+
+  *cloud2_ = *cloud1_;
+
+  // Set the stamps for the point clouds. They usually come every 100ms
+  cloud1_->header.stamp.sec = 1;
+  cloud1_->header.stamp.nanosec = 100'000'000;
+  cloud2_->header.stamp.sec = 1;
+  cloud2_->header.stamp.nanosec = 200'000'000;
+
+  tf2_buffer_ = std::make_unique<tf2_ros::Buffer>(node_->get_clock());
+  tf2_buffer_->setUsingDedicatedThread(true);
+
+  // The vehicle moves 1m/s in the x direction
+  const double world_origin_x = 6'371'000.0;
+  const double world_origin_y = 1'371'000.0;
+
+  transform1_.header.stamp = cloud1_->header.stamp;
+  transform1_.header.frame_id = world_frame_;
+  transform1_.child_frame_id = lidar_frame_;
+  transform1_.transform.translation.x = world_origin_x;
+  transform1_.transform.translation.y = world_origin_y;
+  transform1_.transform.translation.z = 1.8;
+  transform1_.transform.rotation.x = 0.0;
+  transform1_.transform.rotation.y = 0.0;
+  transform1_.transform.rotation.z = 0.0;
+  transform1_.transform.rotation.w = 1.0;
+
+  transform2_ = transform1_;
+  transform2_.header.stamp = cloud2_->header.stamp;
+  transform2_.transform.translation.x = world_origin_x + delta_pointcloud_x_;
+
+  cudaStreamCreate(&stream_);
+}
+
+void VoxelGeneratorTest::TearDown()
+{
+}
+
+TEST_F(VoxelGeneratorTest, CloudInfo)
+{
+  CloudInfo cloud_info{};
+  EXPECT_EQ(cloud1_->is_bigendian, cloud_info.is_bigendian);
+  EXPECT_EQ(cloud1_->fields[0].name, "x");
+  EXPECT_EQ(cloud1_->fields[0].datatype, cloud_info.x_datatype);
+  EXPECT_EQ(cloud1_->fields[0].count, cloud_info.x_count);
+  EXPECT_EQ(cloud1_->fields[0].offset, cloud_info.x_offset);
+  EXPECT_EQ(cloud1_->fields[1].name, "y");
+  EXPECT_EQ(cloud1_->fields[1].datatype, cloud_info.y_datatype);
+  EXPECT_EQ(cloud1_->fields[1].count, cloud_info.y_count);
+  EXPECT_EQ(cloud1_->fields[1].offset, cloud_info.y_offset);
+  EXPECT_EQ(cloud1_->fields[2].name, "z");
+  EXPECT_EQ(cloud1_->fields[2].datatype, cloud_info.z_datatype);
+  EXPECT_EQ(cloud1_->fields[2].count, cloud_info.z_count);
+  EXPECT_EQ(cloud1_->fields[2].offset, cloud_info.z_offset);
+  EXPECT_EQ(cloud1_->fields[3].name, "intensity");
+  EXPECT_EQ(cloud1_->fields[3].datatype, cloud_info.intensity_datatype);
+  EXPECT_EQ(cloud1_->fields[3].count, cloud_info.intensity_count);
+  EXPECT_EQ(cloud1_->fields[3].offset, cloud_info.intensity_offset);
+  EXPECT_EQ(cloud1_->width, points_per_pointcloud_);
+  EXPECT_EQ(cloud1_->height, 1);
+}
+
+TEST_F(VoxelGeneratorTest, SingleFrame)
+{
+  const unsigned int num_past_frames = 0;  // only current frame
+  DensificationParam param(world_frame_, num_past_frames);
+  VoxelGenerator voxel_generator(param, *config_ptr_, stream_);
+  std::vector<float> points;
+  points.resize(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  std::fill(points.begin(), points.end(), std::nan(""));
+
+  auto points_d =
+    cuda::make_unique<float[]>(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  cudaMemcpy(
+    points_d.get(), points.data(),
+    points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_);
+  std::size_t generated_points_num = voxel_generator.generateSweepPoints(*cloud1_, points_d);
+
+  cudaMemcpy(
+    points.data(), points_d.get(),
+    points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyDeviceToHost);
+
+  EXPECT_TRUE(status1);
+  EXPECT_EQ(generated_points_num, points_per_pointcloud_);
+
+  // Check valid points
+  for (std::size_t i = 0; i < points_per_pointcloud_; ++i) {
+    // There are no tf conversions
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 0]);
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 1]);
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 2]);
+    EXPECT_EQ(static_cast<double>(i % 256), points[i * config_ptr_->num_point_feature_size_ + 3]);
+    EXPECT_EQ(static_cast<double>(0), points[i * config_ptr_->num_point_feature_size_ + 4]);
+  }
+
+  // Check invalid points
+  for (std::size_t i = points_per_pointcloud_ * config_ptr_->num_point_feature_size_;
+       i < points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * 2; ++i) {
+    EXPECT_TRUE(std::isnan(points[i]));
+  }
+}
+
+TEST_F(VoxelGeneratorTest, TwoFramesNoTf)
+{
+  const unsigned int num_past_frames = 1;
+  DensificationParam param(world_frame_, num_past_frames);
+  VoxelGenerator voxel_generator(param, *config_ptr_, stream_);
+  std::vector<float> points;
+  points.resize(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  std::fill(points.begin(), points.end(), std::nan(""));
+
+  auto points_d =
+    cuda::make_unique<float[]>(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  cudaMemcpy(
+    points_d.get(), points.data(),
+    points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_);
+  bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_);
+  std::size_t generated_points_num = voxel_generator.generateSweepPoints(*cloud1_, points_d);
+
+  cudaMemcpy(
+    points.data(), points_d.get(),
+    points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyDeviceToHost);
+
+  EXPECT_FALSE(status1);
+  EXPECT_FALSE(status2);
+  EXPECT_EQ(0, generated_points_num);
+}
+
+TEST_F(VoxelGeneratorTest, TwoFrames)
+{
+  const unsigned int num_past_frames = 1;
+  DensificationParam param(world_frame_, num_past_frames);
+  VoxelGenerator voxel_generator(param, *config_ptr_, stream_);
+  std::vector<float> points;
+  points.resize(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  std::fill(points.begin(), points.end(), std::nan(""));
+
+  auto points_d =
+    cuda::make_unique<float[]>(config_ptr_->cloud_capacity_ * config_ptr_->num_point_feature_size_);
+  cudaMemcpy(
+    points_d.get(), points.data(),
+    2 * points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyHostToDevice);
+
+  tf2_buffer_->setTransform(transform1_, "authority1");
+  tf2_buffer_->setTransform(transform2_, "authority1");
+
+  bool status1 = voxel_generator.enqueuePointCloud(*cloud1_, *tf2_buffer_);
+  bool status2 = voxel_generator.enqueuePointCloud(*cloud2_, *tf2_buffer_);
+  std::size_t generated_points_num = voxel_generator.generateSweepPoints(*cloud1_, points_d);
+
+  cudaMemcpy(
+    points.data(), points_d.get(),
+    2 * points_per_pointcloud_ * config_ptr_->num_point_feature_size_ * sizeof(float),
+    cudaMemcpyDeviceToHost);
+
+  EXPECT_TRUE(status1);
+  EXPECT_TRUE(status2);
+  EXPECT_EQ(2 * points_per_pointcloud_, generated_points_num);
+
+  // Check valid points for the latest pointcloud
+  for (std::size_t i = 0; i < points_per_pointcloud_; ++i) {
+    // There are no tf conversions
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 0]);
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 1]);
+    EXPECT_EQ(static_cast<double>(i), points[i * config_ptr_->num_point_feature_size_ + 2]);
+    EXPECT_EQ(static_cast<double>(i % 256), points[i * config_ptr_->num_point_feature_size_ + 3]);
+    EXPECT_EQ(static_cast<double>(0), points[i * config_ptr_->num_point_feature_size_ + 4]);
+  }
+
+  // Check valid points for the oldest pointcloud
+  for (std::size_t i = 0; i < points_per_pointcloud_; ++i) {
+    // There are tf conversions, so results are not numerically the same
+    EXPECT_NEAR(
+      static_cast<double>(i) - delta_pointcloud_x_,
+      points[(points_per_pointcloud_ + i) * config_ptr_->num_point_feature_size_ + 0], 1e-6);
+    EXPECT_NEAR(
+      static_cast<double>(i),
+      points[(points_per_pointcloud_ + i) * config_ptr_->num_point_feature_size_ + 1], 1e-6);
+    EXPECT_NEAR(
+      static_cast<double>(i),
+      points[(points_per_pointcloud_ + i) * config_ptr_->num_point_feature_size_ + 2], 1e-6);
+    EXPECT_NEAR(
+      static_cast<double>(i % 256),
+      points[(points_per_pointcloud_ + i) * config_ptr_->num_point_feature_size_ + 3], 1e-6);
+    EXPECT_NEAR(
+      0.1, points[(points_per_pointcloud_ + i) * config_ptr_->num_point_feature_size_ + 4], 1e-6);
+  }
+
+  // Check invalid points
+  for (std::size_t i = 2 * points_per_pointcloud_ * config_ptr_->num_point_feature_size_;
+       i < 3 * points_per_pointcloud_ * config_ptr_->num_point_feature_size_; ++i) {
+    EXPECT_TRUE(std::isnan(points[i]));
+  }
+}
+}  // namespace lidar_transfusion
+
+int main(int argc, char ** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  rclcpp::init(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/perception/lidar_transfusion/test/test_voxel_generator.hpp b/perception/lidar_transfusion/test/test_voxel_generator.hpp
new file mode 100644
index 0000000000000..eccbe6412d183
--- /dev/null
+++ b/perception/lidar_transfusion/test/test_voxel_generator.hpp
@@ -0,0 +1,65 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TEST_VOXEL_GENERATOR_HPP_
+#define TEST_VOXEL_GENERATOR_HPP_
+
+#include <lidar_transfusion/preprocess/voxel_generator.hpp>
+#include <rclcpp/rclcpp.hpp>
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace lidar_transfusion
+{
+
+class VoxelGeneratorTest : public testing::Test
+{
+public:
+  void SetUp() override;
+  void TearDown() override;
+
+  // These need to be public so that they can be accessed in the test cases
+  rclcpp::Node::SharedPtr node_{};
+
+  std::unique_ptr<sensor_msgs::msg::PointCloud2> cloud1_{}, cloud2_{};
+  geometry_msgs::msg::TransformStamped transform1_{}, transform2_{};
+
+  std::unique_ptr<DensificationParam> densification_param_ptr_{};
+  std::unique_ptr<TransfusionConfig> config_ptr_{};
+
+  std::unique_ptr<tf2_ros::Buffer> tf2_buffer_{};
+
+  std::string world_frame_{};
+  std::string lidar_frame_{};
+  double delta_pointcloud_x_{};
+  std::size_t points_per_pointcloud_{};
+
+  std::vector<int64_t> voxels_num_{};
+  std::vector<double> point_cloud_range_{};
+  std::vector<double> voxel_size_{};
+  float circle_nms_dist_threshold_{};
+  std::vector<double> yaw_norm_thresholds_{};
+  float score_threshold_{};
+
+  cudaStream_t stream_{};
+};
+}  // namespace lidar_transfusion
+
+#endif  // TEST_VOXEL_GENERATOR_HPP_

From 1f01a7c96140e67eab84b5c7fb9af01279a57ed0 Mon Sep 17 00:00:00 2001
From: Satoshi Tanaka <16330533+scepter914@users.noreply.github.com>
Date: Wed, 10 Jul 2024 15:41:17 +0900
Subject: [PATCH 2/2] feat(lidar_transfusion): update TransFusion-L model
 (#7890)

* add num_proposals

Signed-off-by: scepter914 <scepter914@gmail.com>

* fix config

Signed-off-by: scepter914 <scepter914@gmail.com>

* update README

Signed-off-by: scepter914 <scepter914@gmail.com>

---------

Signed-off-by: scepter914 <scepter914@gmail.com>
---
 perception/lidar_transfusion/README.md              |  4 ++--
 .../lidar_transfusion/config/transfusion.param.yaml |  7 ++++---
 .../lidar_transfusion/transfusion_config.hpp        | 13 ++++++++++---
 .../schema/transfusion.schema.json                  |  6 ++++++
 .../src/lidar_transfusion_node.cpp                  |  5 +++--
 5 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/perception/lidar_transfusion/README.md b/perception/lidar_transfusion/README.md
index e30403141e8ad..6745cc1f7d219 100644
--- a/perception/lidar_transfusion/README.md
+++ b/perception/lidar_transfusion/README.md
@@ -81,9 +81,9 @@ ros2 topic echo <input_topic> --field fields
 
 You can download the onnx format of trained models by clicking on the links below.
 
-- TransFusion: [transfusion.onnx](https://awf.ml.dev.web.auto/perception/models/transfusion/v1/transfusion.onnx)
+- TransFusion: [transfusion.onnx](https://awf.ml.dev.web.auto/perception/models/transfusion/t4xx1_90m/v2/transfusion.onnx)
 
-The model was trained in TIER IV's internal database (~11k lidar frames) for 20 epochs.
+The model was trained in TIER IV's internal database (~11k lidar frames) for 50 epochs.
 
 ### Changelog
 
diff --git a/perception/lidar_transfusion/config/transfusion.param.yaml b/perception/lidar_transfusion/config/transfusion.param.yaml
index feabe359caf1f..2c6680fe50af1 100644
--- a/perception/lidar_transfusion/config/transfusion.param.yaml
+++ b/perception/lidar_transfusion/config/transfusion.param.yaml
@@ -4,8 +4,9 @@
     class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"]
     trt_precision: fp16
     voxels_num: [5000, 30000, 60000]  # [min, opt, max]
-    point_cloud_range: [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0]  # [x_min, y_min, z_min, x_max, y_max, z_max]
-    voxel_size: [0.3, 0.3, 8.0]  # [x, y, z]
+    point_cloud_range: [-92.16, -92.16, -3.0, 92.16, 92.16, 7.0]  # [x_min, y_min, z_min, x_max, y_max, z_max]
+    voxel_size: [0.24, 0.24, 10.0]  # [x, y, z]
+    num_proposals: 500
     onnx_path: "$(var model_path)/transfusion.onnx"
     engine_path: "$(var model_path)/transfusion.engine"
     # pre-process params
@@ -17,4 +18,4 @@
     iou_nms_search_distance_2d: 10.0
     iou_nms_threshold: 0.1
     yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]  # refers to the class_names
-    score_threshold: 0.2
+    score_threshold: 0.1
diff --git a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp
index 31976de56a9da..0ad3ab2231f50 100644
--- a/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp
+++ b/perception/lidar_transfusion/include/lidar_transfusion/transfusion_config.hpp
@@ -26,8 +26,9 @@ class TransfusionConfig
 public:
   TransfusionConfig(
     const std::vector<int64_t> & voxels_num, const std::vector<double> & point_cloud_range,
-    const std::vector<double> & voxel_size, const float circle_nms_dist_threshold,
-    const std::vector<double> & yaw_norm_thresholds, const float score_threshold)
+    const std::vector<double> & voxel_size, const int num_proposals,
+    const float circle_nms_dist_threshold, const std::vector<double> & yaw_norm_thresholds,
+    const float score_threshold)
   {
     if (voxels_num.size() == 3) {
       max_voxels_ = voxels_num[2];
@@ -61,6 +62,9 @@ class TransfusionConfig
       voxel_y_size_ = static_cast<float>(voxel_size[1]);
       voxel_z_size_ = static_cast<float>(voxel_size[2]);
     }
+    if (num_proposals > 0) {
+      num_proposals_ = num_proposals;
+    }
     if (score_threshold > 0.0) {
       score_threshold_ = score_threshold;
     }
@@ -76,6 +80,9 @@ class TransfusionConfig
     grid_x_size_ = static_cast<std::size_t>((max_x_range_ - min_x_range_) / voxel_x_size_);
     grid_y_size_ = static_cast<std::size_t>((max_y_range_ - min_y_range_) / voxel_y_size_);
     grid_z_size_ = static_cast<std::size_t>((max_z_range_ - min_z_range_) / voxel_z_size_);
+
+    feature_x_size_ = grid_x_size_ / out_size_factor_;
+    feature_y_size_ = grid_y_size_ / out_size_factor_;
   }
 
   ///// INPUT PARAMETERS /////
@@ -107,7 +114,7 @@ class TransfusionConfig
   const std::size_t out_size_factor_{4};
   const std::size_t max_num_points_per_pillar_{points_per_voxel_};
   const std::size_t num_point_values_{4};
-  const std::size_t num_proposals_{200};
+  std::size_t num_proposals_{200};
   // the number of feature maps for pillar scatter
   const std::size_t num_feature_scatter_{pillar_feature_size_};
   // the score threshold for classification
diff --git a/perception/lidar_transfusion/schema/transfusion.schema.json b/perception/lidar_transfusion/schema/transfusion.schema.json
index 41d8d887236a8..7debc0edda6fb 100644
--- a/perception/lidar_transfusion/schema/transfusion.schema.json
+++ b/perception/lidar_transfusion/schema/transfusion.schema.json
@@ -61,6 +61,12 @@
           "default": "$(var model_path)/transfusion.engine",
           "pattern": "\\.engine$"
         },
+        "num_proposals": {
+          "type": "integer",
+          "description": "Number of proposals at TransHead.",
+          "default": 500,
+          "minimum": 1
+        },
         "down_sample_factor": {
           "type": "integer",
           "description": "A scale factor of downsampling points",
diff --git a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp
index e3ea6b3780de8..7f5833e60d6d0 100644
--- a/perception/lidar_transfusion/src/lidar_transfusion_node.cpp
+++ b/perception/lidar_transfusion/src/lidar_transfusion_node.cpp
@@ -31,6 +31,7 @@ LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options)
   const auto point_cloud_range =
     this->declare_parameter<std::vector<double>>("point_cloud_range", descriptor);
   const auto voxel_size = this->declare_parameter<std::vector<double>>("voxel_size", descriptor);
+  const int num_proposals = (this->declare_parameter<int>("num_proposals", descriptor));
   const std::string onnx_path = this->declare_parameter<std::string>("onnx_path", descriptor);
   const std::string engine_path = this->declare_parameter<std::string>("engine_path", descriptor);
 
@@ -73,8 +74,8 @@ LidarTransfusionNode::LidarTransfusionNode(const rclcpp::NodeOptions & options)
   DensificationParam densification_param(
     densification_world_frame_id, densification_num_past_frames);
   TransfusionConfig config(
-    voxels_num, point_cloud_range, voxel_size, circle_nms_dist_threshold, yaw_norm_thresholds,
-    score_threshold);
+    voxels_num, point_cloud_range, voxel_size, num_proposals, circle_nms_dist_threshold,
+    yaw_norm_thresholds, score_threshold);
 
   const auto allow_remapping_by_area_matrix =
     this->declare_parameter<std::vector<int64_t>>("allow_remapping_by_area_matrix", descriptor);