From cd841a81e361f757865ecc16de76f265b37422e0 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 31 May 2024 14:15:37 -0700 Subject: [PATCH 01/35] Build Embree with SYCL support and OneAPI 2024.1.0 --- 3rdparty/embree/embree.cmake | 5 +++++ docker/Dockerfile.ci | 5 ++++- docker/docker_build.sh | 8 ++++---- 3 files changed, 13 insertions(+), 5 deletions(-) mode change 100644 => 100755 docker/Dockerfile.ci diff --git a/3rdparty/embree/embree.cmake b/3rdparty/embree/embree.cmake index 2495c987bbd..b2f4446ee02 100644 --- a/3rdparty/embree/embree.cmake +++ b/3rdparty/embree/embree.cmake @@ -64,6 +64,11 @@ else() endif() +if(BUILD_SYCL_MODULE) + set(ISA_ARGS -DEMBREE_SYCL_SUPPORT=ON) +endif() + + ExternalProject_Add( ext_embree PREFIX embree diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci old mode 100644 new mode 100755 index 73feb7bffd6..cd1b5d45981 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -191,9 +191,11 @@ RUN \ export CMAKE_CXX_COMPILER=icpx; \ export CMAKE_C_COMPILER=icx; \ export GLIBCXX_USE_CXX11_ABI=ON; \ + export BUILD_ISPC_MODULE=OFF; \ else \ export CMAKE_CXX_COMPILER=g++; \ export CMAKE_C_COMPILER=gcc; \ + export BUILD_ISPC_MODULE=ON; \ # TODO: PyTorch still use old CXX ABI, remove this line when PyTorch is updated if [ "$BUILD_PYTORCH_OPS" = "ON" ]; then \ export GLIBCXX_USE_CXX11_ABI=OFF; \ @@ -208,11 +210,12 @@ RUN \ -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} \ -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} \ -DBUILD_SYCL_MODULE=${BUILD_SYCL_MODULE} \ + -DBUILD_ISPC_MODULE=${BUILD_ISPC_MODULE} \ -DDEVELOPER_BUILD=${DEVELOPER_BUILD} \ -DBUILD_LIBREALSENSE=ON \ -DBUILD_CUDA_MODULE=${BUILD_CUDA_MODULE} \ -DBUILD_COMMON_CUDA_ARCHS=ON \ - -DBUILD_COMMON_ISPC_ISAS=ON \ + -DBUILD_COMMON_ISPC_ISAS=${BUILD_ISPC_MODULE} \ -DGLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI} \ -DBUILD_TENSORFLOW_OPS=${BUILD_TENSORFLOW_OPS} \ -DBUILD_PYTORCH_OPS=${BUILD_PYTORCH_OPS} \ diff --git a/docker/docker_build.sh b/docker/docker_build.sh index bc150b06e11..1d7c95af93f 100755 --- a/docker/docker_build.sh +++ b/docker/docker_build.sh @@ -76,7 +76,7 @@ HOST_OPEN3D_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null 2>&1 && pw # Shared variables CCACHE_VERSION=4.3 -CMAKE_VERSION=cmake-3.24.4-linux-x86_64 +CMAKE_VERSION=cmake-3.29.2-linux-x86_64 CMAKE_VERSION_AARCH64=cmake-3.24.4-linux-aarch64 CUDA_VERSION=11.7.1-cudnn8 CUDA_VERSION_LATEST=11.8.0-cudnn8 @@ -437,7 +437,7 @@ sycl-shared_export_env() { # https://hub.docker.com/r/intel/oneapi-basekit # https://github.com/intel/oneapi-containers/blob/main/images/docker/basekit/Dockerfile.ubuntu-20.04 - export BASE_IMAGE=intel/oneapi-basekit:2022.2-devel-ubuntu20.04 + export BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu20.04 export DEVELOPER_BUILD=ON export CCACHE_TAR_NAME=open3d-ci-sycl export PYTHON_VERSION=3.8 @@ -453,8 +453,8 @@ sycl-static_export_env() { export DOCKER_TAG=open3d-ci:sycl-static # https://hub.docker.com/r/intel/oneapi-basekit - # https://github.com/intel/oneapi-containers/blob/main/images/docker/basekit/Dockerfile.ubuntu-20.04 - export BASE_IMAGE=intel/oneapi-basekit:2022.2-devel-ubuntu20.04 + # https://github.com/intel/oneapi-containers/blob/main/images/docker/basekit/Dockerfile.ubuntu-18.04 + export BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu20.04 export DEVELOPER_BUILD=ON export CCACHE_TAR_NAME=open3d-ci-sycl export PYTHON_VERSION=3.8 From 97402f9f59422d60870063f889d5dc83973cd567 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 24 Jun 2024 15:20:48 -0700 Subject: [PATCH 02/35] Add correct compiler for embree SYCL --- 3rdparty/embree/embree.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/3rdparty/embree/embree.cmake b/3rdparty/embree/embree.cmake index b2f4446ee02..bfda0fe5109 100644 --- a/3rdparty/embree/embree.cmake +++ b/3rdparty/embree/embree.cmake @@ -65,7 +65,10 @@ endif() if(BUILD_SYCL_MODULE) - set(ISA_ARGS -DEMBREE_SYCL_SUPPORT=ON) + set(ISA_ARGS ${ISA_ARGS} -DCMAKE_CXX_COMPILER=icpx) + set(ISA_ARGS ${ISA_ARGS} -DCMAKE_C_COMPILER=icx) + set(ISA_ARGS ${ISA_ARGS} -DEMBREE_SYCL_SUPPORT=ON) + #list(APPEND ISA_LIBS embree4_sycl) endif() @@ -103,4 +106,4 @@ ExternalProject_Add( ExternalProject_Get_Property(ext_embree INSTALL_DIR) set(EMBREE_INCLUDE_DIRS ${INSTALL_DIR}/include/ ${INSTALL_DIR}/src/ext_embree/) # "/" is critical. set(EMBREE_LIB_DIR ${INSTALL_DIR}/${Open3D_INSTALL_LIB_DIR}) -set(EMBREE_LIBRARIES embree4 ${ISA_LIBS} simd lexers sys math tasking) +set(EMBREE_LIBRARIES embree4_sycl embree4 simd lexers sys math tasking ze_wrapper ${ISA_LIBS}) From 5d90e33d485473f8d3a37e17f1c5abc9a4e82424 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Tue, 2 Jul 2024 08:30:43 -0700 Subject: [PATCH 03/35] Add embree dependency change --- 3rdparty/find_dependencies.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/3rdparty/find_dependencies.cmake b/3rdparty/find_dependencies.cmake index d91377a3138..5a9f2dbb0d9 100644 --- a/3rdparty/find_dependencies.cmake +++ b/3rdparty/find_dependencies.cmake @@ -1897,6 +1897,7 @@ if(NOT USE_SYSTEM_EMBREE) include(${Open3D_3RDPARTY_DIR}/embree/embree.cmake) open3d_import_3rdparty_library(3rdparty_embree HIDDEN + GROUPED INCLUDE_DIRS ${EMBREE_INCLUDE_DIRS} LIB_DIR ${EMBREE_LIB_DIR} LIBRARIES ${EMBREE_LIBRARIES} From c799893f3a4f877879afb3f800aa4e5ac7516ae3 Mon Sep 17 00:00:00 2001 From: Sameer Sheorey Date: Fri, 12 Jul 2024 00:10:09 -0700 Subject: [PATCH 04/35] Fix embree linking error Switch to standard sycl:: namespace for sycl API Rename internal open3d namespace to open3d::core::sy to avoid name collision TODO: Replace deprecated SYCL APIs --- 3rdparty/embree/embree.cmake | 4 +- 3rdparty/find_dependencies.cmake | 1 - cpp/open3d/core/Device.cpp | 2 +- cpp/open3d/core/MemoryManagerSYCL.cpp | 22 +++--- cpp/open3d/core/SYCLContext.cpp | 20 +++--- cpp/open3d/core/SYCLContext.h | 16 ++--- cpp/open3d/core/SYCLUtils.cpp | 88 +++++++++++------------ cpp/open3d/core/SYCLUtils.h | 4 +- cpp/open3d/t/geometry/RaycastingScene.cpp | 3 + cpp/pybind/core/sycl_utils.cpp | 2 +- cpp/tests/core/SYCLUtils.cpp | 8 +-- 11 files changed, 83 insertions(+), 87 deletions(-) diff --git a/3rdparty/embree/embree.cmake b/3rdparty/embree/embree.cmake index bfda0fe5109..1bd89b2ba58 100644 --- a/3rdparty/embree/embree.cmake +++ b/3rdparty/embree/embree.cmake @@ -100,10 +100,12 @@ ExternalProject_Add( /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}sys${CMAKE_STATIC_LIBRARY_SUFFIX} /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}math${CMAKE_STATIC_LIBRARY_SUFFIX} /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}tasking${CMAKE_STATIC_LIBRARY_SUFFIX} + /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_wrapper${CMAKE_STATIC_LIBRARY_SUFFIX} + /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}embree_rthwif${CMAKE_STATIC_LIBRARY_SUFFIX} ${ISA_BUILD_BYPRODUCTS} ) ExternalProject_Get_Property(ext_embree INSTALL_DIR) set(EMBREE_INCLUDE_DIRS ${INSTALL_DIR}/include/ ${INSTALL_DIR}/src/ext_embree/) # "/" is critical. set(EMBREE_LIB_DIR ${INSTALL_DIR}/${Open3D_INSTALL_LIB_DIR}) -set(EMBREE_LIBRARIES embree4_sycl embree4 simd lexers sys math tasking ze_wrapper ${ISA_LIBS}) +set(EMBREE_LIBRARIES embree4_sycl embree4 simd lexers sys math tasking ze_wrapper embree_rthwif ${ISA_LIBS}) diff --git a/3rdparty/find_dependencies.cmake b/3rdparty/find_dependencies.cmake index 5a9f2dbb0d9..d91377a3138 100644 --- a/3rdparty/find_dependencies.cmake +++ b/3rdparty/find_dependencies.cmake @@ -1897,7 +1897,6 @@ if(NOT USE_SYSTEM_EMBREE) include(${Open3D_3RDPARTY_DIR}/embree/embree.cmake) open3d_import_3rdparty_library(3rdparty_embree HIDDEN - GROUPED INCLUDE_DIRS ${EMBREE_INCLUDE_DIRS} LIB_DIR ${EMBREE_LIB_DIR} LIBRARIES ${EMBREE_LIBRARIES} diff --git a/cpp/open3d/core/Device.cpp b/cpp/open3d/core/Device.cpp index 7137e307874..2681fe8dc79 100644 --- a/cpp/open3d/core/Device.cpp +++ b/cpp/open3d/core/Device.cpp @@ -137,7 +137,7 @@ std::vector Device::GetAvailableCUDADevices() { } std::vector Device::GetAvailableSYCLDevices() { - return sycl::GetAvailableSYCLDevices(); + return sy::GetAvailableSYCLDevices(); } void Device::PrintAvailableDevices() { diff --git a/cpp/open3d/core/MemoryManagerSYCL.cpp b/cpp/open3d/core/MemoryManagerSYCL.cpp index c3d642a2c76..d1cc8a229b7 100644 --- a/cpp/open3d/core/MemoryManagerSYCL.cpp +++ b/cpp/open3d/core/MemoryManagerSYCL.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include +#include #include #include @@ -18,24 +18,22 @@ namespace open3d { namespace core { -namespace sy = cl::sycl; - void* MemoryManagerSYCL::Malloc(size_t byte_size, const Device& device) { - const sy::queue& queue = - sycl::SYCLContext::GetInstance().GetDefaultQueue(device); + const sycl::queue& queue = + sy::SYCLContext::GetInstance().GetDefaultQueue(device); #ifdef ENABLE_SYCL_UNIFIED_SHARED_MEMORY - return static_cast(sy::malloc_shared(byte_size, queue)); + return static_cast(sycl::malloc_shared(byte_size, queue)); #else - return static_cast(sy::malloc_device(byte_size, queue)); + return static_cast(sycl::malloc_device(byte_size, queue)); #endif } void MemoryManagerSYCL::Free(void* ptr, const Device& device) { if (ptr) { - const sy::queue& queue = - sycl::SYCLContext::GetInstance().GetDefaultQueue(device); - sy::free(ptr, queue); + const sycl::queue& queue = + sy::SYCLContext::GetInstance().GetDefaultQueue(device); + sycl::free(ptr, queue); } } @@ -62,8 +60,8 @@ void MemoryManagerSYCL::Memcpy(void* dst_ptr, dst_device.ToString()); } - sy::queue queue = - sycl::SYCLContext::GetInstance().GetDefaultQueue(device_with_queue); + sycl::queue queue = + sy::SYCLContext::GetInstance().GetDefaultQueue(device_with_queue); queue.memcpy(dst_ptr, src_ptr, num_bytes).wait_and_throw(); } diff --git a/cpp/open3d/core/SYCLContext.cpp b/cpp/open3d/core/SYCLContext.cpp index 660ab8d2bab..119af0cf5ae 100644 --- a/cpp/open3d/core/SYCLContext.cpp +++ b/cpp/open3d/core/SYCLContext.cpp @@ -7,7 +7,7 @@ #include "open3d/core/SYCLContext.h" -#include +#include #include #include #include @@ -17,7 +17,7 @@ namespace open3d { namespace core { -namespace sycl { +namespace sy { SYCLContext &SYCLContext::GetInstance() { static thread_local SYCLContext instance; @@ -38,7 +38,7 @@ bool SYCLContext::IsDeviceAvailable(const Device &device) { } std::vector SYCLContext::GetAvailableSYCLDevices() { return devices_; } -sy::queue SYCLContext::GetDefaultQueue(const Device &device) { +sycl::queue SYCLContext::GetDefaultQueue(const Device &device) { return device_to_default_queue_.at(device); } @@ -46,12 +46,12 @@ SYCLContext::SYCLContext() { // SYCL GPU. // TODO: Currently we only support one GPU device. try { - const sy::device &sycl_device = sy::device(sy::gpu_selector()); + const sycl::device &sycl_device = sycl::device(sycl::gpu_selector()); const Device open3d_device = Device("SYCL:0"); devices_.push_back(open3d_device); device_to_sycl_device_[open3d_device] = sycl_device; - device_to_default_queue_[open3d_device] = sy::queue(sycl_device); - } catch (const sy::exception &e) { + device_to_default_queue_[open3d_device] = sycl::queue(sycl_device); + } catch (const sycl::exception &e) { } if (devices_.size() == 0) { @@ -59,15 +59,15 @@ SYCLContext::SYCLContext() { // This could happen if the Intel GPGPU driver is not installed or if // your CPU does not have integrated GPU. try { - const sy::device &sycl_device = sy::device(sy::host_selector()); + const sycl::device &sycl_device = sycl::device(sycl::host_selector()); const Device open3d_device = Device("SYCL:0"); utility::LogWarning( "SYCL GPU device is not available, falling back to SYCL " "host device."); devices_.push_back(open3d_device); device_to_sycl_device_[open3d_device] = sycl_device; - device_to_default_queue_[open3d_device] = sy::queue(sycl_device); - } catch (const sy::exception &e) { + device_to_default_queue_[open3d_device] = sycl::queue(sycl_device); + } catch (const sycl::exception &e) { } } @@ -76,6 +76,6 @@ SYCLContext::SYCLContext() { } } -} // namespace sycl +} // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/SYCLContext.h b/cpp/open3d/core/SYCLContext.h index e23b9d8770c..b44e2e3fb96 100644 --- a/cpp/open3d/core/SYCLContext.h +++ b/cpp/open3d/core/SYCLContext.h @@ -14,16 +14,14 @@ #pragma once -#include +#include #include #include "open3d/core/Device.h" namespace open3d { namespace core { -namespace sycl { - -namespace sy = cl::sycl; +namespace sy { /// Singleton SYCL context manager. It maintains: /// - A default queue for each SYCL device @@ -45,7 +43,7 @@ class SYCLContext { std::vector GetAvailableSYCLDevices(); /// Get the default SYCL queue given an Open3D device. - sy::queue GetDefaultQueue(const Device& device); + sycl::queue GetDefaultQueue(const Device& device); private: SYCLContext(); @@ -54,12 +52,12 @@ class SYCLContext { std::vector devices_; /// Maps core::Device to the corresponding default SYCL queue. - std::unordered_map device_to_default_queue_; + std::unordered_map device_to_default_queue_; - /// Maps core::Device to sy::device. Internal use only for now. - std::unordered_map device_to_sycl_device_; + /// Maps core::Device to sycl::device. Internal use only for now. + std::unordered_map device_to_sycl_device_; }; -} // namespace sycl +} // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index d9e42fdcde0..ecbedcaa03b 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -22,45 +22,41 @@ #include "open3d/utility/Logging.h" #ifdef BUILD_SYCL_MODULE -#include +#include #include "open3d/core/SYCLContext.h" #endif namespace open3d { namespace core { -namespace sycl { - -#ifdef BUILD_SYCL_MODULE -namespace sy = cl::sycl; -#endif +namespace sy { int SYCLDemo() { #ifdef BUILD_SYCL_MODULE // Ref: https://intel.github.io/llvm-docs/GetStartedGuide.html // Creating buffer of 4 ints to be used inside the kernel code. - sy::buffer buffer(4); + sycl::buffer buffer(4); // Creating SYCL queue. - sy::queue q; + sycl::queue q; // Size of index space for kernel. - sy::range<1> num_workloads{buffer.size()}; + sycl::range<1> num_workloads{buffer.size()}; // Submitting command group(work) to q. - q.submit([&](sy::handler &cgh) { + q.submit([&](sycl::handler &cgh) { // Getting write only access to the buffer on a device. - auto accessor = buffer.get_access(cgh); + auto accessor = buffer.get_access(cgh); // Execute kernel. - cgh.parallel_for(num_workloads, [=](sy::id<1> WIid) { + cgh.parallel_for(num_workloads, [=](sycl::id<1> WIid) { // Fill buffer with indexes. - accessor[WIid] = (sy::cl_int)WIid.get(0); + accessor[WIid] = (sycl::cl_int)WIid.get(0); }); }); // Getting read only access to the buffer on the host. // Implicit barrier waiting for q to complete the work. - const auto host_accessor = buffer.get_access(); + const auto host_accessor = buffer.get_access(); // Check the results. bool mismatch_found = false; @@ -87,34 +83,34 @@ int SYCLDemo() { #ifdef BUILD_SYCL_MODULE -static std::string GetDeviceTypeName(const sy::device &device) { - auto device_type = device.get_info(); +static std::string GetDeviceTypeName(const sycl::device &device) { + auto device_type = device.get_info(); switch (device_type) { - case sy::info::device_type::cpu: + case sycl::info::device_type::cpu: return "cpu"; - case sy::info::device_type::gpu: + case sycl::info::device_type::gpu: return "gpu"; - case sy::info::device_type::host: + case sycl::info::device_type::host: return "host"; - case sy::info::device_type::accelerator: + case sycl::info::device_type::accelerator: return "acc"; default: return "unknown"; } } -static std::string GetBackendName(const sy::device &device) { - sy::platform platform = device.get_info(); - sy::backend backend = platform.get_backend(); +static std::string GetBackendName(const sycl::device &device) { + sycl::platform platform = device.get_info(); + sycl::backend backend = platform.get_backend(); std::ostringstream os; os << backend; return os.str(); } -static std::string SYCLDeviceToString(const sy::device &device) { +static std::string SYCLDeviceToString(const sycl::device &device) { std::ostringstream os; os << "[" << GetBackendName(device) << ":" << GetDeviceTypeName(device) - << "] " << device.get_info(); + << "] " << device.get_info(); return os.str(); } #endif @@ -131,68 +127,68 @@ void PrintSYCLDevices(bool print_all) { if (print_all) { utility::LogInfo("# All SYCL devices"); - const std::vector &platforms = - sy::platform::get_platforms(); - for (const sy::platform &platform : platforms) { - sy::backend backend = platform.get_backend(); - const std::vector &devices = platform.get_devices(); - for (const sy::device &device : devices) { + const std::vector &platforms = + sycl::platform::get_platforms(); + for (const sycl::platform &platform : platforms) { + sycl::backend backend = platform.get_backend(); + const std::vector &devices = platform.get_devices(); + for (const sycl::device &device : devices) { utility::LogInfo("- {}", SYCLDeviceToString(device)); } } utility::LogInfo("# Default SYCL selectors"); try { - const sy::device &device = sy::device(sy::default_selector()); + const sycl::device &device = sycl::device(sycl::default_selector()); utility::LogInfo("- sycl::default_selector() : {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- sycl::default_selector() : N/A"); } try { - const sy::device &device = sy::device(sy::host_selector()); + const sycl::device &device = sycl::device(sycl::host_selector()); utility::LogInfo("- sycl::host_selector() : {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- sycl::host_selector() : N/A"); } try { - const sy::device &device = sy::device(sy::cpu_selector()); + const sycl::device &device = sycl::device(sycl::cpu_selector()); utility::LogInfo("- sycl::cpu_selector() : {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- sycl::cpu_selector() : N/A"); } try { - const sy::device &device = sy::device(sy::gpu_selector()); + const sycl::device &device = sycl::device(sycl::gpu_selector()); utility::LogInfo("- sycl::gpu_selector() : {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- sycl::gpu_selector() : N/A"); } try { - const sy::device &device = sy::device(sy::accelerator_selector()); + const sycl::device &device = sycl::device(sycl::accelerator_selector()); utility::LogInfo("- sycl::accelerator_selector(): {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- sycl::accelerator_selector(): N/A"); } utility::LogInfo("# Open3D SYCL device"); try { - const sy::device &device = sy::device(sy::gpu_selector()); + const sycl::device &device = sycl::device(sycl::gpu_selector()); utility::LogInfo("- Device(\"SYCL:0\"): {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- Device(\"SYCL:0\"): N/A"); } } else { utility::LogInfo("# Open3D SYCL device"); try { - const sy::device &device = sy::device(sy::gpu_selector()); + const sycl::device &device = sycl::device(sycl::gpu_selector()); utility::LogInfo("- Device(\"SYCL:0\"): {}", SYCLDeviceToString(device)); - } catch (const sy::exception &e) { + } catch (const sycl::exception &e) { utility::LogInfo("- Device(\"SYCL:0\"): N/A"); } } @@ -227,6 +223,6 @@ std::vector GetAvailableSYCLDevices() { #endif } -} // namespace sycl +} // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/SYCLUtils.h b/cpp/open3d/core/SYCLUtils.h index 385523f3800..a1cb74426fb 100644 --- a/cpp/open3d/core/SYCLUtils.h +++ b/cpp/open3d/core/SYCLUtils.h @@ -19,7 +19,7 @@ namespace open3d { namespace core { -namespace sycl { +namespace sy { /// Runs simple SYCL test program for sanity checks. /// \return Retuns 0 if successful. @@ -40,6 +40,6 @@ bool IsDeviceAvailable(const Device& device); /// Return a list of available SYCL devices. std::vector GetAvailableSYCLDevices(); -} // namespace sycl +} // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index bc003f8cbed..f5fbeca3797 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -12,6 +12,9 @@ #include "open3d/t/geometry/RaycastingScene.h" // This header is in the embree src dir (embree/src/ext_embree/..). +#ifdef BUILD_SYCL_MODULE + #include +#endif #include #include diff --git a/cpp/pybind/core/sycl_utils.cpp b/cpp/pybind/core/sycl_utils.cpp index 0acf8c37a5b..7705575e261 100644 --- a/cpp/pybind/core/sycl_utils.cpp +++ b/cpp/pybind/core/sycl_utils.cpp @@ -13,7 +13,7 @@ namespace open3d { namespace core { void pybind_sycl_utils_definitions(py::module& m) { - m.def("sycl_demo", &sycl::SYCLDemo); + m.def("sycl_demo", &sy::SYCLDemo); } } // namespace core diff --git a/cpp/tests/core/SYCLUtils.cpp b/cpp/tests/core/SYCLUtils.cpp index d10d99f09fa..e4462f53be9 100644 --- a/cpp/tests/core/SYCLUtils.cpp +++ b/cpp/tests/core/SYCLUtils.cpp @@ -18,18 +18,18 @@ namespace open3d { namespace tests { -TEST(SYCLUtils, SYCLDemo) { core::sycl::SYCLDemo(); } +TEST(SYCLUtils, SYCLDemo) { core::sy::SYCLDemo(); } TEST(SYCLUtils, PrintAllSYCLDevices) { - core::sycl::PrintSYCLDevices(/*print_all=*/true); + core::sy::PrintSYCLDevices(/*print_all=*/true); } TEST(SYCLUtils, PrintSYCLDevices) { - core::sycl::PrintSYCLDevices(/*print_all=*/false); + core::sy::PrintSYCLDevices(/*print_all=*/false); } TEST(SYCLUtils, SYCLUnifiedSharedMemory) { - if (!core::sycl::IsAvailable()) { + if (!core::sy::IsAvailable()) { return; } From f3bde48286c73b1d818f387b119345a69155051e Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 26 Jul 2024 13:42:15 -0700 Subject: [PATCH 05/35] Fix SYCL demo typo --- examples/cpp/SYCLDemo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp/SYCLDemo.cpp b/examples/cpp/SYCLDemo.cpp index 2fcd8c55dd1..d72db5ea250 100644 --- a/examples/cpp/SYCLDemo.cpp +++ b/examples/cpp/SYCLDemo.cpp @@ -8,6 +8,6 @@ #include "open3d/core/SYCLUtils.h" int main() { - open3d::core::sycl::SYCLDemo(); + open3d::core::sy::SYCLDemo(); return 0; } From d37e8c177abed41e9650e347feff51772f3c7a29 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 26 Jul 2024 13:43:32 -0700 Subject: [PATCH 06/35] Fix ubuntu CI --- docker/Dockerfile.ci | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index cd1b5d45981..4b281e11963 100755 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -164,7 +164,7 @@ WORKDIR /root/Open3D RUN ./util/install_deps_ubuntu.sh assume-yes # Open3D Python dependencies -RUN source util/ci_utils.sh \ +RUN source /open3d_env/bin/activate && source util/ci_utils.sh \ && if [ "${BUILD_CUDA_MODULE}" = "ON" ]; then \ install_python_dependencies with-cuda with-jupyter; \ else \ @@ -173,7 +173,7 @@ RUN source util/ci_utils.sh \ && pip install -r python/requirements_test.txt # Open3D Jupyter dependencies -RUN mkdir -p /etc/apt/keyrings \ +RUN source /open3d_env/bin/activate && mkdir -p /etc/apt/keyrings \ && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_16.x nodistro main" \ @@ -186,7 +186,7 @@ RUN mkdir -p /etc/apt/keyrings \ && yarn --version # Build all -RUN \ +RUN source /open3d_env/bin/activate && \ if [ "${BUILD_SYCL_MODULE}" = "ON" ]; then \ export CMAKE_CXX_COMPILER=icpx; \ export CMAKE_C_COMPILER=icx; \ @@ -215,7 +215,7 @@ RUN \ -DBUILD_LIBREALSENSE=ON \ -DBUILD_CUDA_MODULE=${BUILD_CUDA_MODULE} \ -DBUILD_COMMON_CUDA_ARCHS=ON \ - -DBUILD_COMMON_ISPC_ISAS=${BUILD_ISPC_MODULE} \ + -DBUILD_COMMON_ISPC_ISAS=${BUILD_ISPC_MODULE} \ -DGLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI} \ -DBUILD_TENSORFLOW_OPS=${BUILD_TENSORFLOW_OPS} \ -DBUILD_PYTORCH_OPS=${BUILD_PYTORCH_OPS} \ From 667dc182bf981d99eb388ce970a5286facf806fb Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 26 Jul 2024 16:27:09 -0700 Subject: [PATCH 07/35] Fix code style --- cpp/open3d/core/MemoryManagerSYCL.cpp | 2 +- cpp/open3d/core/SYCLContext.cpp | 5 +++-- cpp/open3d/core/SYCLUtils.cpp | 12 +++++++----- cpp/open3d/t/geometry/RaycastingScene.cpp | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cpp/open3d/core/MemoryManagerSYCL.cpp b/cpp/open3d/core/MemoryManagerSYCL.cpp index d1cc8a229b7..cfa92ffc804 100644 --- a/cpp/open3d/core/MemoryManagerSYCL.cpp +++ b/cpp/open3d/core/MemoryManagerSYCL.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include #include +#include #include #include "open3d/core/Device.h" diff --git a/cpp/open3d/core/SYCLContext.cpp b/cpp/open3d/core/SYCLContext.cpp index 119af0cf5ae..65e3f9c4bc4 100644 --- a/cpp/open3d/core/SYCLContext.cpp +++ b/cpp/open3d/core/SYCLContext.cpp @@ -7,10 +7,10 @@ #include "open3d/core/SYCLContext.h" -#include #include #include #include +#include #include "open3d/core/SYCLUtils.h" #include "open3d/utility/Logging.h" @@ -59,7 +59,8 @@ SYCLContext::SYCLContext() { // This could happen if the Intel GPGPU driver is not installed or if // your CPU does not have integrated GPU. try { - const sycl::device &sycl_device = sycl::device(sycl::host_selector()); + const sycl::device &sycl_device = + sycl::device(sycl::host_selector()); const Device open3d_device = Device("SYCL:0"); utility::LogWarning( "SYCL GPU device is not available, falling back to SYCL " diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index ecbedcaa03b..ae8ccf63b7d 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -48,10 +48,11 @@ int SYCLDemo() { // Getting write only access to the buffer on a device. auto accessor = buffer.get_access(cgh); // Execute kernel. - cgh.parallel_for(num_workloads, [=](sycl::id<1> WIid) { - // Fill buffer with indexes. - accessor[WIid] = (sycl::cl_int)WIid.get(0); - }); + cgh.parallel_for( + num_workloads, [=](sycl::id<1> WIid) { + // Fill buffer with indexes. + accessor[WIid] = (sycl::cl_int)WIid.get(0); + }); }); // Getting read only access to the buffer on the host. @@ -167,7 +168,8 @@ void PrintSYCLDevices(bool print_all) { utility::LogInfo("- sycl::gpu_selector() : N/A"); } try { - const sycl::device &device = sycl::device(sycl::accelerator_selector()); + const sycl::device &device = + sycl::device(sycl::accelerator_selector()); utility::LogInfo("- sycl::accelerator_selector(): {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index f5fbeca3797..ea05435a13b 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -13,7 +13,7 @@ // This header is in the embree src dir (embree/src/ext_embree/..). #ifdef BUILD_SYCL_MODULE - #include +#include #endif #include #include From 94c0f0f52c86248636c10dd20f9d36145fcfc1b1 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Thu, 1 Aug 2024 18:21:42 -0700 Subject: [PATCH 08/35] Fix python virtualenv in Dockerfile --- docker/Dockerfile.ci | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 4b281e11963..f1054c42e4c 100755 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -164,7 +164,7 @@ WORKDIR /root/Open3D RUN ./util/install_deps_ubuntu.sh assume-yes # Open3D Python dependencies -RUN source /open3d_env/bin/activate && source util/ci_utils.sh \ +RUN source /root/open3d/bin/activate && source util/ci_utils.sh \ && if [ "${BUILD_CUDA_MODULE}" = "ON" ]; then \ install_python_dependencies with-cuda with-jupyter; \ else \ @@ -173,7 +173,7 @@ RUN source /open3d_env/bin/activate && source util/ci_utils.sh \ && pip install -r python/requirements_test.txt # Open3D Jupyter dependencies -RUN source /open3d_env/bin/activate && mkdir -p /etc/apt/keyrings \ +RUN source /root/open3d/bin/activate && mkdir -p /etc/apt/keyrings \ && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_16.x nodistro main" \ @@ -186,7 +186,7 @@ RUN source /open3d_env/bin/activate && mkdir -p /etc/apt/keyrings \ && yarn --version # Build all -RUN source /open3d_env/bin/activate && \ +RUN source /root/open3d/bin/activate && \ if [ "${BUILD_SYCL_MODULE}" = "ON" ]; then \ export CMAKE_CXX_COMPILER=icpx; \ export CMAKE_C_COMPILER=icx; \ From f1515e2e95ac7c46c9a58d715539a078c6ba0301 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Thu, 1 Aug 2024 18:24:53 -0700 Subject: [PATCH 09/35] Update oneAPI version to SYCL shared process --- docker/docker_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/docker_build.sh b/docker/docker_build.sh index 1d7c95af93f..f6f10e8776e 100755 --- a/docker/docker_build.sh +++ b/docker/docker_build.sh @@ -436,7 +436,7 @@ sycl-shared_export_env() { export DOCKER_TAG=open3d-ci:sycl-shared # https://hub.docker.com/r/intel/oneapi-basekit - # https://github.com/intel/oneapi-containers/blob/main/images/docker/basekit/Dockerfile.ubuntu-20.04 + # https://github.com/intel/oneapi-containers/blob/master/images/docker/basekit/Dockerfile.ubuntu-20.04 export BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu20.04 export DEVELOPER_BUILD=ON export CCACHE_TAR_NAME=open3d-ci-sycl @@ -453,7 +453,7 @@ sycl-static_export_env() { export DOCKER_TAG=open3d-ci:sycl-static # https://hub.docker.com/r/intel/oneapi-basekit - # https://github.com/intel/oneapi-containers/blob/main/images/docker/basekit/Dockerfile.ubuntu-18.04 + # https://github.com/intel/oneapi-containers/blob/master/images/docker/basekit/Dockerfile.ubuntu-20.04 export BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu20.04 export DEVELOPER_BUILD=ON export CCACHE_TAR_NAME=open3d-ci-sycl From bd02607c058c9858e1f7ff3f11315a18cec83f5d Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sat, 17 Aug 2024 15:19:59 -0700 Subject: [PATCH 10/35] Add initial SYCL support to RayCastingScene class --- cpp/open3d/t/geometry/RaycastingScene.cpp | 330 ++++++++++++++++-- cpp/open3d/t/geometry/RaycastingScene.h | 10 +- cpp/pybind/core/sycl_utils.cpp | 7 +- cpp/pybind/t/geometry/raycasting_scene.cpp | 10 + python/test/open3d_test.py | 6 + .../test/t/geometry/test_raycasting_scene.py | 57 +-- 6 files changed, 365 insertions(+), 55 deletions(-) mode change 100644 => 100755 python/test/open3d_test.py mode change 100644 => 100755 python/test/t/geometry/test_raycasting_scene.py diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index ea05435a13b..04dfc34563c 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -34,6 +34,17 @@ typedef Eigen::AlignedVector3 Vec3fa; typedef Eigen::Matrix Vec2f; typedef Eigen::Vector3f Vec3f; +void enablePersistentJITCache() +{ +#if defined(_WIN32) + _putenv_s("SYCL_CACHE_PERSISTENT","1"); + _putenv_s("SYCL_CACHE_DIR","cache"); +#else + setenv("SYCL_CACHE_PERSISTENT","1",1); + setenv("SYCL_CACHE_DIR","cache",1); +#endif +} + // Error function called by embree. void ErrorFunction(void* userPtr, enum RTCError error, const char* str) { open3d::utility::LogError("embree error: {} {}", error, str); @@ -329,18 +340,18 @@ namespace t { namespace geometry { struct RaycastingScene::Impl { - // The maximum number of rays used in calls to embree. - const size_t BATCH_SIZE = 1024; - RTCDevice device_; RTCScene scene_; bool scene_committed_; // true if the scene has been committed. + RTCDevice device_; // Vector for storing some information about the added geometry. std::vector> geometry_ptrs_; - core::Device tensor_device_; // cpu + core::Device tensor_device_; // cpu or sycl bool devprop_join_commit; + virtual ~Impl() = default; + void CommitScene() { if (!scene_committed_) { if (devprop_join_commit) { @@ -352,7 +363,80 @@ struct RaycastingScene::Impl { } } - template + virtual void CastRays(const float* const rays, + const size_t num_rays, + float* t_hit, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads, + const bool line_intersection) = 0; + + virtual void TestOcclusions(const float* const rays, + const size_t num_rays, + const float tnear, + const float tfar, + int8_t* occluded, + const int nthreads) = 0; + + virtual void CountIntersections(const float* const rays, + const size_t num_rays, + int* intersections, + const int nthreads) = 0; + + virtual void ListIntersections(const float* const rays, + const size_t num_rays, + const size_t num_intersections, + const Eigen::VectorXi& cumsum, + unsigned int* track_intersections, + unsigned int* ray_ids, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* t_hit, + const int nthreads) = 0; + + virtual void ComputeClosestPoints(const float* const query_points, + const size_t num_query_points, + float* closest_points, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads) = 0; +}; + +#ifdef BUILD_SYCL_MODULE +struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { + // SYCL variables + sycl::queue queue_; + sycl::context context_; + sycl::device sycl_device_; + + void InitializeDevice() { + enablePersistentJITCache(); + + try { + sycl_device_ = sycl::device(rtcSYCLDeviceSelector); + } catch(std::exception& e) { + utility::LogError("Caught exception creating sycl::device: {}", e.what()); + return; + } + + queue_ = sycl::queue(sycl_device_); + context_ = sycl::context(sycl_device_); + + device_ = rtcNewSYCLDevice(context_, ""); + rtcSetDeviceSYCLDevice(device_, sycl_device_); + + if (!device_) { + utility::LogError( + "Error %d: cannot create device\n", + rtcGetDeviceError(NULL)); + } + } + void CastRays(const float* const rays, const size_t num_rays, float* t_hit, @@ -360,7 +444,159 @@ struct RaycastingScene::Impl { unsigned int* primitive_ids, float* primitive_uvs, float* primitive_normals, - const int nthreads) { + const int nthreads, + const bool line_intersection) override { + CommitScene(); + + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + if (line_intersection) { + rh.ray.dir_x = r[3] - r[0]; + rh.ray.dir_y = r[4] - r[1]; + rh.ray.dir_z = r[5] - r[2]; + } else { + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + } + rh.ray.tnear = 0; + if (line_intersection) { + rh.ray.tfar = 1.f; + } else { + rh.ray.tfar = std::numeric_limits::infinity(); + } + rh.ray.mask = -1; + rh.ray.id = i; + rh.ray.flags = 0; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh); + + t_hit[i] = rh.ray.tfar; + if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) { + geometry_ids[i] = rh.hit.geomID; + primitive_ids[i] = rh.hit.primID; + primitive_uvs[i * 2 + 0] = rh.hit.u; + primitive_uvs[i * 2 + 1] = rh.hit.v; + float inv_norm = 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x + + rh.hit.Ng_y * rh.hit.Ng_y + + rh.hit.Ng_z * rh.hit.Ng_z); + primitive_normals[i * 3 + 0] = rh.hit.Ng_x * inv_norm; + primitive_normals[i * 3 + 1] = rh.hit.Ng_y * inv_norm; + primitive_normals[i * 3 + 2] = rh.hit.Ng_z * inv_norm; + } else { + geometry_ids[i] = RTC_INVALID_GEOMETRY_ID; + primitive_ids[i] = RTC_INVALID_GEOMETRY_ID; + primitive_uvs[i * 2 + 0] = 0; + primitive_uvs[i * 2 + 1] = 0; + primitive_normals[i * 3 + 0] = 0; + primitive_normals[i * 3 + 1] = 0; + primitive_normals[i * 3 + 2] = 0; + } + }); + }); + queue_.wait_and_throw(); + } + + void TestOcclusions(const float* const rays, + const size_t num_rays, + const float tnear, + const float tfar, + int8_t* occluded, + const int nthreads) override { + CommitScene(); + + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { + struct RTCRayQueryContext context; + rtcInitRayQueryContext(&context); + + RTCOccludedArguments args; + rtcInitOccludedArguments(&args); + args.context = &context; + + const size_t i = item.get_id(0); + + struct RTCRay ray; + const float* r = &rays[i * 6]; + ray.org_x = r[0]; + ray.org_y = r[1]; + ray.org_z = r[2]; + ray.dir_x = r[3]; + ray.dir_y = r[4]; + ray.dir_z = r[5]; + ray.tnear = tnear; + ray.tfar = tfar; + ray.mask = -1; + ray.id = i; + ray.flags = 0; + + rtcOccluded1(scene, &ray, &args); + + occluded[i] = int8_t( + -std::numeric_limits::infinity() == ray.tfar); + }); + }); + queue_.wait_and_throw(); + } + + void CountIntersections(const float* const rays, + const size_t num_rays, + int* intersections, + const int nthreads) override { + throw std::logic_error("Function not yet implemented"); + } + + void ListIntersections(const float* const rays, + const size_t num_rays, + const size_t num_intersections, + const Eigen::VectorXi& cumsum, + unsigned int* track_intersections, + unsigned int* ray_ids, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* t_hit, + const int nthreads) override { + throw std::logic_error("Function not yet implemented"); + } + + void ComputeClosestPoints(const float* const query_points, + const size_t num_query_points, + float* closest_points, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads) override { + throw std::logic_error("Function not yet implemented"); + } +}; +#endif + +struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { + // The maximum number of rays used in calls to embree. + const size_t BATCH_SIZE = 1024; + + void CastRays(const float* const rays, + const size_t num_rays, + float* t_hit, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads, + const bool line_intersection) override { CommitScene(); auto LoopFn = [&](const tbb::blocked_range& range) { @@ -372,7 +608,7 @@ struct RaycastingScene::Impl { rh.ray.org_x = r[0]; rh.ray.org_y = r[1]; rh.ray.org_z = r[2]; - if (LINE_INTERSECTION) { + if (line_intersection) { rh.ray.dir_x = r[3] - r[0]; rh.ray.dir_y = r[4] - r[1]; rh.ray.dir_z = r[5] - r[2]; @@ -382,7 +618,7 @@ struct RaycastingScene::Impl { rh.ray.dir_z = r[5]; } rh.ray.tnear = 0; - if (LINE_INTERSECTION) { + if (line_intersection) { rh.ray.tfar = 1.f; } else { rh.ray.tfar = std::numeric_limits::infinity(); @@ -442,7 +678,7 @@ struct RaycastingScene::Impl { const float tnear, const float tfar, int8_t* occluded, - const int nthreads) { + const int nthreads) override { CommitScene(); struct RTCRayQueryContext context; @@ -497,7 +733,7 @@ struct RaycastingScene::Impl { void CountIntersections(const float* const rays, const size_t num_rays, int* intersections, - const int nthreads) { + const int nthreads) override { CommitScene(); memset(intersections, 0, sizeof(int) * num_rays); @@ -567,7 +803,7 @@ struct RaycastingScene::Impl { unsigned int* primitive_ids, float* primitive_uvs, float* t_hit, - const int nthreads) { + const int nthreads) override { CommitScene(); memset(track_intersections, 0, sizeof(uint32_t) * num_rays); @@ -645,7 +881,7 @@ struct RaycastingScene::Impl { unsigned int* primitive_ids, float* primitive_uvs, float* primitive_normals, - const int nthreads) { + const int nthreads) override { CommitScene(); auto LoopFn = [&](const tbb::blocked_range& range) { @@ -693,14 +929,31 @@ struct RaycastingScene::Impl { } }; -RaycastingScene::RaycastingScene(int64_t nthreads) - : impl_(new RaycastingScene::Impl()) { - if (nthreads > 0) { - std::string config("threads=" + std::to_string(nthreads)); - impl_->device_ = rtcNewDevice(config.c_str()); +RaycastingScene::RaycastingScene(int64_t nthreads +#ifdef BUILD_SYCL_MODULE + , const core::Device& device +#endif + ) { + +#ifdef BUILD_SYCL_MODULE + if (device.IsSYCL()) { + impl_ = std::make_unique(); + dynamic_cast(impl_.get())->InitializeDevice(); } else { - impl_->device_ = rtcNewDevice(NULL); +#endif + impl_ = std::make_unique(); + + if (nthreads > 0) { + std::string config("threads=" + std::to_string(nthreads)); + impl_->device_ = rtcNewDevice(config.c_str()); + } else { + impl_->device_ = rtcNewDevice(NULL); + } +#ifdef BUILD_SYCL_MODULE } +#endif + + impl_->tensor_device_ = device; rtcSetDeviceErrorFunction(impl_->device_, ErrorFunction, NULL); impl_->scene_ = rtcNewScene(impl_->device_); @@ -747,14 +1000,30 @@ uint32_t RaycastingScene::AddTriangles(const core::Tensor& vertex_positions, 3 * sizeof(uint32_t), num_triangles); { +#ifdef BUILD_SYCL_MODULE auto data = vertex_positions.Contiguous(); - memcpy(vertex_buffer, data.GetDataPtr(), - sizeof(float) * 3 * num_vertices); + if (impl_->tensor_device_.IsSYCL()) { + dynamic_cast(impl_.get())->queue_.memcpy(vertex_buffer, data.GetDataPtr(), sizeof(float) * 3 * num_vertices).wait(); + } else { +#endif + memcpy(vertex_buffer, data.GetDataPtr(), + sizeof(float) * 3 * num_vertices); +#ifdef BUILD_SYCL_MODULE + } +#endif } { auto data = triangle_indices.Contiguous(); - memcpy(index_buffer, data.GetDataPtr(), - sizeof(uint32_t) * 3 * num_triangles); +#ifdef BUILD_SYCL_MODULE + if (impl_->tensor_device_.IsSYCL()) { + dynamic_cast(impl_.get())->queue_.memcpy(index_buffer, data.GetDataPtr(), sizeof(uint32_t) * 3 * num_triangles).wait(); + } else { +#endif + memcpy(index_buffer, data.GetDataPtr(), + sizeof(uint32_t) * 3 * num_triangles); +#ifdef BUILD_SYCL_MODULE + } +#endif } rtcSetGeometryEnableFilterFunctionFromArguments(geom, true); rtcCommitGeometry(geom); @@ -789,22 +1058,23 @@ std::unordered_map RaycastingScene::CastRays( size_t num_rays = shape.NumElements(); std::unordered_map result; - result["t_hit"] = core::Tensor(shape, core::Float32); - result["geometry_ids"] = core::Tensor(shape, core::UInt32); - result["primitive_ids"] = core::Tensor(shape, core::UInt32); + result["t_hit"] = core::Tensor(shape, core::Float32, rays.GetDevice()); + result["geometry_ids"] = core::Tensor(shape, core::UInt32, rays.GetDevice()); + result["primitive_ids"] = core::Tensor(shape, core::UInt32, rays.GetDevice()); shape.push_back(2); - result["primitive_uvs"] = core::Tensor(shape, core::Float32); + result["primitive_uvs"] = core::Tensor(shape, core::Float32, rays.GetDevice()); shape.back() = 3; - result["primitive_normals"] = core::Tensor(shape, core::Float32); + result["primitive_normals"] = core::Tensor(shape, core::Float32, rays.GetDevice()); auto data = rays.Contiguous(); - impl_->CastRays(data.GetDataPtr(), num_rays, + impl_->CastRays(data.GetDataPtr(), num_rays, result["t_hit"].GetDataPtr(), result["geometry_ids"].GetDataPtr(), result["primitive_ids"].GetDataPtr(), result["primitive_uvs"].GetDataPtr(), result["primitive_normals"].GetDataPtr(), - nthreads); + nthreads, + false); return result; } @@ -820,7 +1090,7 @@ core::Tensor RaycastingScene::TestOcclusions(const core::Tensor& rays, // results. size_t num_rays = shape.NumElements(); - core::Tensor result(shape, core::Bool); + core::Tensor result(shape, core::Bool, rays.GetDevice()); auto data = rays.Contiguous(); impl_->TestOcclusions(data.GetDataPtr(), num_rays, tnear, tfar, diff --git a/cpp/open3d/t/geometry/RaycastingScene.h b/cpp/open3d/t/geometry/RaycastingScene.h index f25d994b0b5..20237344279 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.h +++ b/cpp/open3d/t/geometry/RaycastingScene.h @@ -30,7 +30,11 @@ namespace geometry { class RaycastingScene { public: /// \brief Default Constructor. - RaycastingScene(int64_t nthreads = 0); + RaycastingScene(int64_t nthreads = 0 +#ifdef BUILD_SYCL_MODULE + , const core::Device& device = core::Device("CPU:0") +#endif + ); ~RaycastingScene(); @@ -250,6 +254,10 @@ class RaycastingScene { private: struct Impl; + struct CPUImpl; +#ifdef BUILD_SYCL_MODULE + struct SYCLImpl; +#endif std::unique_ptr impl_; }; diff --git a/cpp/pybind/core/sycl_utils.cpp b/cpp/pybind/core/sycl_utils.cpp index 7705575e261..ba90cb6fc51 100644 --- a/cpp/pybind/core/sycl_utils.cpp +++ b/cpp/pybind/core/sycl_utils.cpp @@ -12,8 +12,13 @@ namespace open3d { namespace core { -void pybind_sycl_utils_definitions(py::module& m) { +void pybind_sycl_utils_definitions(py::module& m) { m.def("sycl_demo", &sy::SYCLDemo); + + py::module m_sycl = m.def_submodule("sycl"); + m_sycl.def("is_available", sy::IsAvailable, + "Returns true if Open3D is compiled with SYCL support and at " + "least one compatible SYCL device is detected."); } } // namespace core diff --git a/cpp/pybind/t/geometry/raycasting_scene.cpp b/cpp/pybind/t/geometry/raycasting_scene.cpp index b3e8983aaef..a9156b212e5 100644 --- a/cpp/pybind/t/geometry/raycasting_scene.cpp +++ b/cpp/pybind/t/geometry/raycasting_scene.cpp @@ -58,12 +58,22 @@ void pybind_raycasting_scene_definitions(py::module& m) { auto raycasting_scene = static_cast>(m.attr("RaycastingScene")); // Constructors. +#ifdef BUILD_SYCL_MODULE + raycasting_scene.def(py::init(), "nthreads"_a = 0, "device"_a = core::Device("CPU:0"), R"doc( +Create a RaycastingScene. + +Args: + nthreads (int): The number of threads to use for building the scene. Set to 0 for automatic. + enable_sycl (bool): Enable SYCL for building the scene. Default is False. +)doc"); +#else raycasting_scene.def(py::init(), "nthreads"_a = 0, R"doc( Create a RaycastingScene. Args: nthreads (int): The number of threads to use for building the scene. Set to 0 for automatic. )doc"); +#endif raycasting_scene.def( "add_triangles", diff --git a/python/test/open3d_test.py b/python/test/open3d_test.py old mode 100644 new mode 100755 index 46a768746d0..271cdadf3a1 --- a/python/test/open3d_test.py +++ b/python/test/open3d_test.py @@ -29,12 +29,18 @@ def list_devices(): - If cuda device is available, returns [Device("CPU:0"), Device("CUDA:0")]. - If cuda device is not available, returns [Device("CPU:0")]. + If Open3D is built with SYCL support: + - If SYCL device is available, returns [Device("CPU:0"), Device("SYCL:0")]. + - If SYCL device is not available, returns [Device("CPU:0")]. + If Open3D is built without CUDA support: - returns [Device("CPU:0")]. """ import open3d as o3d if o3d.core.cuda.device_count() > 0: return [o3d.core.Device("CPU:0"), o3d.core.Device("CUDA:0")] + elif o3d.core.sycl.is_available(): + return [o3d.core.Device("CPU:0"), o3d.core.Device("SYCL:0")] else: return [o3d.core.Device("CPU:0")] diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py old mode 100644 new mode 100755 index 3ce024a2b29..df190e46a27 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -9,57 +9,66 @@ import numpy as np import pytest +import sys +import os +sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../..") +from open3d_test import list_devices + # test intersection with a single triangle -def test_cast_rays(): +@pytest.mark.parametrize("device", list_devices()) +def test_cast_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32) + dtype=o3d.core.float32, device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) - scene = o3d.t.geometry.RaycastingScene() + scene = o3d.t.geometry.RaycastingScene(device=device) geom_id = scene.add_triangles(vertices, triangles) rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32) + dtype=o3d.core.float32, device=device) ans = scene.cast_rays(rays) # first ray hits the triangle - assert geom_id == ans['geometry_ids'][0] + assert geom_id == ans['geometry_ids'][0].cpu() assert np.isclose(ans['t_hit'][0].item(), 1.0) # second ray misses - assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans['geometry_ids'][1] + assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans['geometry_ids'][1].cpu() assert np.isinf(ans['t_hit'][1].item()) # cast lots of random rays to test the internal batching # we expect no errors for this test -def test_cast_lots_of_rays(): +@pytest.mark.parametrize("device", list_devices()) +def test_cast_lots_of_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32) + dtype=o3d.core.float32, device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) - scene = o3d.t.geometry.RaycastingScene() + scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) rs = np.random.RandomState(123) rays = o3d.core.Tensor.from_numpy(rs.rand(7654321, 6).astype(np.float32)) + rays = rays.to(device) _ = scene.cast_rays(rays) # test occlusion with a single triangle -def test_test_occlusions(): +@pytest.mark.parametrize("device", list_devices()) +def test_test_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32) + dtype=o3d.core.float32, device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) - scene = o3d.t.geometry.RaycastingScene() + scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32) - ans = scene.test_occlusions(rays) + dtype=o3d.core.float32, device=device) + ans = scene.test_occlusions(rays).cpu() # first ray is occluded by the triangle assert ans[0] == True @@ -68,26 +77,28 @@ def test_test_occlusions(): assert ans[1] == False # set tfar such that no ray is occluded - ans = scene.test_occlusions(rays, tfar=0.5) + ans = scene.test_occlusions(rays, tfar=0.5).cpu() assert ans.any() == False # set tnear such that no ray is occluded - ans = scene.test_occlusions(rays, tnear=1.5) + ans = scene.test_occlusions(rays, tnear=1.5).cpu() assert ans.any() == False # test lots of random rays for occlusions to test the internal batching # we expect no errors for this test -def test_test_lots_of_occlusions(): +@pytest.mark.parametrize("device", list_devices()) +def test_test_lots_of_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32) + dtype=o3d.core.float32, device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) - scene = o3d.t.geometry.RaycastingScene() + scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) rs = np.random.RandomState(123) rays = o3d.core.Tensor.from_numpy(rs.rand(7654321, 6).astype(np.float32)) + rays = rays.to(device) _ = scene.test_occlusions(rays) From 2557482e9fd8e49351ef626f20a1ab2553115c05 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sun, 18 Aug 2024 17:24:30 -0700 Subject: [PATCH 11/35] Add SYCL support to CountIntersections function --- cpp/open3d/t/geometry/RaycastingScene.cpp | 189 ++++++++++++------ .../test/t/geometry/test_raycasting_scene.py | 45 +++-- 2 files changed, 160 insertions(+), 74 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 04dfc34563c..bde62b05490 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -27,6 +27,67 @@ #include "open3d/utility/Helper.h" #include "open3d/utility/Logging.h" + +namespace callbacks { + +struct GeomPrimID { + uint32_t geomID; + uint32_t primID; + float ray_tfar; +}; + +struct CountIntersectionsContext { + RTCRayQueryContext context; + GeomPrimID* previous_geom_prim_ID_tfar; + int* intersections; +}; + +#ifdef BUILD_SYCL_MODULE +RTC_SYCL_INDIRECTLY_CALLABLE void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +#else +void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +#endif + int* valid = args->valid; + const CountIntersectionsContext* context = + reinterpret_cast(args->context); + struct RTCRayN* rayN = args->ray; + struct RTCHitN* hitN = args->hit; + const unsigned int N = args->N; + + // Avoid crashing when debug visualizations are used. + if (context == nullptr) return; + + GeomPrimID *previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; + int* intersections = context->intersections; + + // Iterate over all rays in ray packet. + for (unsigned int ui = 0; ui < N; ui += 1) { + // Calculate loop and execution mask + unsigned int vi = ui + 0; + if (vi >= N) continue; + + // Ignore inactive rays. + if (valid[vi] != -1) continue; + + // Read ray/hit from ray structure. + RTCRay ray = rtcGetRayFromRayN(rayN, N, ui); + RTCHit hit = rtcGetHitFromHitN(hitN, N, ui); + + unsigned int ray_id = ray.id; + GeomPrimID gpID = {hit.geomID, hit.primID, ray.tfar}; + auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id]; + if (prev_gpIDtfar.geomID != hit.geomID || + (prev_gpIDtfar.primID != hit.primID && + prev_gpIDtfar.ray_tfar != ray.tfar)) { + ++(intersections[ray_id]); + previous_geom_prim_ID_tfar[ray_id] = gpID; + } + // Always ignore hit + valid[ui] = 0; + } +} +} // namespace callbacks + namespace { typedef Eigen::AlignedVector3 Vec3fa; @@ -74,56 +135,6 @@ void AssertTensorDtypeLastDimDeviceMinNDim(const open3d::core::Tensor& tensor, open3d::core::Dtype::FromType()); } -struct CountIntersectionsContext { - RTCRayQueryContext context; - std::vector>* - previous_geom_prim_ID_tfar; - int* intersections; -}; - -void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { - int* valid = args->valid; - const CountIntersectionsContext* context = - reinterpret_cast(args->context); - struct RTCRayN* rayN = args->ray; - struct RTCHitN* hitN = args->hit; - const unsigned int N = args->N; - - // Avoid crashing when debug visualizations are used. - if (context == nullptr) return; - - std::vector>* - previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; - int* intersections = context->intersections; - - // Iterate over all rays in ray packet. - for (unsigned int ui = 0; ui < N; ui += 1) { - // Calculate loop and execution mask - unsigned int vi = ui + 0; - if (vi >= N) continue; - - // Ignore inactive rays. - if (valid[vi] != -1) continue; - - // Read ray/hit from ray structure. - RTCRay ray = rtcGetRayFromRayN(rayN, N, ui); - RTCHit hit = rtcGetHitFromHitN(hitN, N, ui); - - unsigned int ray_id = ray.id; - std::tuple gpID(hit.geomID, hit.primID, - ray.tfar); - auto& prev_gpIDtfar = previous_geom_prim_ID_tfar->operator[](ray_id); - if (std::get<0>(prev_gpIDtfar) != hit.geomID || - (std::get<1>(prev_gpIDtfar) != hit.primID && - std::get<2>(prev_gpIDtfar) != ray.tfar)) { - ++(intersections[ray_id]); - previous_geom_prim_ID_tfar->operator[](ray_id) = gpID; - } - // Always ignore hit - valid[ui] = 0; - } -} - struct ListIntersectionsContext { RTCRayQueryContext context; std::vector>* @@ -554,7 +565,64 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { const size_t num_rays, int* intersections, const int nthreads) override { - throw std::logic_error("Function not yet implemented"); + CommitScene(); + + queue_.memset(intersections, 0, sizeof(int) * num_rays).wait(); + + callbacks::GeomPrimID* previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + + // Check if allocation was successful + if (!previous_geom_prim_ID_tfar) { + throw std::runtime_error("Failed to allocate device memory"); + } + + auto host_previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + for (size_t i = 0; i < num_rays; ++i) { + host_previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + } + + // Copy the initialized data to the device + queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), previous_geom_prim_ID_tfar, num_rays * sizeof(callbacks::GeomPrimID)).wait(); + + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { + callbacks::CountIntersectionsContext context; + rtcInitRayQueryContext(&context.context); + context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar; + context.intersections = intersections; + + RTCIntersectArguments args; + rtcInitIntersectArguments(&args); + args.filter = callbacks::CountIntersectionsFunc; + args.context = &context.context; + + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + rh.ray.tnear = 0; + rh.ray.tfar = std::numeric_limits::infinity(); + rh.ray.mask = -1; + rh.ray.flags = 0; + rh.ray.id = i; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh, &args); + }); + }); + queue_.wait_and_throw(); + + // Free the allocated memory + sycl::free(previous_geom_prim_ID_tfar, queue_); } void ListIntersections(const float* const rays, @@ -738,21 +806,20 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { memset(intersections, 0, sizeof(int) * num_rays); - std::vector> - previous_geom_prim_ID_tfar( - num_rays, - std::make_tuple(uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), - 0.f)); + auto previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + for (size_t i = 0; i < num_rays; ++i) { + previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + } - CountIntersectionsContext context; + callbacks::CountIntersectionsContext context; rtcInitRayQueryContext(&context.context); - context.previous_geom_prim_ID_tfar = &previous_geom_prim_ID_tfar; + context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get(); context.intersections = intersections; RTCIntersectArguments args; rtcInitIntersectArguments(&args); - args.filter = CountIntersectionsFunc; + args.filter = callbacks::CountIntersectionsFunc; args.context = &context.context; auto LoopFn = [&](const tbb::blocked_range& range) { @@ -1109,7 +1176,7 @@ core::Tensor RaycastingScene::CountIntersections(const core::Tensor& rays, // results. size_t num_rays = shape.NumElements(); - core::Tensor intersections(shape, core::Dtype::FromType()); + core::Tensor intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); auto data = rays.Contiguous(); diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index df190e46a27..7632bff1668 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -103,47 +103,66 @@ def test_test_lots_of_occlusions(device): _ = scene.test_occlusions(rays) -def test_add_triangle_mesh(): +@pytest.mark.parametrize("device", list_devices()) +def test_add_triangle_mesh(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) + vertex_positions = cube.vertex.positions + vertex_positions = vertex_positions.to(device) + triangle_indices = cube.triangle.indices + triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) + triangle_indices = triangle_indices.to(device) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(cube) + scene = o3d.t.geometry.RaycastingScene(device=device) + scene.add_triangles(vertex_positions, triangle_indices) rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32) + dtype=o3d.core.float32, device=device) ans = scene.count_intersections(rays) - np.testing.assert_equal(ans.numpy(), [2, 1, 0]) + np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) -def test_count_intersections(): +@pytest.mark.parametrize("device", list_devices()) +def test_count_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) + vertex_positions = cube.vertex.positions + vertex_positions = vertex_positions.to(device) + triangle_indices = cube.triangle.indices + triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) + triangle_indices = triangle_indices.to(device) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(cube) + scene = o3d.t.geometry.RaycastingScene(device=device) + scene.add_triangles(vertex_positions, triangle_indices) rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32) + dtype=o3d.core.float32, device=device) ans = scene.count_intersections(rays) - np.testing.assert_equal(ans.numpy(), [2, 1, 0]) + np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) # count lots of random ray intersections to test the internal batching # we expect no errors for this test -def test_count_lots_of_intersections(): +@pytest.mark.parametrize("device", list_devices()) +def test_count_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) + vertex_positions = cube.vertex.positions + vertex_positions = vertex_positions.to(device) + triangle_indices = cube.triangle.indices + triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) + triangle_indices = triangle_indices.to(device) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(cube) + scene = o3d.t.geometry.RaycastingScene(device=device) + scene.add_triangles(vertex_positions, triangle_indices) rs = np.random.RandomState(123) rays = o3d.core.Tensor.from_numpy(rs.rand(1234567, 6).astype(np.float32)) + rays = rays.to(device) _ = scene.count_intersections(rays) From 2689d45a3294e8a35d262531fa3a4d414c1bfa7a Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 20 Sep 2024 18:11:22 -0700 Subject: [PATCH 12/35] Provide SYCL support to list intersections functions WIP --- cpp/open3d/t/geometry/RaycastingScene.cpp | 264 ++++++++++++------ .../test/t/geometry/test_raycasting_scene.py | 18 +- 2 files changed, 185 insertions(+), 97 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index bde62b05490..e79d91c38d1 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -86,69 +86,24 @@ void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { valid[ui] = 0; } } -} // namespace callbacks - -namespace { - -typedef Eigen::AlignedVector3 Vec3fa; -// Dont force alignment for Vec2f because we use it just for storing -typedef Eigen::Matrix Vec2f; -typedef Eigen::Vector3f Vec3f; - -void enablePersistentJITCache() -{ -#if defined(_WIN32) - _putenv_s("SYCL_CACHE_PERSISTENT","1"); - _putenv_s("SYCL_CACHE_DIR","cache"); -#else - setenv("SYCL_CACHE_PERSISTENT","1",1); - setenv("SYCL_CACHE_DIR","cache",1); -#endif -} - -// Error function called by embree. -void ErrorFunction(void* userPtr, enum RTCError error, const char* str) { - open3d::utility::LogError("embree error: {} {}", error, str); -} - -// Checks the last dim, ensures that the number of dims is >= min_ndim, checks -// the device, and dtype. -template -void AssertTensorDtypeLastDimDeviceMinNDim(const open3d::core::Tensor& tensor, - const std::string& tensor_name, - int64_t last_dim, - const open3d::core::Device& device, - int64_t min_ndim = 2) { - open3d::core::AssertTensorDevice(tensor, device); - if (tensor.NumDims() < min_ndim) { - open3d::utility::LogError( - "{} Tensor ndim is {} but expected ndim >= {}", tensor_name, - tensor.NumDims(), min_ndim); - } - if (tensor.GetShape().back() != last_dim) { - open3d::utility::LogError( - "The last dimension of the {} Tensor must be {} but got " - "Tensor with shape {}", - tensor_name, last_dim, tensor.GetShape().ToString()); - } - open3d::core::AssertTensorDtype(tensor, - open3d::core::Dtype::FromType()); -} struct ListIntersectionsContext { RTCRayQueryContext context; - std::vector>* - previous_geom_prim_ID_tfar; + GeomPrimID* previous_geom_prim_ID_tfar; unsigned int* ray_ids; unsigned int* geometry_ids; unsigned int* primitive_ids; float* primitive_uvs; float* t_hit; - Eigen::VectorXi cumsum; + int* cumsum; unsigned int* track_intersections; }; +#ifdef BUILD_SYCL_MODULE +RTC_SYCL_INDIRECTLY_CALLABLE void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +#else void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +#endif int* valid = args->valid; const ListIntersectionsContext* context = reinterpret_cast(args->context); @@ -159,14 +114,13 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { // Avoid crashing when debug visualizations are used. if (context == nullptr) return; - std::vector>* - previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; + GeomPrimID *previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; unsigned int* ray_ids = context->ray_ids; unsigned int* geometry_ids = context->geometry_ids; unsigned int* primitive_ids = context->primitive_ids; float* primitive_uvs = context->primitive_uvs; float* t_hit = context->t_hit; - Eigen::VectorXi cumsum = context->cumsum; + int* cumsum = context->cumsum; unsigned int* track_intersections = context->track_intersections; // Iterate over all rays in ray packet. @@ -183,12 +137,11 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { RTCHit hit = rtcGetHitFromHitN(hitN, N, ui); unsigned int ray_id = ray.id; - std::tuple gpID(hit.geomID, hit.primID, - ray.tfar); - auto& prev_gpIDtfar = previous_geom_prim_ID_tfar->operator[](ray_id); - if (std::get<0>(prev_gpIDtfar) != hit.geomID || - (std::get<1>(prev_gpIDtfar) != hit.primID && - std::get<2>(prev_gpIDtfar) != ray.tfar)) { + GeomPrimID gpID = {hit.geomID, hit.primID, ray.tfar}; + auto& prev_gpIDtfar = previous_geom_prim_ID_tfar[ray_id]; + if (prev_gpIDtfar.geomID != hit.geomID || + (prev_gpIDtfar.primID != hit.primID && + prev_gpIDtfar.ray_tfar != ray.tfar)) { size_t idx = cumsum[ray_id] + track_intersections[ray_id]; ray_ids[idx] = ray_id; geometry_ids[idx] = hit.geomID; @@ -196,7 +149,7 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { primitive_uvs[idx * 2 + 0] = hit.u; primitive_uvs[idx * 2 + 1] = hit.v; t_hit[idx] = ray.tfar; - previous_geom_prim_ID_tfar->operator[](ray_id) = gpID; + previous_geom_prim_ID_tfar[ray_id] = gpID; ++(track_intersections[ray_id]); } // Always ignore hit @@ -204,6 +157,55 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { } } +} // namespace callbacks + +namespace { + +typedef Eigen::AlignedVector3 Vec3fa; +// Dont force alignment for Vec2f because we use it just for storing +typedef Eigen::Matrix Vec2f; +typedef Eigen::Vector3f Vec3f; + +void enablePersistentJITCache() +{ +#if defined(_WIN32) + _putenv_s("SYCL_CACHE_PERSISTENT","1"); + _putenv_s("SYCL_CACHE_DIR","cache"); +#else + setenv("SYCL_CACHE_PERSISTENT","1",1); + setenv("SYCL_CACHE_DIR","cache",1); +#endif +} + +// Error function called by embree. +void ErrorFunction(void* userPtr, enum RTCError error, const char* str) { + open3d::utility::LogError("embree error: {} {}", error, str); +} + +// Checks the last dim, ensures that the number of dims is >= min_ndim, checks +// the device, and dtype. +template +void AssertTensorDtypeLastDimDeviceMinNDim(const open3d::core::Tensor& tensor, + const std::string& tensor_name, + int64_t last_dim, + const open3d::core::Device& device, + int64_t min_ndim = 2) { + open3d::core::AssertTensorDevice(tensor, device); + if (tensor.NumDims() < min_ndim) { + open3d::utility::LogError( + "{} Tensor ndim is {} but expected ndim >= {}", tensor_name, + tensor.NumDims(), min_ndim); + } + if (tensor.GetShape().back() != last_dim) { + open3d::utility::LogError( + "The last dimension of the {} Tensor must be {} but got " + "Tensor with shape {}", + tensor_name, last_dim, tensor.GetShape().ToString()); + } + open3d::core::AssertTensorDtype(tensor, + open3d::core::Dtype::FromType()); +} + // Adapted from common/math/closest_point.h inline Vec3fa closestPointTriangle(Vec3fa const& p, Vec3fa const& a, @@ -399,7 +401,7 @@ struct RaycastingScene::Impl { virtual void ListIntersections(const float* const rays, const size_t num_rays, const size_t num_intersections, - const Eigen::VectorXi& cumsum, + int* cumsum, unsigned int* track_intersections, unsigned int* ray_ids, unsigned int* geometry_ids, @@ -628,7 +630,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { void ListIntersections(const float* const rays, const size_t num_rays, const size_t num_intersections, - const Eigen::VectorXi& cumsum, + int* cumsum, unsigned int* track_intersections, unsigned int* ray_ids, unsigned int* geometry_ids, @@ -636,7 +638,80 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { float* primitive_uvs, float* t_hit, const int nthreads) override { - throw std::logic_error("Function not yet implemented"); + CommitScene(); + + queue_.memset(track_intersections, 0, sizeof(uint32_t) * num_rays).wait(); + queue_.memset(ray_ids, 0, sizeof(uint32_t) * num_intersections).wait(); + queue_.memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections).wait(); + queue_.memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections).wait(); + queue_.memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2).wait(); + queue_.memset(t_hit, 0, sizeof(float) * num_intersections).wait(); + + callbacks::GeomPrimID* previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + + // Check if allocation was successful + if (!previous_geom_prim_ID_tfar) { + throw std::runtime_error("Failed to allocate device memory"); + } + + auto host_previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + for (size_t i = 0; i < num_rays; ++i) { + host_previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + } + + // Copy the initialized data to the device + queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), previous_geom_prim_ID_tfar, num_rays * sizeof(callbacks::GeomPrimID)).wait(); + + // cumsum + int* cumsum_ = sycl::malloc_device(num_rays, queue_); + queue_.memcpy(cumsum, cumsum_, num_rays * sizeof(int)).wait(); + + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { + callbacks::ListIntersectionsContext context; + rtcInitRayQueryContext(&context.context); + context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar; + context.ray_ids = ray_ids; + context.geometry_ids = geometry_ids; + context.primitive_ids = primitive_ids; + context.primitive_uvs = primitive_uvs; + context.t_hit = t_hit; + context.cumsum = cumsum_; + context.track_intersections = track_intersections; + + RTCIntersectArguments args; + rtcInitIntersectArguments(&args); + //args.filter = callbacks::ListIntersectionsFunc; + args.context = &context.context; + + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + rh.ray.tnear = 0; + rh.ray.tfar = std::numeric_limits::infinity(); + rh.ray.mask = -1; + rh.ray.flags = 0; + rh.ray.id = i; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh, &args); + }); + }); + queue_.wait_and_throw(); + + // Free the allocated memory + sycl::free(previous_geom_prim_ID_tfar, queue_); + sycl::free(cumsum_, queue_); } void ComputeClosestPoints(const float* const query_points, @@ -863,7 +938,7 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { void ListIntersections(const float* const rays, const size_t num_rays, const size_t num_intersections, - const Eigen::VectorXi& cumsum, + int* cumsum, unsigned int* track_intersections, unsigned int* ray_ids, unsigned int* geometry_ids, @@ -880,16 +955,15 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2); memset(t_hit, 0, sizeof(float) * num_intersections); - std::vector> - previous_geom_prim_ID_tfar( - num_rays, - std::make_tuple(uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), - 0.f)); + auto previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + for (size_t i = 0; i < num_rays; ++i) { + previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + } - ListIntersectionsContext context; + callbacks::ListIntersectionsContext context; rtcInitRayQueryContext(&context.context); - context.previous_geom_prim_ID_tfar = &previous_geom_prim_ID_tfar; + context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar.get(); context.ray_ids = ray_ids; context.geometry_ids = geometry_ids; context.primitive_ids = primitive_ids; @@ -900,7 +974,7 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { RTCIntersectArguments args; rtcInitIntersectArguments(&args); - args.filter = ListIntersectionsFunc; + args.filter = callbacks::ListIntersectionsFunc; args.context = &context.context; auto LoopFn = [&](const tbb::blocked_range& range) { @@ -1197,22 +1271,27 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, size_t num_rays = shape.NumElements(); // determine total number of intersections - core::Tensor intersections(shape, core::Dtype::FromType()); - core::Tensor track_intersections(shape, core::Dtype::FromType()); + core::Tensor intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); + core::Tensor track_intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); auto data = rays.Contiguous(); impl_->CountIntersections(data.GetDataPtr(), num_rays, intersections.GetDataPtr(), nthreads); // prepare shape with that number of elements - Eigen::Map intersections_vector( - intersections.GetDataPtr(), num_rays); - size_t num_intersections = intersections_vector.sum(); + int* data_ptr = intersections.GetDataPtr(); + size_t num_intersections = 0; + for (size_t i = 0; i < num_rays; ++i) { + num_intersections += data_ptr[i]; + } // prepare ray allocations (cumsum) - Eigen::VectorXi cumsum = Eigen::MatrixXi::Zero(num_rays, 1); - std::partial_sum(intersections_vector.begin(), - intersections_vector.end() - 1, cumsum.begin() + 1, - std::plus()); + core::Tensor cumsum_tensor = core::Tensor::Zeros(shape, core::Dtype::FromType(), impl_->tensor_device_); + int* cumsum_ptr = cumsum_tensor.GetDataPtr(); + + cumsum_ptr[0] = 0; + for (size_t i = 1; i < num_rays; ++i) { + cumsum_ptr[i] = cumsum_ptr[i - 1] + data_ptr[i - 1]; + } // generate results structure std::unordered_map result; @@ -1220,26 +1299,27 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, shape.push_back(num_rays + 1); result["ray_splits"] = core::Tensor(shape, core::UInt32); uint32_t* ptr = result["ray_splits"].GetDataPtr(); - for (int i = 0; i < cumsum.size(); ++i) { - ptr[i] = cumsum[i]; + for (int i = 0; i < num_rays; ++i) { + ptr[i] = cumsum_ptr[i]; } ptr[num_rays] = num_intersections; - shape[0] = intersections_vector.sum(); - result["ray_ids"] = core::Tensor(shape, core::UInt32); - result["geometry_ids"] = core::Tensor(shape, core::UInt32); - result["primitive_ids"] = core::Tensor(shape, core::UInt32); - result["t_hit"] = core::Tensor(shape, core::Float32); + shape[0] = num_intersections; + result["ray_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["geometry_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["primitive_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["t_hit"] = core::Tensor(shape, core::Float32, impl_->tensor_device_); shape.push_back(2); - result["primitive_uvs"] = core::Tensor(shape, core::Float32); + result["primitive_uvs"] = core::Tensor(shape, core::Float32, impl_->tensor_device_); impl_->ListIntersections(data.GetDataPtr(), num_rays, - num_intersections, cumsum, + num_intersections, cumsum_ptr, track_intersections.GetDataPtr(), result["ray_ids"].GetDataPtr(), result["geometry_ids"].GetDataPtr(), result["primitive_ids"].GetDataPtr(), result["primitive_uvs"].GetDataPtr(), result["t_hit"].GetDataPtr(), nthreads); + return result; } diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index 7632bff1668..4dceb41e7eb 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -167,19 +167,27 @@ def test_count_lots_of_intersections(device): _ = scene.count_intersections(rays) -def test_list_intersections(): +@pytest.mark.parametrize("device", list_devices()) +def test_list_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) + vertex_positions = cube.vertex.positions + vertex_positions = vertex_positions.to(device) + triangle_indices = cube.triangle.indices + triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) + triangle_indices = triangle_indices.to(device) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(cube) + scene = o3d.t.geometry.RaycastingScene(device=device) + scene.add_triangles(vertex_positions, triangle_indices) rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32) + dtype=o3d.core.float32, device=device) + print("PYTHON TEST 1", device) ans = scene.list_intersections(rays) + print("PYTHON TEST 2") - np.testing.assert_allclose(ans['t_hit'].numpy(), + np.testing.assert_allclose(ans['t_hit'].cpu().numpy(), np.array([1.0, 2.0, 0.5]), rtol=1e-6, atol=1e-6) From 048008c49431d69f1ef1e83264e573cf4e5b5c24 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 4 Oct 2024 09:20:39 -0700 Subject: [PATCH 13/35] Remove open3d python virtualenv from Dockerfile --- docker/Dockerfile.ci | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index f1054c42e4c..6629b129fc1 100755 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -164,7 +164,7 @@ WORKDIR /root/Open3D RUN ./util/install_deps_ubuntu.sh assume-yes # Open3D Python dependencies -RUN source /root/open3d/bin/activate && source util/ci_utils.sh \ +RUN source util/ci_utils.sh \ && if [ "${BUILD_CUDA_MODULE}" = "ON" ]; then \ install_python_dependencies with-cuda with-jupyter; \ else \ @@ -173,7 +173,7 @@ RUN source /root/open3d/bin/activate && source util/ci_utils.sh \ && pip install -r python/requirements_test.txt # Open3D Jupyter dependencies -RUN source /root/open3d/bin/activate && mkdir -p /etc/apt/keyrings \ +RUN mkdir -p /etc/apt/keyrings \ && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_16.x nodistro main" \ @@ -186,7 +186,7 @@ RUN source /root/open3d/bin/activate && mkdir -p /etc/apt/keyrings \ && yarn --version # Build all -RUN source /root/open3d/bin/activate && \ +RUN \ if [ "${BUILD_SYCL_MODULE}" = "ON" ]; then \ export CMAKE_CXX_COMPILER=icpx; \ export CMAKE_C_COMPILER=icx; \ From e25a9b7a4c39761b20e0e93eaea554931e24c8c4 Mon Sep 17 00:00:00 2001 From: Sameer Sheorey Date: Wed, 9 Oct 2024 16:42:54 -0700 Subject: [PATCH 14/35] Fix no tbb target with BUILD_SYCL_MODULE Use sycl_target_sources for files with sycl code in cmake. style fix --- 3rdparty/embree/embree.cmake | 7 +- 3rdparty/find_dependencies.cmake | 6 +- 3rdparty/mkl/tbb.cmake | 1 + cpp/open3d/t/geometry/CMakeLists.txt | 11 +- cpp/open3d/t/geometry/RaycastingScene.cpp | 626 ++++++++++-------- cpp/open3d/t/geometry/RaycastingScene.h | 3 +- cpp/pybind/CMakeLists.txt | 2 +- cpp/pybind/core/sycl_utils.cpp | 2 +- cpp/pybind/t/geometry/raycasting_scene.cpp | 3 +- docker/Dockerfile.ci | 9 +- .../test/t/geometry/test_raycasting_scene.py | 46 +- 11 files changed, 407 insertions(+), 309 deletions(-) diff --git a/3rdparty/embree/embree.cmake b/3rdparty/embree/embree.cmake index 1bd89b2ba58..6d55198ec69 100644 --- a/3rdparty/embree/embree.cmake +++ b/3rdparty/embree/embree.cmake @@ -68,7 +68,9 @@ if(BUILD_SYCL_MODULE) set(ISA_ARGS ${ISA_ARGS} -DCMAKE_CXX_COMPILER=icpx) set(ISA_ARGS ${ISA_ARGS} -DCMAKE_C_COMPILER=icx) set(ISA_ARGS ${ISA_ARGS} -DEMBREE_SYCL_SUPPORT=ON) - #list(APPEND ISA_LIBS embree4_sycl) + list(APPEND ISA_LIBS embree4_sycl ze_wrapper) + list(APPEND ISA_BUILD_BYPRODUCTS "/${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}embree4_sycl${CMAKE_STATIC_LIBRARY_SUFFIX}" + "/${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_wrapper${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() @@ -101,11 +103,10 @@ ExternalProject_Add( /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}math${CMAKE_STATIC_LIBRARY_SUFFIX} /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}tasking${CMAKE_STATIC_LIBRARY_SUFFIX} /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_wrapper${CMAKE_STATIC_LIBRARY_SUFFIX} - /${Open3D_INSTALL_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}embree_rthwif${CMAKE_STATIC_LIBRARY_SUFFIX} ${ISA_BUILD_BYPRODUCTS} ) ExternalProject_Get_Property(ext_embree INSTALL_DIR) set(EMBREE_INCLUDE_DIRS ${INSTALL_DIR}/include/ ${INSTALL_DIR}/src/ext_embree/) # "/" is critical. set(EMBREE_LIB_DIR ${INSTALL_DIR}/${Open3D_INSTALL_LIB_DIR}) -set(EMBREE_LIBRARIES embree4_sycl embree4 simd lexers sys math tasking ze_wrapper embree_rthwif ${ISA_LIBS}) +set(EMBREE_LIBRARIES embree4 simd lexers sys math tasking ${ISA_LIBS}) diff --git a/3rdparty/find_dependencies.cmake b/3rdparty/find_dependencies.cmake index c3dc3885565..d2afbe405c9 100644 --- a/3rdparty/find_dependencies.cmake +++ b/3rdparty/find_dependencies.cmake @@ -1384,7 +1384,7 @@ if(BUILD_GUI) ${CPP_LIBRARY} ${CPPABI_LIBRARY}) message(STATUS "Filament C++ libraries: ${CPP_LIBRARY} ${CPPABI_LIBRARY}") if (LIBCPP_VERSION GREATER 11) - message(WARNING "libc++ (LLVM) version ${LIBCPP_VERSION} > 11 includes libunwind that " + message(WARNING "libc++ (LLVM) version ${LIBCPP_VERSION} > 11 includes libunwind that " "interferes with the system libunwind.so.8 and may crash Python code when exceptions " "are used. Please consider using libc++ (LLVM) v11.") endif() @@ -1697,7 +1697,7 @@ else(OPEN3D_USE_ONEAPI_PACKAGES) INCLUDE_DIRS ${STATIC_MKL_INCLUDE_DIR} LIB_DIR ${STATIC_MKL_LIB_DIR} LIBRARIES ${STATIC_MKL_LIBRARIES} - DEPENDS ext_tbb ext_mkl_include ext_mkl + DEPENDS Open3D::3rdparty_tbb ext_mkl_include ext_mkl ) if(UNIX) target_compile_options(3rdparty_blas INTERFACE "$<$:-m64>") @@ -1719,7 +1719,7 @@ else(OPEN3D_USE_ONEAPI_PACKAGES) endif() if(NOT USE_SYSTEM_TBB) include(${Open3D_3RDPARTY_DIR}/mkl/tbb.cmake) - list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM TBB::tbb) + list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM Open3D::3rdparty_tbb) else() list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_SYSTEM Open3D::3rdparty_tbb) endif() diff --git a/3rdparty/mkl/tbb.cmake b/3rdparty/mkl/tbb.cmake index 385af4b644b..a36435ee1b4 100644 --- a/3rdparty/mkl/tbb.cmake +++ b/3rdparty/mkl/tbb.cmake @@ -38,3 +38,4 @@ install(TARGETS tbb EXPORT ${PROJECT_NAME}Targets RUNTIME DESTINATION ${Open3D_INSTALL_BIN_DIR} COMPONENT tbb ) +add_library(${PROJECT_NAME}::3rdparty_tbb ALIAS tbb) diff --git a/cpp/open3d/t/geometry/CMakeLists.txt b/cpp/open3d/t/geometry/CMakeLists.txt index 1b1ba5760cf..e345c819b6f 100644 --- a/cpp/open3d/t/geometry/CMakeLists.txt +++ b/cpp/open3d/t/geometry/CMakeLists.txt @@ -7,7 +7,6 @@ target_sources(tgeometry PRIVATE LineSet.cpp BoundingVolume.cpp PointCloud.cpp - RaycastingScene.cpp RGBDImage.cpp TensorMap.cpp TriangleMesh.cpp @@ -16,6 +15,16 @@ target_sources(tgeometry PRIVATE VtkUtils.cpp ) +if (BUILD_SYCL_MODULE) + open3d_sycl_target_sources(tgeometry PRIVATE + RaycastingScene.cpp + ) +else() + target_sources(tgeometry PRIVATE + RaycastingScene.cpp + ) +endif() + open3d_show_and_abort_on_warning(tgeometry) open3d_set_global_properties(tgeometry) open3d_set_open3d_lib_properties(tgeometry) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index aa0b200bc76..2691d9d6fe9 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include "open3d/utility/Helper.h" #include "open3d/utility/Logging.h" - namespace callbacks { struct GeomPrimID { @@ -43,7 +43,8 @@ struct CountIntersectionsContext { }; #ifdef BUILD_SYCL_MODULE -RTC_SYCL_INDIRECTLY_CALLABLE void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +RTC_SYCL_INDIRECTLY_CALLABLE void CountIntersectionsFunc( + const RTCFilterFunctionNArguments* args) { #else void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { #endif @@ -57,7 +58,8 @@ void CountIntersectionsFunc(const RTCFilterFunctionNArguments* args) { // Avoid crashing when debug visualizations are used. if (context == nullptr) return; - GeomPrimID *previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; + GeomPrimID* previous_geom_prim_ID_tfar = + context->previous_geom_prim_ID_tfar; int* intersections = context->intersections; // Iterate over all rays in ray packet. @@ -100,7 +102,8 @@ struct ListIntersectionsContext { }; #ifdef BUILD_SYCL_MODULE -RTC_SYCL_INDIRECTLY_CALLABLE void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { +RTC_SYCL_INDIRECTLY_CALLABLE void ListIntersectionsFunc( + const RTCFilterFunctionNArguments* args) { #else void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { #endif @@ -114,7 +117,8 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { // Avoid crashing when debug visualizations are used. if (context == nullptr) return; - GeomPrimID *previous_geom_prim_ID_tfar = context->previous_geom_prim_ID_tfar; + GeomPrimID* previous_geom_prim_ID_tfar = + context->previous_geom_prim_ID_tfar; unsigned int* ray_ids = context->ray_ids; unsigned int* geometry_ids = context->geometry_ids; unsigned int* primitive_ids = context->primitive_ids; @@ -157,7 +161,7 @@ void ListIntersectionsFunc(const RTCFilterFunctionNArguments* args) { } } -} // namespace callbacks +} // namespace callbacks namespace { @@ -166,16 +170,17 @@ typedef Eigen::AlignedVector3 Vec3fa; typedef Eigen::Matrix Vec2f; typedef Eigen::Vector3f Vec3f; -void enablePersistentJITCache() -{ +#ifdef BUILD_SYCL_MODULE +void enablePersistentJITCache() { #if defined(_WIN32) - _putenv_s("SYCL_CACHE_PERSISTENT","1"); - _putenv_s("SYCL_CACHE_DIR","cache"); + _putenv_s("SYCL_CACHE_PERSISTENT", "1"); + _putenv_s("SYCL_CACHE_DIR", "cache"); #else - setenv("SYCL_CACHE_PERSISTENT","1",1); - setenv("SYCL_CACHE_DIR","cache",1); + setenv("SYCL_CACHE_PERSISTENT", "1", 1); + setenv("SYCL_CACHE_DIR", "cache", 1); #endif } +#endif // Error function called by embree. void ErrorFunction(void* userPtr, enum RTCError error, const char* str) { @@ -378,47 +383,47 @@ struct RaycastingScene::Impl { } virtual void CastRays(const float* const rays, - const size_t num_rays, - float* t_hit, - unsigned int* geometry_ids, - unsigned int* primitive_ids, - float* primitive_uvs, - float* primitive_normals, - const int nthreads, - const bool line_intersection) = 0; - + const size_t num_rays, + float* t_hit, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads, + const bool line_intersection) = 0; + virtual void TestOcclusions(const float* const rays, - const size_t num_rays, - const float tnear, - const float tfar, - int8_t* occluded, - const int nthreads) = 0; - + const size_t num_rays, + const float tnear, + const float tfar, + int8_t* occluded, + const int nthreads) = 0; + virtual void CountIntersections(const float* const rays, - const size_t num_rays, - int* intersections, - const int nthreads) = 0; - + const size_t num_rays, + int* intersections, + const int nthreads) = 0; + virtual void ListIntersections(const float* const rays, - const size_t num_rays, - const size_t num_intersections, - int* cumsum, - unsigned int* track_intersections, - unsigned int* ray_ids, - unsigned int* geometry_ids, - unsigned int* primitive_ids, - float* primitive_uvs, - float* t_hit, - const int nthreads) = 0; - + const size_t num_rays, + const size_t num_intersections, + int* cumsum, + unsigned int* track_intersections, + unsigned int* ray_ids, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* t_hit, + const int nthreads) = 0; + virtual void ComputeClosestPoints(const float* const query_points, - const size_t num_query_points, - float* closest_points, - unsigned int* geometry_ids, - unsigned int* primitive_ids, - float* primitive_uvs, - float* primitive_normals, - const int nthreads) = 0; + const size_t num_query_points, + float* closest_points, + unsigned int* geometry_ids, + unsigned int* primitive_ids, + float* primitive_uvs, + float* primitive_normals, + const int nthreads) = 0; }; #ifdef BUILD_SYCL_MODULE @@ -433,8 +438,9 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { try { sycl_device_ = sycl::device(rtcSYCLDeviceSelector); - } catch(std::exception& e) { - utility::LogError("Caught exception creating sycl::device: {}", e.what()); + } catch (std::exception& e) { + utility::LogError("Caught exception creating sycl::device: {}", + e.what()); return; } @@ -445,9 +451,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { rtcSetDeviceSYCLDevice(device_, sycl_device_); if (!device_) { - utility::LogError( - "Error %d: cannot create device\n", - rtcGetDeviceError(NULL)); + utility::LogError("Error %d: cannot create device\n", + rtcGetDeviceError(NULL)); } } @@ -462,65 +467,72 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { const bool line_intersection) override { CommitScene(); - auto scene = this->scene_; - queue_.submit([=](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { - const size_t i = item.get_id(0); - - struct RTCRayHit rh; - const float* r = &rays[i * 6]; - rh.ray.org_x = r[0]; - rh.ray.org_y = r[1]; - rh.ray.org_z = r[2]; - if (line_intersection) { - rh.ray.dir_x = r[3] - r[0]; - rh.ray.dir_y = r[4] - r[1]; - rh.ray.dir_z = r[5] - r[2]; - } else { - rh.ray.dir_x = r[3]; - rh.ray.dir_y = r[4]; - rh.ray.dir_z = r[5]; - } - rh.ray.tnear = 0; - if (line_intersection) { - rh.ray.tfar = 1.f; - } else { - rh.ray.tfar = std::numeric_limits::infinity(); - } - rh.ray.mask = -1; - rh.ray.id = i; - rh.ray.flags = 0; - rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(scene, &rh); - - t_hit[i] = rh.ray.tfar; - if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) { - geometry_ids[i] = rh.hit.geomID; - primitive_ids[i] = rh.hit.primID; - primitive_uvs[i * 2 + 0] = rh.hit.u; - primitive_uvs[i * 2 + 1] = rh.hit.v; - float inv_norm = 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x + - rh.hit.Ng_y * rh.hit.Ng_y + - rh.hit.Ng_z * rh.hit.Ng_z); - primitive_normals[i * 3 + 0] = rh.hit.Ng_x * inv_norm; - primitive_normals[i * 3 + 1] = rh.hit.Ng_y * inv_norm; - primitive_normals[i * 3 + 2] = rh.hit.Ng_z * inv_norm; - } else { - geometry_ids[i] = RTC_INVALID_GEOMETRY_ID; - primitive_ids[i] = RTC_INVALID_GEOMETRY_ID; - primitive_uvs[i * 2 + 0] = 0; - primitive_uvs[i * 2 + 1] = 0; - primitive_normals[i * 3 + 0] = 0; - primitive_normals[i * 3 + 1] = 0; - primitive_normals[i * 3 + 2] = 0; - } - }); + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for( + sycl::range<1>(num_rays), + [=](sycl::item<1> item, sycl::kernel_handler kh) { + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + if (line_intersection) { + rh.ray.dir_x = r[3] - r[0]; + rh.ray.dir_y = r[4] - r[1]; + rh.ray.dir_z = r[5] - r[2]; + } else { + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + } + rh.ray.tnear = 0; + if (line_intersection) { + rh.ray.tfar = 1.f; + } else { + rh.ray.tfar = + std::numeric_limits::infinity(); + } + rh.ray.mask = -1; + rh.ray.id = i; + rh.ray.flags = 0; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh); + + t_hit[i] = rh.ray.tfar; + if (rh.hit.geomID != RTC_INVALID_GEOMETRY_ID) { + geometry_ids[i] = rh.hit.geomID; + primitive_ids[i] = rh.hit.primID; + primitive_uvs[i * 2 + 0] = rh.hit.u; + primitive_uvs[i * 2 + 1] = rh.hit.v; + float inv_norm = + 1.f / std::sqrt(rh.hit.Ng_x * rh.hit.Ng_x + + rh.hit.Ng_y * rh.hit.Ng_y + + rh.hit.Ng_z * rh.hit.Ng_z); + primitive_normals[i * 3 + 0] = + rh.hit.Ng_x * inv_norm; + primitive_normals[i * 3 + 1] = + rh.hit.Ng_y * inv_norm; + primitive_normals[i * 3 + 2] = + rh.hit.Ng_z * inv_norm; + } else { + geometry_ids[i] = RTC_INVALID_GEOMETRY_ID; + primitive_ids[i] = RTC_INVALID_GEOMETRY_ID; + primitive_uvs[i * 2 + 0] = 0; + primitive_uvs[i * 2 + 1] = 0; + primitive_normals[i * 3 + 0] = 0; + primitive_normals[i * 3 + 1] = 0; + primitive_normals[i * 3 + 2] = 0; + } + }); }); queue_.wait_and_throw(); } - + void TestOcclusions(const float* const rays, const size_t num_rays, const float tnear, @@ -529,41 +541,44 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { const int nthreads) override { CommitScene(); - auto scene = this->scene_; - queue_.submit([=](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { - struct RTCRayQueryContext context; - rtcInitRayQueryContext(&context); - - RTCOccludedArguments args; - rtcInitOccludedArguments(&args); - args.context = &context; - - const size_t i = item.get_id(0); - - struct RTCRay ray; - const float* r = &rays[i * 6]; - ray.org_x = r[0]; - ray.org_y = r[1]; - ray.org_z = r[2]; - ray.dir_x = r[3]; - ray.dir_y = r[4]; - ray.dir_z = r[5]; - ray.tnear = tnear; - ray.tfar = tfar; - ray.mask = -1; - ray.id = i; - ray.flags = 0; - - rtcOccluded1(scene, &ray, &args); - - occluded[i] = int8_t( - -std::numeric_limits::infinity() == ray.tfar); - }); + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for( + sycl::range<1>(num_rays), + [=](sycl::item<1> item, sycl::kernel_handler kh) { + struct RTCRayQueryContext context; + rtcInitRayQueryContext(&context); + + RTCOccludedArguments args; + rtcInitOccludedArguments(&args); + args.context = &context; + + const size_t i = item.get_id(0); + + struct RTCRay ray; + const float* r = &rays[i * 6]; + ray.org_x = r[0]; + ray.org_y = r[1]; + ray.org_z = r[2]; + ray.dir_x = r[3]; + ray.dir_y = r[4]; + ray.dir_z = r[5]; + ray.tnear = tnear; + ray.tfar = tfar; + ray.mask = -1; + ray.id = i; + ray.flags = 0; + + rtcOccluded1(scene, &ray, &args); + + occluded[i] = int8_t( + -std::numeric_limits::infinity() == + ray.tfar); + }); }); queue_.wait_and_throw(); } - + void CountIntersections(const float* const rays, const size_t num_rays, int* intersections, @@ -571,63 +586,74 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { CommitScene(); queue_.memset(intersections, 0, sizeof(int) * num_rays).wait(); - - callbacks::GeomPrimID* previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + + callbacks::GeomPrimID* previous_geom_prim_ID_tfar = + sycl::malloc_device(num_rays, queue_); // Check if allocation was successful if (!previous_geom_prim_ID_tfar) { throw std::runtime_error("Failed to allocate device memory"); } - - auto host_previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + + auto host_previous_geom_prim_ID_tfar = + std::unique_ptr>( + new callbacks::GeomPrimID[num_rays]); for (size_t i = 0; i < num_rays; ++i) { - host_previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + host_previous_geom_prim_ID_tfar[i] = { + uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; } // Copy the initialized data to the device - queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), previous_geom_prim_ID_tfar, num_rays * sizeof(callbacks::GeomPrimID)).wait(); - - auto scene = this->scene_; - queue_.submit([=](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { - callbacks::CountIntersectionsContext context; - rtcInitRayQueryContext(&context.context); - context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar; - context.intersections = intersections; - - RTCIntersectArguments args; - rtcInitIntersectArguments(&args); - args.filter = callbacks::CountIntersectionsFunc; - args.context = &context.context; - - const size_t i = item.get_id(0); - - struct RTCRayHit rh; - const float* r = &rays[i * 6]; - rh.ray.org_x = r[0]; - rh.ray.org_y = r[1]; - rh.ray.org_z = r[2]; - rh.ray.dir_x = r[3]; - rh.ray.dir_y = r[4]; - rh.ray.dir_z = r[5]; - rh.ray.tnear = 0; - rh.ray.tfar = std::numeric_limits::infinity(); - rh.ray.mask = -1; - rh.ray.flags = 0; - rh.ray.id = i; - rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(scene, &rh, &args); - }); + queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), + previous_geom_prim_ID_tfar, + num_rays * sizeof(callbacks::GeomPrimID)) + .wait(); + + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for( + sycl::range<1>(num_rays), + [=](sycl::item<1> item, sycl::kernel_handler kh) { + callbacks::CountIntersectionsContext context; + rtcInitRayQueryContext(&context.context); + context.previous_geom_prim_ID_tfar = + previous_geom_prim_ID_tfar; + context.intersections = intersections; + + RTCIntersectArguments args; + rtcInitIntersectArguments(&args); + args.filter = callbacks::CountIntersectionsFunc; + args.context = &context.context; + + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + rh.ray.tnear = 0; + rh.ray.tfar = std::numeric_limits::infinity(); + rh.ray.mask = -1; + rh.ray.flags = 0; + rh.ray.id = i; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh, &args); + }); }); queue_.wait_and_throw(); // Free the allocated memory sycl::free(previous_geom_prim_ID_tfar, queue_); } - + void ListIntersections(const float* const rays, const size_t num_rays, const size_t num_intersections, @@ -641,72 +667,87 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { const int nthreads) override { CommitScene(); - queue_.memset(track_intersections, 0, sizeof(uint32_t) * num_rays).wait(); + queue_.memset(track_intersections, 0, sizeof(uint32_t) * num_rays) + .wait(); queue_.memset(ray_ids, 0, sizeof(uint32_t) * num_intersections).wait(); - queue_.memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections).wait(); - queue_.memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections).wait(); - queue_.memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2).wait(); + queue_.memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections) + .wait(); + queue_.memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections) + .wait(); + queue_.memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2) + .wait(); queue_.memset(t_hit, 0, sizeof(float) * num_intersections).wait(); - callbacks::GeomPrimID* previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + callbacks::GeomPrimID* previous_geom_prim_ID_tfar = + sycl::malloc_device(num_rays, queue_); // Check if allocation was successful if (!previous_geom_prim_ID_tfar) { throw std::runtime_error("Failed to allocate device memory"); } - - auto host_previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + + auto host_previous_geom_prim_ID_tfar = + std::unique_ptr>( + new callbacks::GeomPrimID[num_rays]); for (size_t i = 0; i < num_rays; ++i) { - host_previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + host_previous_geom_prim_ID_tfar[i] = { + uint32_t(RTC_INVALID_GEOMETRY_ID), + uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; } // Copy the initialized data to the device - queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), previous_geom_prim_ID_tfar, num_rays * sizeof(callbacks::GeomPrimID)).wait(); + queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), + previous_geom_prim_ID_tfar, + num_rays * sizeof(callbacks::GeomPrimID)) + .wait(); // cumsum int* cumsum_ = sycl::malloc_device(num_rays, queue_); queue_.memcpy(cumsum, cumsum_, num_rays * sizeof(int)).wait(); - auto scene = this->scene_; - queue_.submit([=](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(num_rays),[=](sycl::item<1> item, sycl::kernel_handler kh) { - callbacks::ListIntersectionsContext context; - rtcInitRayQueryContext(&context.context); - context.previous_geom_prim_ID_tfar = previous_geom_prim_ID_tfar; - context.ray_ids = ray_ids; - context.geometry_ids = geometry_ids; - context.primitive_ids = primitive_ids; - context.primitive_uvs = primitive_uvs; - context.t_hit = t_hit; - context.cumsum = cumsum_; - context.track_intersections = track_intersections; - - RTCIntersectArguments args; - rtcInitIntersectArguments(&args); - //args.filter = callbacks::ListIntersectionsFunc; - args.context = &context.context; - - const size_t i = item.get_id(0); - - struct RTCRayHit rh; - const float* r = &rays[i * 6]; - rh.ray.org_x = r[0]; - rh.ray.org_y = r[1]; - rh.ray.org_z = r[2]; - rh.ray.dir_x = r[3]; - rh.ray.dir_y = r[4]; - rh.ray.dir_z = r[5]; - rh.ray.tnear = 0; - rh.ray.tfar = std::numeric_limits::infinity(); - rh.ray.mask = -1; - rh.ray.flags = 0; - rh.ray.id = i; - rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; - - rtcIntersect1(scene, &rh, &args); - }); + auto scene = this->scene_; + queue_.submit([=](sycl::handler& cgh) { + cgh.parallel_for( + sycl::range<1>(num_rays), + [=](sycl::item<1> item, sycl::kernel_handler kh) { + callbacks::ListIntersectionsContext context; + rtcInitRayQueryContext(&context.context); + context.previous_geom_prim_ID_tfar = + previous_geom_prim_ID_tfar; + context.ray_ids = ray_ids; + context.geometry_ids = geometry_ids; + context.primitive_ids = primitive_ids; + context.primitive_uvs = primitive_uvs; + context.t_hit = t_hit; + context.cumsum = cumsum_; + context.track_intersections = track_intersections; + + RTCIntersectArguments args; + rtcInitIntersectArguments(&args); + // args.filter = callbacks::ListIntersectionsFunc; + args.context = &context.context; + + const size_t i = item.get_id(0); + + struct RTCRayHit rh; + const float* r = &rays[i * 6]; + rh.ray.org_x = r[0]; + rh.ray.org_y = r[1]; + rh.ray.org_z = r[2]; + rh.ray.dir_x = r[3]; + rh.ray.dir_y = r[4]; + rh.ray.dir_z = r[5]; + rh.ray.tnear = 0; + rh.ray.tfar = std::numeric_limits::infinity(); + rh.ray.mask = -1; + rh.ray.flags = 0; + rh.ray.id = i; + rh.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rh.hit.instID[0] = RTC_INVALID_GEOMETRY_ID; + + rtcIntersect1(scene, &rh, &args); + }); }); queue_.wait_and_throw(); @@ -714,7 +755,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { sycl::free(previous_geom_prim_ID_tfar, queue_); sycl::free(cumsum_, queue_); } - + void ComputeClosestPoints(const float* const query_points, const size_t num_query_points, float* closest_points, @@ -880,12 +921,16 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { const int nthreads) override { CommitScene(); - memset(intersections, 0, sizeof(int) * num_rays); + std::memset(intersections, 0, sizeof(int) * num_rays); - auto previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + auto previous_geom_prim_ID_tfar = + std::unique_ptr>( + new callbacks::GeomPrimID[num_rays]); for (size_t i = 0; i < num_rays; ++i) { previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + uint32_t(RTC_INVALID_GEOMETRY_ID), + 0.f}; } callbacks::CountIntersectionsContext context; @@ -949,17 +994,21 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { const int nthreads) override { CommitScene(); - memset(track_intersections, 0, sizeof(uint32_t) * num_rays); - memset(ray_ids, 0, sizeof(uint32_t) * num_intersections); - memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections); - memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections); - memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2); - memset(t_hit, 0, sizeof(float) * num_intersections); - - auto previous_geom_prim_ID_tfar = std::unique_ptr>(new callbacks::GeomPrimID[num_rays]); + std::memset(track_intersections, 0, sizeof(uint32_t) * num_rays); + std::memset(ray_ids, 0, sizeof(uint32_t) * num_intersections); + std::memset(geometry_ids, 0, sizeof(uint32_t) * num_intersections); + std::memset(primitive_ids, 0, sizeof(uint32_t) * num_intersections); + std::memset(primitive_uvs, 0, sizeof(float) * num_intersections * 2); + std::memset(t_hit, 0, sizeof(float) * num_intersections); + + auto previous_geom_prim_ID_tfar = + std::unique_ptr>( + new callbacks::GeomPrimID[num_rays]); for (size_t i = 0; i < num_rays; ++i) { previous_geom_prim_ID_tfar[i] = {uint32_t(RTC_INVALID_GEOMETRY_ID), - uint32_t(RTC_INVALID_GEOMETRY_ID), 0.f}; + uint32_t(RTC_INVALID_GEOMETRY_ID), + 0.f}; } callbacks::ListIntersectionsContext context; @@ -1073,14 +1122,17 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { RaycastingScene::RaycastingScene(int64_t nthreads #ifdef BUILD_SYCL_MODULE - , const core::Device& device + , + const core::Device& device #endif - ) { +) { #ifdef BUILD_SYCL_MODULE if (device.IsSYCL()) { impl_ = std::make_unique(); - dynamic_cast(impl_.get())->InitializeDevice(); + dynamic_cast(impl_.get()) + ->InitializeDevice(); + impl_->tensor_device_ = device; } else { #endif impl_ = std::make_unique(); @@ -1095,7 +1147,6 @@ RaycastingScene::RaycastingScene(int64_t nthreads } #endif - impl_->tensor_device_ = device; rtcSetDeviceErrorFunction(impl_->device_, ErrorFunction, NULL); impl_->scene_ = rtcNewScene(impl_->device_); @@ -1142,14 +1193,18 @@ uint32_t RaycastingScene::AddTriangles(const core::Tensor& vertex_positions, 3 * sizeof(uint32_t), num_triangles); { -#ifdef BUILD_SYCL_MODULE auto data = vertex_positions.Contiguous(); +#ifdef BUILD_SYCL_MODULE if (impl_->tensor_device_.IsSYCL()) { - dynamic_cast(impl_.get())->queue_.memcpy(vertex_buffer, data.GetDataPtr(), sizeof(float) * 3 * num_vertices).wait(); + dynamic_cast(impl_.get()) + ->queue_ + .memcpy(vertex_buffer, data.GetDataPtr(), + sizeof(float) * 3 * num_vertices) + .wait(); } else { #endif - memcpy(vertex_buffer, data.GetDataPtr(), - sizeof(float) * 3 * num_vertices); + std::memcpy(vertex_buffer, data.GetDataPtr(), + sizeof(float) * 3 * num_vertices); #ifdef BUILD_SYCL_MODULE } #endif @@ -1158,11 +1213,15 @@ uint32_t RaycastingScene::AddTriangles(const core::Tensor& vertex_positions, auto data = triangle_indices.Contiguous(); #ifdef BUILD_SYCL_MODULE if (impl_->tensor_device_.IsSYCL()) { - dynamic_cast(impl_.get())->queue_.memcpy(index_buffer, data.GetDataPtr(), sizeof(uint32_t) * 3 * num_triangles).wait(); + dynamic_cast(impl_.get()) + ->queue_ + .memcpy(index_buffer, data.GetDataPtr(), + sizeof(uint32_t) * 3 * num_triangles) + .wait(); } else { #endif - memcpy(index_buffer, data.GetDataPtr(), - sizeof(uint32_t) * 3 * num_triangles); + std::memcpy(index_buffer, data.GetDataPtr(), + sizeof(uint32_t) * 3 * num_triangles); #ifdef BUILD_SYCL_MODULE } #endif @@ -1201,22 +1260,25 @@ std::unordered_map RaycastingScene::CastRays( std::unordered_map result; result["t_hit"] = core::Tensor(shape, core::Float32, rays.GetDevice()); - result["geometry_ids"] = core::Tensor(shape, core::UInt32, rays.GetDevice()); - result["primitive_ids"] = core::Tensor(shape, core::UInt32, rays.GetDevice()); + result["geometry_ids"] = + core::Tensor(shape, core::UInt32, rays.GetDevice()); + result["primitive_ids"] = + core::Tensor(shape, core::UInt32, rays.GetDevice()); shape.push_back(2); - result["primitive_uvs"] = core::Tensor(shape, core::Float32, rays.GetDevice()); + result["primitive_uvs"] = + core::Tensor(shape, core::Float32, rays.GetDevice()); shape.back() = 3; - result["primitive_normals"] = core::Tensor(shape, core::Float32, rays.GetDevice()); + result["primitive_normals"] = + core::Tensor(shape, core::Float32, rays.GetDevice()); auto data = rays.Contiguous(); impl_->CastRays(data.GetDataPtr(), num_rays, - result["t_hit"].GetDataPtr(), - result["geometry_ids"].GetDataPtr(), - result["primitive_ids"].GetDataPtr(), - result["primitive_uvs"].GetDataPtr(), - result["primitive_normals"].GetDataPtr(), - nthreads, - false); + result["t_hit"].GetDataPtr(), + result["geometry_ids"].GetDataPtr(), + result["primitive_ids"].GetDataPtr(), + result["primitive_uvs"].GetDataPtr(), + result["primitive_normals"].GetDataPtr(), nthreads, + false); return result; } @@ -1251,7 +1313,8 @@ core::Tensor RaycastingScene::CountIntersections(const core::Tensor& rays, // results. size_t num_rays = shape.NumElements(); - core::Tensor intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); + core::Tensor intersections(shape, core::Dtype::FromType(), + impl_->tensor_device_); auto data = rays.Contiguous(); @@ -1272,8 +1335,10 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, size_t num_rays = shape.NumElements(); // determine total number of intersections - core::Tensor intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); - core::Tensor track_intersections(shape, core::Dtype::FromType(), impl_->tensor_device_); + core::Tensor intersections(shape, core::Dtype::FromType(), + impl_->tensor_device_); + core::Tensor track_intersections(shape, core::Dtype::FromType(), + impl_->tensor_device_); auto data = rays.Contiguous(); impl_->CountIntersections(data.GetDataPtr(), num_rays, intersections.GetDataPtr(), nthreads); @@ -1286,7 +1351,8 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, } // prepare ray allocations (cumsum) - core::Tensor cumsum_tensor = core::Tensor::Zeros(shape, core::Dtype::FromType(), impl_->tensor_device_); + core::Tensor cumsum_tensor = core::Tensor::Zeros( + shape, core::Dtype::FromType(), impl_->tensor_device_); int* cumsum_ptr = cumsum_tensor.GetDataPtr(); cumsum_ptr[0] = 0; @@ -1300,17 +1366,21 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, shape.push_back(num_rays + 1); result["ray_splits"] = core::Tensor(shape, core::UInt32); uint32_t* ptr = result["ray_splits"].GetDataPtr(); - for (int i = 0; i < num_rays; ++i) { + for (size_t i = 0; i < num_rays; ++i) { ptr[i] = cumsum_ptr[i]; } ptr[num_rays] = num_intersections; shape[0] = num_intersections; - result["ray_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); - result["geometry_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); - result["primitive_ids"] = core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["ray_ids"] = + core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["geometry_ids"] = + core::Tensor(shape, core::UInt32, impl_->tensor_device_); + result["primitive_ids"] = + core::Tensor(shape, core::UInt32, impl_->tensor_device_); result["t_hit"] = core::Tensor(shape, core::Float32, impl_->tensor_device_); shape.push_back(2); - result["primitive_uvs"] = core::Tensor(shape, core::Float32, impl_->tensor_device_); + result["primitive_uvs"] = + core::Tensor(shape, core::Float32, impl_->tensor_device_); impl_->ListIntersections(data.GetDataPtr(), num_rays, num_intersections, cumsum_ptr, @@ -1320,7 +1390,7 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, result["primitive_ids"].GetDataPtr(), result["primitive_uvs"].GetDataPtr(), result["t_hit"].GetDataPtr(), nthreads); - + return result; } diff --git a/cpp/open3d/t/geometry/RaycastingScene.h b/cpp/open3d/t/geometry/RaycastingScene.h index 20237344279..3d636ba012c 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.h +++ b/cpp/open3d/t/geometry/RaycastingScene.h @@ -32,7 +32,8 @@ class RaycastingScene { /// \brief Default Constructor. RaycastingScene(int64_t nthreads = 0 #ifdef BUILD_SYCL_MODULE - , const core::Device& device = core::Device("CPU:0") + , + const core::Device &device = core::Device("CPU:0") #endif ); diff --git a/cpp/pybind/CMakeLists.txt b/cpp/pybind/CMakeLists.txt index c79bbd96719..6efae9a17fd 100644 --- a/cpp/pybind/CMakeLists.txt +++ b/cpp/pybind/CMakeLists.txt @@ -106,7 +106,7 @@ endif() # Include additional libraries that may be absent from the user system # eg: libc++.so, libc++abi.so (needed by filament) for Linux. # libc++.so is a linker script including libc++.so.1 and libc++abi.so, so append 1 to libc++.so -set(PYTHON_EXTRA_LIBRARIES $) +set(PYTHON_EXTRA_LIBRARIES $) if (BUILD_GUI AND CMAKE_SYSTEM_NAME STREQUAL "Linux") list(APPEND PYTHON_EXTRA_LIBRARIES ${CPP_LIBRARY}.1 ${CPPABI_LIBRARY}) endif() diff --git a/cpp/pybind/core/sycl_utils.cpp b/cpp/pybind/core/sycl_utils.cpp index ba90cb6fc51..7db39b7acbc 100644 --- a/cpp/pybind/core/sycl_utils.cpp +++ b/cpp/pybind/core/sycl_utils.cpp @@ -12,7 +12,7 @@ namespace open3d { namespace core { -void pybind_sycl_utils_definitions(py::module& m) { +void pybind_sycl_utils_definitions(py::module& m) { m.def("sycl_demo", &sy::SYCLDemo); py::module m_sycl = m.def_submodule("sycl"); diff --git a/cpp/pybind/t/geometry/raycasting_scene.cpp b/cpp/pybind/t/geometry/raycasting_scene.cpp index a9156b212e5..732abf6cdd2 100644 --- a/cpp/pybind/t/geometry/raycasting_scene.cpp +++ b/cpp/pybind/t/geometry/raycasting_scene.cpp @@ -59,7 +59,8 @@ void pybind_raycasting_scene_definitions(py::module& m) { static_cast>(m.attr("RaycastingScene")); // Constructors. #ifdef BUILD_SYCL_MODULE - raycasting_scene.def(py::init(), "nthreads"_a = 0, "device"_a = core::Device("CPU:0"), R"doc( + raycasting_scene.def(py::init(), "nthreads"_a = 0, + "device"_a = core::Device("CPU:0"), R"doc( Create a RaycastingScene. Args: diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 6629b129fc1..4bf8516d204 100755 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -3,7 +3,7 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} # For bash-specific commands -SHELL ["/bin/bash", "-c"] +SHELL ["/bin/bash", "-c", "-o", "pipefail"] # Required build args, should be specified in docker_build.sh ARG DEVELOPER_BUILD @@ -89,7 +89,7 @@ RUN apt-get update && apt-get install -y \ liblzma-dev \ && rm -rf /var/lib/apt/lists/* -# pyenv or Intel Python +# pyenv # The pyenv python paths are used during docker run, in this way docker run # does not need to activate the environment again. # The soft link from the python patch level version to the python mino version @@ -97,15 +97,12 @@ RUN apt-get update && apt-get install -y \ # which patch level pyenv will install (latest). ENV PYENV_ROOT=/root/.pyenv ENV PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PYENV_ROOT/versions/$PYTHON_VERSION/bin:$PATH" -ENV PATH="/opt/intel/oneapi/intelpython/latest/bin:${PATH}" -RUN if [ "${BUILD_SYCL_MODULE}" = "OFF" ]; then \ - curl https://pyenv.run | bash \ +RUN curl https://pyenv.run | bash \ && pyenv update \ && pyenv install $PYTHON_VERSION \ && pyenv global $PYTHON_VERSION \ && pyenv rehash \ && ln -s $PYENV_ROOT/versions/${PYTHON_VERSION}* $PYENV_ROOT/versions/${PYTHON_VERSION}; \ - fi RUN python --version && pip --version SHELL ["/bin/bash", "-o", "pipefail", "-c"] diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index 4dceb41e7eb..13e3e8cce51 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -19,14 +19,18 @@ @pytest.mark.parametrize("device", list_devices()) def test_cast_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32, device=device) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) + dtype=o3d.core.float32, + device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], + dtype=o3d.core.uint32, + device=device) scene = o3d.t.geometry.RaycastingScene(device=device) geom_id = scene.add_triangles(vertices, triangles) rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, device=device) + dtype=o3d.core.float32, + device=device) ans = scene.cast_rays(rays) # first ray hits the triangle @@ -34,7 +38,8 @@ def test_cast_rays(device): assert np.isclose(ans['t_hit'][0].item(), 1.0) # second ray misses - assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans['geometry_ids'][1].cpu() + assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans['geometry_ids'][ + 1].cpu() assert np.isinf(ans['t_hit'][1].item()) @@ -43,8 +48,11 @@ def test_cast_rays(device): @pytest.mark.parametrize("device", list_devices()) def test_cast_lots_of_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32, device=device) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) + dtype=o3d.core.float32, + device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], + dtype=o3d.core.uint32, + device=device) scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) @@ -60,14 +68,18 @@ def test_cast_lots_of_rays(device): @pytest.mark.parametrize("device", list_devices()) def test_test_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32, device=device) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) + dtype=o3d.core.float32, + device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], + dtype=o3d.core.uint32, + device=device) scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, device=device) + dtype=o3d.core.float32, + device=device) ans = scene.test_occlusions(rays).cpu() # first ray is occluded by the triangle @@ -90,8 +102,11 @@ def test_test_occlusions(device): @pytest.mark.parametrize("device", list_devices()) def test_test_lots_of_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], - dtype=o3d.core.float32, device=device) - triangles = o3d.core.Tensor([[0, 1, 2]], dtype=o3d.core.uint32, device=device) + dtype=o3d.core.float32, + device=device) + triangles = o3d.core.Tensor([[0, 1, 2]], + dtype=o3d.core.uint32, + device=device) scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) @@ -118,7 +133,8 @@ def test_add_triangle_mesh(device): rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, device=device) + dtype=o3d.core.float32, + device=device) ans = scene.count_intersections(rays) np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) @@ -139,7 +155,8 @@ def test_count_intersections(device): rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, device=device) + dtype=o3d.core.float32, + device=device) ans = scene.count_intersections(rays) np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) @@ -182,7 +199,8 @@ def test_list_intersections(device): rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, device=device) + dtype=o3d.core.float32, + device=device) print("PYTHON TEST 1", device) ans = scene.list_intersections(rays) print("PYTHON TEST 2") From d3e68ea557c56a96e69467ecfc9ddfcefd9f1195 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sat, 12 Oct 2024 10:42:17 -0700 Subject: [PATCH 15/35] Add array utils functions --- cpp/open3d/t/geometry/RaycastingScene.cpp | 74 +++++++++++++++++++---- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 2691d9d6fe9..4286ee36a37 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -424,6 +424,12 @@ struct RaycastingScene::Impl { float* primitive_uvs, float* primitive_normals, const int nthreads) = 0; + + virtual void ArraySum(int* data_ptr, size_t num_elements, size_t &result) = 0; + + virtual void ArrayPartialSum(int* input, int* output, size_t num_elements) = 0; + + virtual void CopyArray(int* src, uint32_t* dst, size_t num_elements) = 0; }; #ifdef BUILD_SYCL_MODULE @@ -766,6 +772,39 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { const int nthreads) override { throw std::logic_error("Function not yet implemented"); } + + void ArraySum(int* data_ptr, size_t num_elements, size_t &result) override { + int result_data[1] = {0}; + sycl::buffer result_buf(result_data, sycl::range<1>(1)); + + queue_.submit([&](sycl::handler& cgh) { + auto result_acc = result_buf.get_access(cgh); + cgh.parallel_for( + sycl::range<1>(num_elements), + [=](sycl::item<1> item, sycl::kernel_handler kh) { + const size_t i = item.get_id(0); + result_acc[0] += data_ptr[i]; + }); + }); + queue_.wait_and_throw(); + + result = result_data[0]; + } + + void ArrayPartialSum(int* input, int* output, size_t num_elements) override { + queue_.submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<1>(num_elements - 1), [=](sycl::id<1> i) { + size_t idx = i[0] + 1; + output[idx] = output[idx - 1] + input[idx - 1]; + }); + }); + + queue_.wait_and_throw(); + } + + void CopyArray(int* src, uint32_t* dst, size_t num_elements) override { + queue_.memcpy(dst, src, num_elements * sizeof(uint32_t)).wait(); + } }; #endif @@ -1118,6 +1157,25 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { LoopFn); } } + + void ArraySum(int* data_ptr, size_t num_elements, size_t &result) override { + for (size_t i = 0; i < num_elements; ++i) { + result += data_ptr[i]; + } + } + + void ArrayPartialSum(int* input, int* output, size_t num_elements) override { + output[0] = 0; + for (size_t i = 1; i < num_elements; ++i) { + output[i] = output[i - 1] + input[i - 1]; + } + } + + void CopyArray(int* src, uint32_t* dst, size_t num_elements) override { + for (size_t i = 0; i < num_elements; ++i) { + dst[i] = src[i]; + } + } }; RaycastingScene::RaycastingScene(int64_t nthreads @@ -1346,29 +1404,23 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, // prepare shape with that number of elements int* data_ptr = intersections.GetDataPtr(); size_t num_intersections = 0; - for (size_t i = 0; i < num_rays; ++i) { - num_intersections += data_ptr[i]; - } + impl_->ArraySum(data_ptr, num_rays, num_intersections); // prepare ray allocations (cumsum) core::Tensor cumsum_tensor = core::Tensor::Zeros( shape, core::Dtype::FromType(), impl_->tensor_device_); int* cumsum_ptr = cumsum_tensor.GetDataPtr(); - - cumsum_ptr[0] = 0; - for (size_t i = 1; i < num_rays; ++i) { - cumsum_ptr[i] = cumsum_ptr[i - 1] + data_ptr[i - 1]; - } + impl_->ArrayPartialSum(data_ptr, cumsum_ptr, num_rays); // generate results structure std::unordered_map result; shape.clear(); shape.push_back(num_rays + 1); result["ray_splits"] = core::Tensor(shape, core::UInt32); + uint32_t* ptr = result["ray_splits"].GetDataPtr(); - for (size_t i = 0; i < num_rays; ++i) { - ptr[i] = cumsum_ptr[i]; - } + impl_->CopyArray(cumsum_ptr, ptr, num_rays); + ptr[num_rays] = num_intersections; shape[0] = num_intersections; result["ray_ids"] = From 09860d008b8843409038a9e0a18f35ce07ac8c1e Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sun, 13 Oct 2024 11:24:37 -0700 Subject: [PATCH 16/35] Set the SYCL ArrayPartialSum as a sequential implementation --- cpp/open3d/t/geometry/RaycastingScene.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 4286ee36a37..ddd28f776a5 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -793,9 +793,10 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { void ArrayPartialSum(int* input, int* output, size_t num_elements) override { queue_.submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(num_elements - 1), [=](sycl::id<1> i) { - size_t idx = i[0] + 1; - output[idx] = output[idx - 1] + input[idx - 1]; + cgh.single_task([=]() { + for (size_t idx = 1; idx < num_elements; ++idx) { + output[idx] = output[idx - 1] + input[idx - 1]; + } }); }); From 487596f60477347afc366e639d1c6704dd4bfc1b Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sun, 20 Oct 2024 08:45:11 -0700 Subject: [PATCH 17/35] Make the ListIntersections function work on SYCL --- cpp/open3d/t/geometry/RaycastingScene.cpp | 17 ++++++++--------- python/test/t/geometry/test_raycasting_scene.py | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index ddd28f776a5..221d2bae610 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -731,7 +731,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { RTCIntersectArguments args; rtcInitIntersectArguments(&args); - // args.filter = callbacks::ListIntersectionsFunc; + args.filter = callbacks::ListIntersectionsFunc; args.context = &context.context; const size_t i = item.get_id(0); @@ -774,8 +774,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { } void ArraySum(int* data_ptr, size_t num_elements, size_t &result) override { - int result_data[1] = {0}; - sycl::buffer result_buf(result_data, sycl::range<1>(1)); + sycl::buffer result_buf(&result, sycl::range<1>(1)); queue_.submit([&](sycl::handler& cgh) { auto result_acc = result_buf.get_access(cgh); @@ -783,12 +782,11 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { sycl::range<1>(num_elements), [=](sycl::item<1> item, sycl::kernel_handler kh) { const size_t i = item.get_id(0); - result_acc[0] += data_ptr[i]; + sycl::atomic_ref atomic_result_data(result_acc[0]); + atomic_result_data.fetch_add(data_ptr[i]); }); }); queue_.wait_and_throw(); - - result = result_data[0]; } void ArrayPartialSum(int* input, int* output, size_t num_elements) override { @@ -1408,8 +1406,9 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, impl_->ArraySum(data_ptr, num_rays, num_intersections); // prepare ray allocations (cumsum) - core::Tensor cumsum_tensor = core::Tensor::Zeros( - shape, core::Dtype::FromType(), impl_->tensor_device_); + core::Tensor cumsum_tensor_cpu = core::Tensor::Zeros( + shape, core::Dtype::FromType()); + core::Tensor cumsum_tensor = cumsum_tensor_cpu.To(impl_->tensor_device_); int* cumsum_ptr = cumsum_tensor.GetDataPtr(); impl_->ArrayPartialSum(data_ptr, cumsum_ptr, num_rays); @@ -1442,7 +1441,7 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, result["geometry_ids"].GetDataPtr(), result["primitive_ids"].GetDataPtr(), result["primitive_uvs"].GetDataPtr(), - result["t_hit"].GetDataPtr(), nthreads); + result["t_hit"].GetDataPtr(), nthreads); return result; } diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index 13e3e8cce51..bb810398ffb 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -201,9 +201,7 @@ def test_list_intersections(device): [10, 10, 10, 1, 0, 0]], dtype=o3d.core.float32, device=device) - print("PYTHON TEST 1", device) ans = scene.list_intersections(rays) - print("PYTHON TEST 2") np.testing.assert_allclose(ans['t_hit'].cpu().numpy(), np.array([1.0, 2.0, 0.5]), From 975e7f0d1d05bbd259099c908535a15009367b96 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 25 Oct 2024 16:16:13 -0700 Subject: [PATCH 18/35] Fix ListInteractions SYCL kernel --- cpp/open3d/t/geometry/RaycastingScene.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 221d2bae610..c9f98f3746b 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -703,15 +703,11 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { } // Copy the initialized data to the device - queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), - previous_geom_prim_ID_tfar, + queue_.memcpy(previous_geom_prim_ID_tfar, + host_previous_geom_prim_ID_tfar.get(), num_rays * sizeof(callbacks::GeomPrimID)) .wait(); - // cumsum - int* cumsum_ = sycl::malloc_device(num_rays, queue_); - queue_.memcpy(cumsum, cumsum_, num_rays * sizeof(int)).wait(); - auto scene = this->scene_; queue_.submit([=](sycl::handler& cgh) { cgh.parallel_for( @@ -726,7 +722,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { context.primitive_ids = primitive_ids; context.primitive_uvs = primitive_uvs; context.t_hit = t_hit; - context.cumsum = cumsum_; + context.cumsum = cumsum; context.track_intersections = track_intersections; RTCIntersectArguments args; @@ -759,7 +755,6 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { // Free the allocated memory sycl::free(previous_geom_prim_ID_tfar, queue_); - sycl::free(cumsum_, queue_); } void ComputeClosestPoints(const float* const query_points, From c0c2aeade66276ccc3ae48a9924a072a31347d0c Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 25 Oct 2024 17:23:12 -0700 Subject: [PATCH 19/35] Add list intersections test --- python/test/t/geometry/test_raycasting_scene.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index bb810398ffb..294c5af33a5 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -211,15 +211,22 @@ def test_list_intersections(device): # list lots of random ray intersections to test the internal batching # we expect no errors for this test -def test_list_lots_of_intersections(): +@pytest.mark.parametrize("device", list_devices()) +def test_list_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) + vertex_positions = cube.vertex.positions + vertex_positions = vertex_positions.to(device) + triangle_indices = cube.triangle.indices + triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) + triangle_indices = triangle_indices.to(device) - scene = o3d.t.geometry.RaycastingScene() - scene.add_triangles(cube) + scene = o3d.t.geometry.RaycastingScene(device=device) + scene.add_triangles(vertex_positions, triangle_indices) rs = np.random.RandomState(123) rays = o3d.core.Tensor.from_numpy(rs.rand(123456, 6).astype(np.float32)) + rays = rays.to(device) _ = scene.list_intersections(rays) From 278295c0e515f9d13f1bf604958797b32200a7e2 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sat, 2 Nov 2024 11:34:43 -0700 Subject: [PATCH 20/35] Prepare ComputeClosestPoints for SYCL implementation --- cpp/open3d/t/geometry/RaycastingScene.cpp | 88 ++++++++++++----------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index c9f98f3746b..8b1078bfa8f 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -213,15 +213,16 @@ void AssertTensorDtypeLastDimDeviceMinNDim(const open3d::core::Tensor& tensor, } // Adapted from common/math/closest_point.h -inline Vec3fa closestPointTriangle(Vec3fa const& p, - Vec3fa const& a, - Vec3fa const& b, - Vec3fa const& c, - float& tex_u, - float& tex_v) { - const Vec3fa ab = b - a; - const Vec3fa ac = c - a; - const Vec3fa ap = p - a; +template +inline Vec3faType closestPointTriangle(Vec3faType const& p, + Vec3faType const& a, + Vec3faType const& b, + Vec3faType const& c, + float& tex_u, + float& tex_v) { + const Vec3faType ab = b - a; + const Vec3faType ac = c - a; + const Vec3faType ap = p - a; const float d1 = ab.dot(ap); const float d2 = ac.dot(ap); @@ -231,7 +232,7 @@ inline Vec3fa closestPointTriangle(Vec3fa const& p, return a; } - const Vec3fa bp = p - b; + const Vec3faType bp = p - b; const float d3 = ab.dot(bp); const float d4 = ac.dot(bp); if (d3 >= 0.f && d4 <= d3) { @@ -240,7 +241,7 @@ inline Vec3fa closestPointTriangle(Vec3fa const& p, return b; } - const Vec3fa cp = p - c; + const Vec3faType cp = p - c; const float d5 = ab.dot(cp); const float d6 = ac.dot(cp); if (d6 >= 0.f && d5 <= d6) { @@ -281,56 +282,59 @@ inline Vec3fa closestPointTriangle(Vec3fa const& p, return a + v * ab + w * ac; } +struct GeometryPtr { + RTCGeometryType geom_type; + const void* ptr1; + const void* ptr2; +}; + +template struct ClosestPointResult { ClosestPointResult() : primID(RTC_INVALID_GEOMETRY_ID), - geomID(RTC_INVALID_GEOMETRY_ID), - geometry_ptrs_ptr() {} + geomID(RTC_INVALID_GEOMETRY_ID) {} - Vec3f p; + Vec3fType p; unsigned int primID; unsigned int geomID; - Vec2f uv; - Vec3f n; - std::vector>* - geometry_ptrs_ptr; + Vec2fType uv; + Vec3fType n; + GeometryPtr* geometry_ptrs_ptr; }; // Code adapted from the embree closest_point tutorial. +template bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) { assert(args->userPtr); const unsigned int geomID = args->geomID; const unsigned int primID = args->primID; // query position in world space - Vec3fa q(args->query->x, args->query->y, args->query->z); + Vec3faType q(args->query->x, args->query->y, args->query->z); - ClosestPointResult* result = - static_cast(args->userPtr); - const RTCGeometryType geom_type = - std::get<0>(result->geometry_ptrs_ptr->operator[](geomID)); - const void* ptr1 = - std::get<1>(result->geometry_ptrs_ptr->operator[](geomID)); - const void* ptr2 = - std::get<2>(result->geometry_ptrs_ptr->operator[](geomID)); + ClosestPointResult* result = + static_cast*>(args->userPtr); + const RTCGeometryType geom_type = result->geometry_ptrs_ptr[geomID].geom_type; + const void* ptr1 = result->geometry_ptrs_ptr[geomID].ptr1; + const void* ptr2 = result->geometry_ptrs_ptr[geomID].ptr2; if (RTC_GEOMETRY_TYPE_TRIANGLE == geom_type) { const float* vertex_positions = (const float*)ptr1; const uint32_t* triangle_indices = (const uint32_t*)ptr2; - Vec3fa v0(vertex_positions[3 * triangle_indices[3 * primID + 0] + 0], + Vec3faType v0(vertex_positions[3 * triangle_indices[3 * primID + 0] + 0], vertex_positions[3 * triangle_indices[3 * primID + 0] + 1], vertex_positions[3 * triangle_indices[3 * primID + 0] + 2]); - Vec3fa v1(vertex_positions[3 * triangle_indices[3 * primID + 1] + 0], + Vec3faType v1(vertex_positions[3 * triangle_indices[3 * primID + 1] + 0], vertex_positions[3 * triangle_indices[3 * primID + 1] + 1], vertex_positions[3 * triangle_indices[3 * primID + 1] + 2]); - Vec3fa v2(vertex_positions[3 * triangle_indices[3 * primID + 2] + 0], + Vec3faType v2(vertex_positions[3 * triangle_indices[3 * primID + 2] + 0], vertex_positions[3 * triangle_indices[3 * primID + 2] + 1], vertex_positions[3 * triangle_indices[3 * primID + 2] + 2]); // Determine distance to closest point on triangle float u, v; - const Vec3fa p = closestPointTriangle(q, v0, v1, v2, u, v); + const Vec3faType p = closestPointTriangle(q, v0, v1, v2, u, v); float d = (q - p).norm(); // Store result in userPtr and update the query radius if we found a @@ -341,9 +345,9 @@ bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) { result->p = p; result->primID = primID; result->geomID = geomID; - Vec3fa e1 = v1 - v0; - Vec3fa e2 = v2 - v0; - result->uv = Vec2f(u, v); + Vec3faType e1 = v1 - v0; + Vec3faType e2 = v2 - v0; + result->uv = Vec2fType(u, v); result->n = (e1.cross(e2)).normalized(); return true; // Return true to indicate that the query radius // changed. @@ -363,8 +367,7 @@ struct RaycastingScene::Impl { bool scene_committed_; // true if the scene has been committed. RTCDevice device_; // Vector for storing some information about the added geometry. - std::vector> - geometry_ptrs_; + std::vector geometry_ptrs_; core::Device tensor_device_; // cpu or sycl bool devprop_join_commit; @@ -1117,12 +1120,12 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { query.radius = std::numeric_limits::infinity(); query.time = 0.f; - ClosestPointResult result; - result.geometry_ptrs_ptr = &geometry_ptrs_; + ClosestPointResult result; + result.geometry_ptrs_ptr = geometry_ptrs_.data(); RTCPointQueryContext instStack; rtcInitPointQueryContext(&instStack); - rtcPointQuery(scene_, &query, &instStack, &ClosestPointFunc, + rtcPointQuery(scene_, &query, &instStack, &ClosestPointFunc, (void*)&result); closest_points[3 * i + 0] = result.p.x(); @@ -1284,9 +1287,10 @@ uint32_t RaycastingScene::AddTriangles(const core::Tensor& vertex_positions, uint32_t geom_id = rtcAttachGeometry(impl_->scene_, geom); rtcReleaseGeometry(geom); - impl_->geometry_ptrs_.push_back(std::make_tuple(RTC_GEOMETRY_TYPE_TRIANGLE, - (const void*)vertex_buffer, - (const void*)index_buffer)); + GeometryPtr geometry_ptr = {RTC_GEOMETRY_TYPE_TRIANGLE, + (const void*)vertex_buffer, + (const void*)index_buffer}; + impl_->geometry_ptrs_.push_back(geometry_ptr); return geom_id; } From aa437b62482021cde879505e7a3b95244392fd99 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 4 Nov 2024 16:56:37 -0800 Subject: [PATCH 21/35] Add SYCL enable parameter to list_devices --- python/test/open3d_test.py | 4 ++-- .../test/t/geometry/test_raycasting_scene.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/test/open3d_test.py b/python/test/open3d_test.py index 271cdadf3a1..013df23f36a 100755 --- a/python/test/open3d_test.py +++ b/python/test/open3d_test.py @@ -23,7 +23,7 @@ def torch_available(): return True -def list_devices(): +def list_devices(enable_sycl=False): """ If Open3D is built with CUDA support: - If cuda device is available, returns [Device("CPU:0"), Device("CUDA:0")]. @@ -39,7 +39,7 @@ def list_devices(): import open3d as o3d if o3d.core.cuda.device_count() > 0: return [o3d.core.Device("CPU:0"), o3d.core.Device("CUDA:0")] - elif o3d.core.sycl.is_available(): + elif enable_sycl and o3d.core.sycl.is_available(): return [o3d.core.Device("CPU:0"), o3d.core.Device("SYCL:0")] else: return [o3d.core.Device("CPU:0")] diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index 294c5af33a5..ec8ae54acea 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -16,7 +16,7 @@ # test intersection with a single triangle -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_cast_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -45,7 +45,7 @@ def test_cast_rays(device): # cast lots of random rays to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_cast_lots_of_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -65,7 +65,7 @@ def test_cast_lots_of_rays(device): # test occlusion with a single triangle -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_test_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -99,7 +99,7 @@ def test_test_occlusions(device): # test lots of random rays for occlusions to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_test_lots_of_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -118,7 +118,7 @@ def test_test_lots_of_occlusions(device): _ = scene.test_occlusions(rays) -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_add_triangle_mesh(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) @@ -140,7 +140,7 @@ def test_add_triangle_mesh(device): np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_count_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) @@ -164,7 +164,7 @@ def test_count_intersections(device): # count lots of random ray intersections to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_count_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) @@ -184,7 +184,7 @@ def test_count_lots_of_intersections(device): _ = scene.count_intersections(rays) -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_list_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) @@ -211,7 +211,7 @@ def test_list_intersections(device): # list lots of random ray intersections to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices()) +@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_list_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) From 1eec474566e3fb13e0e084f292e76db85a75bcca Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 4 Nov 2024 16:57:44 -0800 Subject: [PATCH 22/35] Fix SYCL version of CountIntersections function --- cpp/open3d/t/geometry/RaycastingScene.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 8b1078bfa8f..d6993308658 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -615,8 +615,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { } // Copy the initialized data to the device - queue_.memcpy(host_previous_geom_prim_ID_tfar.get(), - previous_geom_prim_ID_tfar, + queue_.memcpy(previous_geom_prim_ID_tfar, + host_previous_geom_prim_ID_tfar.get(), num_rays * sizeof(callbacks::GeomPrimID)) .wait(); From 2c98133233f7a918609b81d9b5017255be695610 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 8 Nov 2024 16:20:01 -0800 Subject: [PATCH 23/35] Fix Dockerfile --- docker/Dockerfile.ci | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.ci b/docker/Dockerfile.ci index 4bf8516d204..ac98e50236f 100755 --- a/docker/Dockerfile.ci +++ b/docker/Dockerfile.ci @@ -3,7 +3,7 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} # For bash-specific commands -SHELL ["/bin/bash", "-c", "-o", "pipefail"] +SHELL ["/bin/bash", "-c"] # Required build args, should be specified in docker_build.sh ARG DEVELOPER_BUILD @@ -102,7 +102,7 @@ RUN curl https://pyenv.run | bash \ && pyenv install $PYTHON_VERSION \ && pyenv global $PYTHON_VERSION \ && pyenv rehash \ - && ln -s $PYENV_ROOT/versions/${PYTHON_VERSION}* $PYENV_ROOT/versions/${PYTHON_VERSION}; \ + && ln -s $PYENV_ROOT/versions/${PYTHON_VERSION}* $PYENV_ROOT/versions/${PYTHON_VERSION}; RUN python --version && pip --version SHELL ["/bin/bash", "-o", "pipefail", "-c"] From 7b3472068c6669ab3c3a989362641d60339d5ec5 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Fri, 8 Nov 2024 17:38:23 -0800 Subject: [PATCH 24/35] Fix code style --- cpp/open3d/t/geometry/RaycastingScene.cpp | 77 ++++++++++++++--------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index d6993308658..24103f79920 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -291,8 +291,7 @@ struct GeometryPtr { template struct ClosestPointResult { ClosestPointResult() - : primID(RTC_INVALID_GEOMETRY_ID), - geomID(RTC_INVALID_GEOMETRY_ID) {} + : primID(RTC_INVALID_GEOMETRY_ID), geomID(RTC_INVALID_GEOMETRY_ID) {} Vec3fType p; unsigned int primID; @@ -313,8 +312,10 @@ bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) { Vec3faType q(args->query->x, args->query->y, args->query->z); ClosestPointResult* result = - static_cast*>(args->userPtr); - const RTCGeometryType geom_type = result->geometry_ptrs_ptr[geomID].geom_type; + static_cast*>( + args->userPtr); + const RTCGeometryType geom_type = + result->geometry_ptrs_ptr[geomID].geom_type; const void* ptr1 = result->geometry_ptrs_ptr[geomID].ptr1; const void* ptr2 = result->geometry_ptrs_ptr[geomID].ptr2; @@ -322,19 +323,23 @@ bool ClosestPointFunc(RTCPointQueryFunctionArguments* args) { const float* vertex_positions = (const float*)ptr1; const uint32_t* triangle_indices = (const uint32_t*)ptr2; - Vec3faType v0(vertex_positions[3 * triangle_indices[3 * primID + 0] + 0], - vertex_positions[3 * triangle_indices[3 * primID + 0] + 1], - vertex_positions[3 * triangle_indices[3 * primID + 0] + 2]); - Vec3faType v1(vertex_positions[3 * triangle_indices[3 * primID + 1] + 0], - vertex_positions[3 * triangle_indices[3 * primID + 1] + 1], - vertex_positions[3 * triangle_indices[3 * primID + 1] + 2]); - Vec3faType v2(vertex_positions[3 * triangle_indices[3 * primID + 2] + 0], - vertex_positions[3 * triangle_indices[3 * primID + 2] + 1], - vertex_positions[3 * triangle_indices[3 * primID + 2] + 2]); + Vec3faType v0( + vertex_positions[3 * triangle_indices[3 * primID + 0] + 0], + vertex_positions[3 * triangle_indices[3 * primID + 0] + 1], + vertex_positions[3 * triangle_indices[3 * primID + 0] + 2]); + Vec3faType v1( + vertex_positions[3 * triangle_indices[3 * primID + 1] + 0], + vertex_positions[3 * triangle_indices[3 * primID + 1] + 1], + vertex_positions[3 * triangle_indices[3 * primID + 1] + 2]); + Vec3faType v2( + vertex_positions[3 * triangle_indices[3 * primID + 2] + 0], + vertex_positions[3 * triangle_indices[3 * primID + 2] + 1], + vertex_positions[3 * triangle_indices[3 * primID + 2] + 2]); // Determine distance to closest point on triangle float u, v; - const Vec3faType p = closestPointTriangle(q, v0, v1, v2, u, v); + const Vec3faType p = + closestPointTriangle(q, v0, v1, v2, u, v); float d = (q - p).norm(); // Store result in userPtr and update the query radius if we found a @@ -427,10 +432,14 @@ struct RaycastingScene::Impl { float* primitive_uvs, float* primitive_normals, const int nthreads) = 0; - - virtual void ArraySum(int* data_ptr, size_t num_elements, size_t &result) = 0; - virtual void ArrayPartialSum(int* input, int* output, size_t num_elements) = 0; + virtual void ArraySum(int* data_ptr, + size_t num_elements, + size_t& result) = 0; + + virtual void ArrayPartialSum(int* input, + int* output, + size_t num_elements) = 0; virtual void CopyArray(int* src, uint32_t* dst, size_t num_elements) = 0; }; @@ -771,23 +780,28 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { throw std::logic_error("Function not yet implemented"); } - void ArraySum(int* data_ptr, size_t num_elements, size_t &result) override { + void ArraySum(int* data_ptr, size_t num_elements, size_t& result) override { sycl::buffer result_buf(&result, sycl::range<1>(1)); queue_.submit([&](sycl::handler& cgh) { - auto result_acc = result_buf.get_access(cgh); + auto result_acc = + result_buf.get_access(cgh); cgh.parallel_for( sycl::range<1>(num_elements), [=](sycl::item<1> item, sycl::kernel_handler kh) { - const size_t i = item.get_id(0); - sycl::atomic_ref atomic_result_data(result_acc[0]); - atomic_result_data.fetch_add(data_ptr[i]); - }); + const size_t i = item.get_id(0); + sycl::atomic_ref + atomic_result_data(result_acc[0]); + atomic_result_data.fetch_add(data_ptr[i]); + }); }); queue_.wait_and_throw(); } - void ArrayPartialSum(int* input, int* output, size_t num_elements) override { + void ArrayPartialSum(int* input, + int* output, + size_t num_elements) override { queue_.submit([&](sycl::handler& cgh) { cgh.single_task([=]() { for (size_t idx = 1; idx < num_elements; ++idx) { @@ -1125,7 +1139,8 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { RTCPointQueryContext instStack; rtcInitPointQueryContext(&instStack); - rtcPointQuery(scene_, &query, &instStack, &ClosestPointFunc, + rtcPointQuery(scene_, &query, &instStack, + &ClosestPointFunc, (void*)&result); closest_points[3 * i + 0] = result.p.x(); @@ -1155,13 +1170,15 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { } } - void ArraySum(int* data_ptr, size_t num_elements, size_t &result) override { + void ArraySum(int* data_ptr, size_t num_elements, size_t& result) override { for (size_t i = 0; i < num_elements; ++i) { result += data_ptr[i]; } } - void ArrayPartialSum(int* input, int* output, size_t num_elements) override { + void ArrayPartialSum(int* input, + int* output, + size_t num_elements) override { output[0] = 0; for (size_t i = 1; i < num_elements; ++i) { output[i] = output[i - 1] + input[i - 1]; @@ -1405,8 +1422,8 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, impl_->ArraySum(data_ptr, num_rays, num_intersections); // prepare ray allocations (cumsum) - core::Tensor cumsum_tensor_cpu = core::Tensor::Zeros( - shape, core::Dtype::FromType()); + core::Tensor cumsum_tensor_cpu = + core::Tensor::Zeros(shape, core::Dtype::FromType()); core::Tensor cumsum_tensor = cumsum_tensor_cpu.To(impl_->tensor_device_); int* cumsum_ptr = cumsum_tensor.GetDataPtr(); impl_->ArrayPartialSum(data_ptr, cumsum_ptr, num_rays); @@ -1440,7 +1457,7 @@ RaycastingScene::ListIntersections(const core::Tensor& rays, result["geometry_ids"].GetDataPtr(), result["primitive_ids"].GetDataPtr(), result["primitive_uvs"].GetDataPtr(), - result["t_hit"].GetDataPtr(), nthreads); + result["t_hit"].GetDataPtr(), nthreads); return result; } From 6284e3d25d5e620cfa0cb135d1de1cfcb71fccc2 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Sat, 9 Nov 2024 05:53:27 -0800 Subject: [PATCH 25/35] Simplify RayCastingScene constructor --- cpp/open3d/t/geometry/RaycastingScene.cpp | 8 +------- cpp/open3d/t/geometry/RaycastingScene.h | 8 ++------ cpp/pybind/t/geometry/raycasting_scene.cpp | 11 +---------- 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 24103f79920..f9ebbd6219a 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -1192,13 +1192,7 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { } }; -RaycastingScene::RaycastingScene(int64_t nthreads -#ifdef BUILD_SYCL_MODULE - , - const core::Device& device -#endif -) { - +RaycastingScene::RaycastingScene(int64_t nthreads, const core::Device& device) { #ifdef BUILD_SYCL_MODULE if (device.IsSYCL()) { impl_ = std::make_unique(); diff --git a/cpp/open3d/t/geometry/RaycastingScene.h b/cpp/open3d/t/geometry/RaycastingScene.h index 3d636ba012c..66d06b207dc 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.h +++ b/cpp/open3d/t/geometry/RaycastingScene.h @@ -30,12 +30,8 @@ namespace geometry { class RaycastingScene { public: /// \brief Default Constructor. - RaycastingScene(int64_t nthreads = 0 -#ifdef BUILD_SYCL_MODULE - , - const core::Device &device = core::Device("CPU:0") -#endif - ); + RaycastingScene(int64_t nthreads = 0, + const core::Device &device = core::Device("CPU:0")); ~RaycastingScene(); diff --git a/cpp/pybind/t/geometry/raycasting_scene.cpp b/cpp/pybind/t/geometry/raycasting_scene.cpp index 732abf6cdd2..1b71ba5bfaf 100644 --- a/cpp/pybind/t/geometry/raycasting_scene.cpp +++ b/cpp/pybind/t/geometry/raycasting_scene.cpp @@ -58,23 +58,14 @@ void pybind_raycasting_scene_definitions(py::module& m) { auto raycasting_scene = static_cast>(m.attr("RaycastingScene")); // Constructors. -#ifdef BUILD_SYCL_MODULE raycasting_scene.def(py::init(), "nthreads"_a = 0, "device"_a = core::Device("CPU:0"), R"doc( Create a RaycastingScene. Args: nthreads (int): The number of threads to use for building the scene. Set to 0 for automatic. - enable_sycl (bool): Enable SYCL for building the scene. Default is False. + device (open3d.core.Device): The device to use. )doc"); -#else - raycasting_scene.def(py::init(), "nthreads"_a = 0, R"doc( -Create a RaycastingScene. - -Args: - nthreads (int): The number of threads to use for building the scene. Set to 0 for automatic. -)doc"); -#endif raycasting_scene.def( "add_triangles", From 460e3f7e748c6a2fab5c0db330bb8643885eca5a Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Thu, 14 Nov 2024 14:57:07 -0800 Subject: [PATCH 26/35] Move enable JIT cache function to SYCL utils --- cpp/open3d/core/SYCLUtils.cpp | 10 ++++++++++ cpp/open3d/core/SYCLUtils.h | 3 +++ cpp/open3d/t/geometry/RaycastingScene.cpp | 14 -------------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index ae8ccf63b7d..5f7acaac964 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -225,6 +225,16 @@ std::vector GetAvailableSYCLDevices() { #endif } +void enablePersistentJITCache() { +#if defined(_WIN32) + _putenv_s("SYCL_CACHE_PERSISTENT", "1"); + _putenv_s("SYCL_CACHE_DIR", "cache"); +#else + setenv("SYCL_CACHE_PERSISTENT", "1", 1); + setenv("SYCL_CACHE_DIR", "cache", 1); +#endif +} + } // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/SYCLUtils.h b/cpp/open3d/core/SYCLUtils.h index a1cb74426fb..a149bb7dcb5 100644 --- a/cpp/open3d/core/SYCLUtils.h +++ b/cpp/open3d/core/SYCLUtils.h @@ -40,6 +40,9 @@ bool IsDeviceAvailable(const Device& device); /// Return a list of available SYCL devices. std::vector GetAvailableSYCLDevices(); +/// Enables the JIT cache for SYCL. +void enablePersistentJITCache(); + } // namespace sy } // namespace core } // namespace open3d diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index f9ebbd6219a..9bd07f2bea7 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -170,18 +170,6 @@ typedef Eigen::AlignedVector3 Vec3fa; typedef Eigen::Matrix Vec2f; typedef Eigen::Vector3f Vec3f; -#ifdef BUILD_SYCL_MODULE -void enablePersistentJITCache() { -#if defined(_WIN32) - _putenv_s("SYCL_CACHE_PERSISTENT", "1"); - _putenv_s("SYCL_CACHE_DIR", "cache"); -#else - setenv("SYCL_CACHE_PERSISTENT", "1", 1); - setenv("SYCL_CACHE_DIR", "cache", 1); -#endif -} -#endif - // Error function called by embree. void ErrorFunction(void* userPtr, enum RTCError error, const char* str) { open3d::utility::LogError("Embree error: {} {}", rtcGetErrorString(error), @@ -452,8 +440,6 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { sycl::device sycl_device_; void InitializeDevice() { - enablePersistentJITCache(); - try { sycl_device_ = sycl::device(rtcSYCLDeviceSelector); } catch (std::exception& e) { From 8da0afce79c9a116a0645917941fa1d558492698 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Thu, 14 Nov 2024 15:09:16 -0800 Subject: [PATCH 27/35] Do not change the SYCL cache dir --- cpp/open3d/core/SYCLUtils.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index 5f7acaac964..a279f755568 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -228,10 +228,8 @@ std::vector GetAvailableSYCLDevices() { void enablePersistentJITCache() { #if defined(_WIN32) _putenv_s("SYCL_CACHE_PERSISTENT", "1"); - _putenv_s("SYCL_CACHE_DIR", "cache"); #else setenv("SYCL_CACHE_PERSISTENT", "1", 1); - setenv("SYCL_CACHE_DIR", "cache", 1); #endif } From 8dbb6faa86c79702e39fc4588c410367f612d174 Mon Sep 17 00:00:00 2001 From: Benjamin Ummenhofer Date: Sun, 17 Nov 2024 08:57:55 -0800 Subject: [PATCH 28/35] test RaycastingScene.add_triangles with TriangleMesh --- python/test/t/geometry/test_raycasting_scene.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index ec8ae54acea..ea26aaf3494 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -122,14 +122,10 @@ def test_test_lots_of_occlusions(device): def test_add_triangle_mesh(device): cube = o3d.t.geometry.TriangleMesh.from_legacy( o3d.geometry.TriangleMesh.create_box()) - vertex_positions = cube.vertex.positions - vertex_positions = vertex_positions.to(device) - triangle_indices = cube.triangle.indices - triangle_indices = triangle_indices.to(o3d.core.Dtype.UInt32) - triangle_indices = triangle_indices.to(device) + cube = cube.to(device) scene = o3d.t.geometry.RaycastingScene(device=device) - scene.add_triangles(vertex_positions, triangle_indices) + scene.add_triangles(cube) rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], [10, 10, 10, 1, 0, 0]], From 23b8c7de5b95be40dafe416f3ab926ee3cd42b00 Mon Sep 17 00:00:00 2001 From: Benjamin Ummenhofer Date: Mon, 18 Nov 2024 01:36:13 -0800 Subject: [PATCH 29/35] create objects for testing with tensor TriangleMesh --- .../test/t/geometry/test_raycasting_scene.py | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index ea26aaf3494..90384b147c8 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -120,8 +120,7 @@ def test_test_lots_of_occlusions(device): @pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_add_triangle_mesh(device): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() cube = cube.to(device) scene = o3d.t.geometry.RaycastingScene(device=device) @@ -138,8 +137,7 @@ def test_add_triangle_mesh(device): @pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_count_intersections(device): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions vertex_positions = vertex_positions.to(device) triangle_indices = cube.triangle.indices @@ -162,8 +160,7 @@ def test_count_intersections(device): # we expect no errors for this test @pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_count_lots_of_intersections(device): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions vertex_positions = vertex_positions.to(device) triangle_indices = cube.triangle.indices @@ -182,8 +179,7 @@ def test_count_lots_of_intersections(device): @pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_list_intersections(device): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions vertex_positions = vertex_positions.to(device) triangle_indices = cube.triangle.indices @@ -209,8 +205,7 @@ def test_list_intersections(device): # we expect no errors for this test @pytest.mark.parametrize("device", list_devices(enable_sycl=True)) def test_list_lots_of_intersections(device): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions vertex_positions = vertex_positions.to(device) triangle_indices = cube.triangle.indices @@ -264,8 +259,7 @@ def test_compute_lots_of_closest_points(): def test_compute_distance(): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() scene = o3d.t.geometry.RaycastingScene() scene.add_triangles(cube) @@ -278,8 +272,7 @@ def test_compute_distance(): def test_compute_signed_distance(): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() scene = o3d.t.geometry.RaycastingScene() scene.add_triangles(cube) @@ -292,8 +285,7 @@ def test_compute_signed_distance(): def test_compute_occupancy(): - cube = o3d.t.geometry.TriangleMesh.from_legacy( - o3d.geometry.TriangleMesh.create_box()) + cube = o3d.t.geometry.TriangleMesh.create_box() scene = o3d.t.geometry.RaycastingScene() scene.add_triangles(cube) @@ -378,8 +370,7 @@ def test_sphere_wrong_occupancy(): # This test checks a specific scenario where the old implementation # without ray jitter produced wrong results for a sphere because some # rays miss hitting exactly a vertex or an edge. - mesh = o3d.geometry.TriangleMesh.create_sphere(0.8) - mesh = o3d.t.geometry.TriangleMesh.from_legacy(mesh) + mesh = o3d.t.geometry.TriangleMesh.create_sphere(0.8) scene = o3d.t.geometry.RaycastingScene() scene.add_triangles(mesh) From 4b81b70cfa649d3517a1cf7728fe857abeaed4d0 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 18 Nov 2024 16:21:33 -0800 Subject: [PATCH 30/35] Fix possible memory leak in SYCL implementation of RayCastingScene --- cpp/open3d/t/geometry/RaycastingScene.cpp | 38 ++++++++++++++++------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 9bd07f2bea7..f2e12a9a55f 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -439,6 +439,18 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { sycl::context context_; sycl::device sycl_device_; + callbacks::GeomPrimID* li_previous_geom_prim_ID_tfar = nullptr; + callbacks::GeomPrimID* ci_previous_geom_prim_ID_tfar = nullptr; + + ~SYCLImpl() { + if (li_previous_geom_prim_ID_tfar) { + sycl::free(li_previous_geom_prim_ID_tfar, queue_); + } + if (ci_previous_geom_prim_ID_tfar) { + sycl::free(ci_previous_geom_prim_ID_tfar, queue_); + } + } + void InitializeDevice() { try { sycl_device_ = sycl::device(rtcSYCLDeviceSelector); @@ -591,11 +603,10 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { queue_.memset(intersections, 0, sizeof(int) * num_rays).wait(); - callbacks::GeomPrimID* previous_geom_prim_ID_tfar = - sycl::malloc_device(num_rays, queue_); + ci_previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); // Check if allocation was successful - if (!previous_geom_prim_ID_tfar) { + if (!ci_previous_geom_prim_ID_tfar) { throw std::runtime_error("Failed to allocate device memory"); } @@ -610,12 +621,13 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { } // Copy the initialized data to the device - queue_.memcpy(previous_geom_prim_ID_tfar, + queue_.memcpy(ci_previous_geom_prim_ID_tfar, host_previous_geom_prim_ID_tfar.get(), num_rays * sizeof(callbacks::GeomPrimID)) .wait(); auto scene = this->scene_; + auto ci_previous_geom_prim_ID_tfar_ = ci_previous_geom_prim_ID_tfar; queue_.submit([=](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>(num_rays), @@ -623,7 +635,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { callbacks::CountIntersectionsContext context; rtcInitRayQueryContext(&context.context); context.previous_geom_prim_ID_tfar = - previous_geom_prim_ID_tfar; + ci_previous_geom_prim_ID_tfar_; context.intersections = intersections; RTCIntersectArguments args; @@ -655,7 +667,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { queue_.wait_and_throw(); // Free the allocated memory - sycl::free(previous_geom_prim_ID_tfar, queue_); + sycl::free(ci_previous_geom_prim_ID_tfar, queue_); + ci_previous_geom_prim_ID_tfar = nullptr; } void ListIntersections(const float* const rays, @@ -682,11 +695,10 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { .wait(); queue_.memset(t_hit, 0, sizeof(float) * num_intersections).wait(); - callbacks::GeomPrimID* previous_geom_prim_ID_tfar = - sycl::malloc_device(num_rays, queue_); + li_previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); // Check if allocation was successful - if (!previous_geom_prim_ID_tfar) { + if (!li_previous_geom_prim_ID_tfar) { throw std::runtime_error("Failed to allocate device memory"); } @@ -701,12 +713,13 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { } // Copy the initialized data to the device - queue_.memcpy(previous_geom_prim_ID_tfar, + queue_.memcpy(li_previous_geom_prim_ID_tfar, host_previous_geom_prim_ID_tfar.get(), num_rays * sizeof(callbacks::GeomPrimID)) .wait(); auto scene = this->scene_; + auto li_previous_geom_prim_ID_tfar_ = li_previous_geom_prim_ID_tfar; queue_.submit([=](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>(num_rays), @@ -714,7 +727,7 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { callbacks::ListIntersectionsContext context; rtcInitRayQueryContext(&context.context); context.previous_geom_prim_ID_tfar = - previous_geom_prim_ID_tfar; + li_previous_geom_prim_ID_tfar_; context.ray_ids = ray_ids; context.geometry_ids = geometry_ids; context.primitive_ids = primitive_ids; @@ -752,7 +765,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { queue_.wait_and_throw(); // Free the allocated memory - sycl::free(previous_geom_prim_ID_tfar, queue_); + sycl::free(li_previous_geom_prim_ID_tfar, queue_); + li_previous_geom_prim_ID_tfar = nullptr; } void ComputeClosestPoints(const float* const query_points, From 5b6a6dee1f65dffd63c55073db1d796e1d9eaa83 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 18 Nov 2024 16:25:08 -0800 Subject: [PATCH 31/35] Fix code style --- cpp/open3d/t/geometry/RaycastingScene.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index f2e12a9a55f..3d8af41d485 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -603,7 +603,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { queue_.memset(intersections, 0, sizeof(int) * num_rays).wait(); - ci_previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + ci_previous_geom_prim_ID_tfar = + sycl::malloc_device(num_rays, queue_); // Check if allocation was successful if (!ci_previous_geom_prim_ID_tfar) { @@ -695,7 +696,8 @@ struct RaycastingScene::SYCLImpl : public RaycastingScene::Impl { .wait(); queue_.memset(t_hit, 0, sizeof(float) * num_intersections).wait(); - li_previous_geom_prim_ID_tfar = sycl::malloc_device(num_rays, queue_); + li_previous_geom_prim_ID_tfar = + sycl::malloc_device(num_rays, queue_); // Check if allocation was successful if (!li_previous_geom_prim_ID_tfar) { From b2689cbe1d0b26c0cccef23bc355d3429bbd4692 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 18 Nov 2024 17:08:00 -0800 Subject: [PATCH 32/35] Use STL functions in CPU array operations for RayCastingScene --- cpp/open3d/t/geometry/RaycastingScene.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index 3d8af41d485..d3f1701133b 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -1173,24 +1173,18 @@ struct RaycastingScene::CPUImpl : public RaycastingScene::Impl { } void ArraySum(int* data_ptr, size_t num_elements, size_t& result) override { - for (size_t i = 0; i < num_elements; ++i) { - result += data_ptr[i]; - } + result = std::accumulate(data_ptr, data_ptr + num_elements, result); } void ArrayPartialSum(int* input, int* output, size_t num_elements) override { output[0] = 0; - for (size_t i = 1; i < num_elements; ++i) { - output[i] = output[i - 1] + input[i - 1]; - } + std::partial_sum(input, input + num_elements - 1, output + 1); } void CopyArray(int* src, uint32_t* dst, size_t num_elements) override { - for (size_t i = 0; i < num_elements; ++i) { - dst[i] = src[i]; - } + std::copy(src, src + num_elements, dst); } }; From 239ccf8876c35e101f0e284f178c72e9e9daff42 Mon Sep 17 00:00:00 2001 From: "Murillo Rojas, Luis" Date: Mon, 18 Nov 2024 17:11:51 -0800 Subject: [PATCH 33/35] Move comment --- cpp/open3d/t/geometry/RaycastingScene.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/open3d/t/geometry/RaycastingScene.cpp b/cpp/open3d/t/geometry/RaycastingScene.cpp index d3f1701133b..444f4f7a5ff 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.cpp +++ b/cpp/open3d/t/geometry/RaycastingScene.cpp @@ -11,10 +11,10 @@ #endif #include "open3d/t/geometry/RaycastingScene.h" -// This header is in the embree src dir (embree/src/ext_embree/..). #ifdef BUILD_SYCL_MODULE #include #endif +// This header is in the embree src dir (embree/src/ext_embree/..). #include #include From 9ef538ac8344f3ff8e9817e79f3586a9a3d40a2a Mon Sep 17 00:00:00 2001 From: Sameer Sheorey Date: Mon, 18 Nov 2024 22:41:18 -0800 Subject: [PATCH 34/35] Do not include CUDA devices in raycast tests. pybind some SYCL util functions. --- cpp/open3d/core/SYCLUtils.cpp | 5 + cpp/open3d/core/SYCLUtils.h | 3 +- cpp/open3d/t/geometry/RaycastingScene.h | 3 +- cpp/pybind/core/sycl_utils.cpp | 8 + python/test/open3d_test.py | 37 ++--- .../test/t/geometry/test_raycasting_scene.py | 139 ++++++++++-------- 6 files changed, 108 insertions(+), 87 deletions(-) diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index a279f755568..c60552937c2 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -226,11 +226,16 @@ std::vector GetAvailableSYCLDevices() { } void enablePersistentJITCache() { +#ifdef BUILD_SYCL_MODULE #if defined(_WIN32) _putenv_s("SYCL_CACHE_PERSISTENT", "1"); #else setenv("SYCL_CACHE_PERSISTENT", "1", 1); #endif +#endif + utility::LogInfo( + "enablePersistentJITCache is not compiled with " + "BUILD_SYCL_MODULE=ON."); } } // namespace sy diff --git a/cpp/open3d/core/SYCLUtils.h b/cpp/open3d/core/SYCLUtils.h index a149bb7dcb5..8a38ad662d0 100644 --- a/cpp/open3d/core/SYCLUtils.h +++ b/cpp/open3d/core/SYCLUtils.h @@ -40,7 +40,8 @@ bool IsDeviceAvailable(const Device& device); /// Return a list of available SYCL devices. std::vector GetAvailableSYCLDevices(); -/// Enables the JIT cache for SYCL. +/// Enables the JIT cache for SYCL. This sets an environment variable and will +/// affect the entire process and any child processes. void enablePersistentJITCache(); } // namespace sy diff --git a/cpp/open3d/t/geometry/RaycastingScene.h b/cpp/open3d/t/geometry/RaycastingScene.h index 66d06b207dc..3a533aba96e 100644 --- a/cpp/open3d/t/geometry/RaycastingScene.h +++ b/cpp/open3d/t/geometry/RaycastingScene.h @@ -38,7 +38,8 @@ class RaycastingScene { /// \brief Add a triangle mesh to the scene. /// \param vertex_positions Vertices as Tensor of dim {N,3} and dtype float. /// \param triangle_indices Triangles as Tensor of dim {M,3} and dtype - /// uint32_t. \return The geometry ID of the added mesh. + /// uint32_t. + /// \return The geometry ID of the added mesh. uint32_t AddTriangles(const core::Tensor &vertex_positions, const core::Tensor &triangle_indices); diff --git a/cpp/pybind/core/sycl_utils.cpp b/cpp/pybind/core/sycl_utils.cpp index 7db39b7acbc..e50d82b8f53 100644 --- a/cpp/pybind/core/sycl_utils.cpp +++ b/cpp/pybind/core/sycl_utils.cpp @@ -19,6 +19,14 @@ void pybind_sycl_utils_definitions(py::module& m) { m_sycl.def("is_available", sy::IsAvailable, "Returns true if Open3D is compiled with SYCL support and at " "least one compatible SYCL device is detected."); + + m_sycl.def("get_available_devices", sy::GetAvailableSYCLDevices, + "Return a list of available SYCL devices."); + + m_sycl.def("enable_persistent_jit_cache", sy::enablePersistentJITCache, + "Enables the JIT cache for SYCL. This sets an environment " + "variable and " + "will affect the entire process and any child processes."); } } // namespace core diff --git a/python/test/open3d_test.py b/python/test/open3d_test.py index 013df23f36a..e6997f7a418 100755 --- a/python/test/open3d_test.py +++ b/python/test/open3d_test.py @@ -5,14 +5,6 @@ # SPDX-License-Identifier: MIT # ---------------------------------------------------------------------------- -import os -import sys -import urllib.request -import zipfile - -import numpy as np -import pytest - def torch_available(): try: @@ -23,26 +15,21 @@ def torch_available(): return True -def list_devices(enable_sycl=False): +def list_devices(enable_cuda=True, enable_sycl=False): """ - If Open3D is built with CUDA support: - - If cuda device is available, returns [Device("CPU:0"), Device("CUDA:0")]. - - If cuda device is not available, returns [Device("CPU:0")]. - - If Open3D is built with SYCL support: - - If SYCL device is available, returns [Device("CPU:0"), Device("SYCL:0")]. - - If SYCL device is not available, returns [Device("CPU:0")]. - - If Open3D is built without CUDA support: - - returns [Device("CPU:0")]. + Returns a list of devices that are available for Open3D to use: + - Device("CPU:0") + - Device("CUDA:0") if built with CUDA support and a CUDA device is available. + - Device("SYCL:0") if built with SYCL support and a SYCL device is available. """ import open3d as o3d - if o3d.core.cuda.device_count() > 0: - return [o3d.core.Device("CPU:0"), o3d.core.Device("CUDA:0")] - elif enable_sycl and o3d.core.sycl.is_available(): - return [o3d.core.Device("CPU:0"), o3d.core.Device("SYCL:0")] - else: - return [o3d.core.Device("CPU:0")] + + devices = [o3d.core.Device("CPU:0")] + if enable_cuda and o3d.core.cuda.device_count() > 0: + devices.append(o3d.core.Device("CUDA:0")) + if enable_sycl and o3d.core.sycl.is_available(): + return devices.append(o3d.core.Device("SYCL:0")) + return devices def list_devices_with_torch(): diff --git a/python/test/t/geometry/test_raycasting_scene.py b/python/test/t/geometry/test_raycasting_scene.py index 90384b147c8..3fcb9ea2d28 100755 --- a/python/test/t/geometry/test_raycasting_scene.py +++ b/python/test/t/geometry/test_raycasting_scene.py @@ -11,12 +11,14 @@ import sys import os + sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../..") from open3d_test import list_devices # test intersection with a single triangle -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_cast_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -28,24 +30,27 @@ def test_cast_rays(device): scene = o3d.t.geometry.RaycastingScene(device=device) geom_id = scene.add_triangles(vertices, triangles) - rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, - device=device) + rays = o3d.core.Tensor( + [[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], + dtype=o3d.core.float32, + device=device, + ) ans = scene.cast_rays(rays) # first ray hits the triangle - assert geom_id == ans['geometry_ids'][0].cpu() - assert np.isclose(ans['t_hit'][0].item(), 1.0) + assert geom_id == ans["geometry_ids"][0].cpu() + assert np.isclose(ans["t_hit"][0].item(), 1.0) # second ray misses - assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans['geometry_ids'][ + assert o3d.t.geometry.RaycastingScene.INVALID_ID == ans["geometry_ids"][ 1].cpu() - assert np.isinf(ans['t_hit'][1].item()) + assert np.isinf(ans["t_hit"][1].item()) # cast lots of random rays to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_cast_lots_of_rays(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -65,7 +70,8 @@ def test_cast_lots_of_rays(device): # test occlusion with a single triangle -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_test_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -77,9 +83,11 @@ def test_test_occlusions(device): scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertices, triangles) - rays = o3d.core.Tensor([[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, - device=device) + rays = o3d.core.Tensor( + [[0.2, 0.1, 1, 0, 0, -1], [10, 10, 10, 1, 0, 0]], + dtype=o3d.core.float32, + device=device, + ) ans = scene.test_occlusions(rays).cpu() # first ray is occluded by the triangle @@ -99,7 +107,8 @@ def test_test_occlusions(device): # test lots of random rays for occlusions to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_test_lots_of_occlusions(device): vertices = o3d.core.Tensor([[0, 0, 0], [1, 0, 0], [1, 1, 0]], dtype=o3d.core.float32, @@ -118,7 +127,8 @@ def test_test_lots_of_occlusions(device): _ = scene.test_occlusions(rays) -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_add_triangle_mesh(device): cube = o3d.t.geometry.TriangleMesh.create_box() cube = cube.to(device) @@ -126,16 +136,19 @@ def test_add_triangle_mesh(device): scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(cube) - rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], - [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, - device=device) + rays = o3d.core.Tensor( + [[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], + [10, 10, 10, 1, 0, 0]], + dtype=o3d.core.float32, + device=device, + ) ans = scene.count_intersections(rays) np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_count_intersections(device): cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions @@ -147,10 +160,12 @@ def test_count_intersections(device): scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertex_positions, triangle_indices) - rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], - [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, - device=device) + rays = o3d.core.Tensor( + [[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], + [10, 10, 10, 1, 0, 0]], + dtype=o3d.core.float32, + device=device, + ) ans = scene.count_intersections(rays) np.testing.assert_equal(ans.cpu().numpy(), [2, 1, 0]) @@ -158,7 +173,8 @@ def test_count_intersections(device): # count lots of random ray intersections to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_count_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions @@ -177,7 +193,8 @@ def test_count_lots_of_intersections(device): _ = scene.count_intersections(rays) -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_list_intersections(device): cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions @@ -189,13 +206,15 @@ def test_list_intersections(device): scene = o3d.t.geometry.RaycastingScene(device=device) scene.add_triangles(vertex_positions, triangle_indices) - rays = o3d.core.Tensor([[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], - [10, 10, 10, 1, 0, 0]], - dtype=o3d.core.float32, - device=device) + rays = o3d.core.Tensor( + [[0.5, 0.5, -1, 0, 0, 1], [0.5, 0.5, 0.5, 0, 0, 1], + [10, 10, 10, 1, 0, 0]], + dtype=o3d.core.float32, + device=device, + ) ans = scene.list_intersections(rays) - np.testing.assert_allclose(ans['t_hit'].cpu().numpy(), + np.testing.assert_allclose(ans["t_hit"].cpu().numpy(), np.array([1.0, 2.0, 0.5]), rtol=1e-6, atol=1e-6) @@ -203,7 +222,8 @@ def test_list_intersections(device): # list lots of random ray intersections to test the internal batching # we expect no errors for this test -@pytest.mark.parametrize("device", list_devices(enable_sycl=True)) +@pytest.mark.parametrize("device", + list_devices(enable_cuda=False, enable_sycl=True)) def test_list_lots_of_intersections(device): cube = o3d.t.geometry.TriangleMesh.create_box() vertex_positions = cube.vertex.positions @@ -234,12 +254,14 @@ def test_compute_closest_points(): dtype=o3d.core.float32) ans = scene.compute_closest_points(query_points) - assert (geom_id == ans['geometry_ids']).all() - assert (0 == ans['primitive_ids']).all() - np.testing.assert_allclose(ans['points'].numpy(), - np.array([[0.2, 0.1, 0.0], [1, 1, 0]]), - rtol=1e-6, - atol=1e-6) + assert (geom_id == ans["geometry_ids"]).all() + assert (0 == ans["primitive_ids"]).all() + np.testing.assert_allclose( + ans["points"].numpy(), + np.array([[0.2, 0.1, 0.0], [1, 1, 0]]), + rtol=1e-6, + atol=1e-6, + ) # compute lots of closest points to test the internal batching @@ -325,45 +347,42 @@ def test_output_shapes(shape): # some outputs append a specific last dim last_dim = { - 't_hit': [], - 'geometry_ids': [], - 'primitive_ids': [], - 'primitive_uvs': [2], - 'primitive_normals': [3], - 'points': [3], - 'ray_ids': [], - 'ray_splits': [] + "t_hit": [], + "geometry_ids": [], + "primitive_ids": [], + "primitive_uvs": [2], + "primitive_normals": [3], + "points": [3], + "ray_ids": [], + "ray_splits": [], } ans = scene.cast_rays(rays) for k, v in ans.items(): expected_shape = shape + last_dim[k] - assert list( - v.shape - ) == expected_shape, 'shape mismatch: expected {} but got {} for {}'.format( - expected_shape, list(v.shape), k) + assert (list(v.shape) == expected_shape + ), "shape mismatch: expected {} but got {} for {}".format( + expected_shape, list(v.shape), k) ans = scene.compute_closest_points(query_points) for k, v in ans.items(): expected_shape = shape + last_dim[k] - assert list( - v.shape - ) == expected_shape, 'shape mismatch: expected {} but got {} for {}'.format( - expected_shape, list(v.shape), k) + assert (list(v.shape) == expected_shape + ), "shape mismatch: expected {} but got {} for {}".format( + expected_shape, list(v.shape), k) ans = scene.list_intersections(rays) nx = np.sum(scene.count_intersections(rays).numpy()).tolist() for k, v in ans.items(): - if k == 'ray_splits': + if k == "ray_splits": alt_shape = [np.prod(rays.shape[:-1]) + 1] else: alt_shape = [nx] - #use np.append otherwise issues if alt_shape = [0] and last_dim[k] = [] + # use np.append otherwise issues if alt_shape = [0] and last_dim[k] = [] expected_shape = np.append(alt_shape, last_dim[k]).tolist() - assert list( - v.shape - ) == expected_shape, 'shape mismatch: expected {} but got {} for {}'.format( - expected_shape, list(v.shape), k) + assert (list(v.shape) == expected_shape + ), "shape mismatch: expected {} but got {} for {}".format( + expected_shape, list(v.shape), k) def test_sphere_wrong_occupancy(): From e3baed6a5f55f5a7a5c92d1b79960570996c314c Mon Sep 17 00:00:00 2001 From: Sameer Sheorey Date: Tue, 19 Nov 2024 16:28:09 -0800 Subject: [PATCH 35/35] Convert deprecated SYCL 1.2 code to SYCL 2020 --- cpp/open3d/core/Device.h | 2 +- cpp/open3d/core/SYCLContext.cpp | 4 +- cpp/open3d/core/SYCLUtils.cpp | 43 +++++++++------------- cpp/pybind/t/geometry/raycasting_scene.cpp | 4 +- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/cpp/open3d/core/Device.h b/cpp/open3d/core/Device.h index 5b04875ed20..13d6715f025 100644 --- a/cpp/open3d/core/Device.h +++ b/cpp/open3d/core/Device.h @@ -21,7 +21,7 @@ class Device { enum class DeviceType { CPU = 0, CUDA = 1, - SYCL = 2, // SYCL gpu_selector(). + SYCL = 2, // SYCL gpu_selector_v. }; /// Default constructor -> "CPU:0". diff --git a/cpp/open3d/core/SYCLContext.cpp b/cpp/open3d/core/SYCLContext.cpp index 65e3f9c4bc4..6a3007d7dba 100644 --- a/cpp/open3d/core/SYCLContext.cpp +++ b/cpp/open3d/core/SYCLContext.cpp @@ -46,7 +46,7 @@ SYCLContext::SYCLContext() { // SYCL GPU. // TODO: Currently we only support one GPU device. try { - const sycl::device &sycl_device = sycl::device(sycl::gpu_selector()); + const sycl::device &sycl_device = sycl::device(sycl::gpu_selector_v); const Device open3d_device = Device("SYCL:0"); devices_.push_back(open3d_device); device_to_sycl_device_[open3d_device] = sycl_device; @@ -60,7 +60,7 @@ SYCLContext::SYCLContext() { // your CPU does not have integrated GPU. try { const sycl::device &sycl_device = - sycl::device(sycl::host_selector()); + sycl::device(sycl::cpu_selector_v); const Device open3d_device = Device("SYCL:0"); utility::LogWarning( "SYCL GPU device is not available, falling back to SYCL " diff --git a/cpp/open3d/core/SYCLUtils.cpp b/cpp/open3d/core/SYCLUtils.cpp index c60552937c2..5860021996d 100644 --- a/cpp/open3d/core/SYCLUtils.cpp +++ b/cpp/open3d/core/SYCLUtils.cpp @@ -35,7 +35,7 @@ int SYCLDemo() { #ifdef BUILD_SYCL_MODULE // Ref: https://intel.github.io/llvm-docs/GetStartedGuide.html // Creating buffer of 4 ints to be used inside the kernel code. - sycl::buffer buffer(4); + sycl::buffer buffer(4); // Creating SYCL queue. sycl::queue q; @@ -51,13 +51,13 @@ int SYCLDemo() { cgh.parallel_for( num_workloads, [=](sycl::id<1> WIid) { // Fill buffer with indexes. - accessor[WIid] = (sycl::cl_int)WIid.get(0); + accessor[WIid] = (int)WIid.get(0); }); }); - // Getting read only access to the buffer on the host. + // Getting access to the buffer on the host. // Implicit barrier waiting for q to complete the work. - const auto host_accessor = buffer.get_access(); + const auto host_accessor = buffer.get_host_access(); // Check the results. bool mismatch_found = false; @@ -140,45 +140,38 @@ void PrintSYCLDevices(bool print_all) { utility::LogInfo("# Default SYCL selectors"); try { - const sycl::device &device = sycl::device(sycl::default_selector()); - utility::LogInfo("- sycl::default_selector() : {}", + const sycl::device &device = sycl::device(sycl::default_selector_v); + utility::LogInfo("- sycl::default_selector_v : {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { - utility::LogInfo("- sycl::default_selector() : N/A"); + utility::LogInfo("- sycl::default_selector_v : N/A"); } try { - const sycl::device &device = sycl::device(sycl::host_selector()); - utility::LogInfo("- sycl::host_selector() : {}", + const sycl::device &device = sycl::device(sycl::cpu_selector_v); + utility::LogInfo("- sycl::cpu_selector_v : {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { - utility::LogInfo("- sycl::host_selector() : N/A"); + utility::LogInfo("- sycl::cpu_selector_v : N/A"); } try { - const sycl::device &device = sycl::device(sycl::cpu_selector()); - utility::LogInfo("- sycl::cpu_selector() : {}", + const sycl::device &device = sycl::device(sycl::gpu_selector_v); + utility::LogInfo("- sycl::gpu_selector_v : {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { - utility::LogInfo("- sycl::cpu_selector() : N/A"); - } - try { - const sycl::device &device = sycl::device(sycl::gpu_selector()); - utility::LogInfo("- sycl::gpu_selector() : {}", - SYCLDeviceToString(device)); - } catch (const sycl::exception &e) { - utility::LogInfo("- sycl::gpu_selector() : N/A"); + utility::LogInfo("- sycl::gpu_selector_v : N/A"); } try { const sycl::device &device = - sycl::device(sycl::accelerator_selector()); - utility::LogInfo("- sycl::accelerator_selector(): {}", + sycl::device(sycl::accelerator_selector_v); + utility::LogInfo("- sycl::accelerator_selector_v: {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { - utility::LogInfo("- sycl::accelerator_selector(): N/A"); + utility::LogInfo("- sycl::accelerator_selector_v: N/A"); } utility::LogInfo("# Open3D SYCL device"); try { - const sycl::device &device = sycl::device(sycl::gpu_selector()); + const sycl::device &device = sycl::device(sycl::gpu_selector_v); utility::LogInfo("- Device(\"SYCL:0\"): {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { @@ -187,7 +180,7 @@ void PrintSYCLDevices(bool print_all) { } else { utility::LogInfo("# Open3D SYCL device"); try { - const sycl::device &device = sycl::device(sycl::gpu_selector()); + const sycl::device &device = sycl::device(sycl::gpu_selector_v); utility::LogInfo("- Device(\"SYCL:0\"): {}", SYCLDeviceToString(device)); } catch (const sycl::exception &e) { diff --git a/cpp/pybind/t/geometry/raycasting_scene.cpp b/cpp/pybind/t/geometry/raycasting_scene.cpp index 1b71ba5bfaf..b611dfcfbb0 100644 --- a/cpp/pybind/t/geometry/raycasting_scene.cpp +++ b/cpp/pybind/t/geometry/raycasting_scene.cpp @@ -22,7 +22,7 @@ or compute the closest point on the surface of a mesh with respect to one or more query points. It builds an internal acceleration structure to speed up those queries. -This class supports only the CPU device. +This class supports the CPU device and SYCL GPU device. The following shows how to create a scene and compute ray intersections:: @@ -64,7 +64,7 @@ Create a RaycastingScene. Args: nthreads (int): The number of threads to use for building the scene. Set to 0 for automatic. - device (open3d.core.Device): The device to use. + device (open3d.core.Device): The device to use. Currently CPU and SYCL devices are supported. )doc"); raycasting_scene.def(