From 7f6f1314c4fe544b086e103eecff5c703683939a Mon Sep 17 00:00:00 2001 From: Corentin ALLAIRE <62873125+Corentin-Allaire@users.noreply.github.com> Date: Mon, 15 Jan 2024 15:46:15 +0100 Subject: [PATCH] feat!: Implement a DBScan clustering algorithm directly in ACTS (#2863) With this branch we remove all dependencies on MLPack for the clustering used in the ML seed filtering algorithm and replace them with a custom implementation of the DBScan density-based clustering algorithm. This new algorithm is far more powerful than the previous MLPack implementation and supports the full C++. With it, we observe a speed-up of the seed filtering algorithm by a factor of 2 and remove an external dependency. All tracking performances should stay unchanged even if some minor differences might appear at the border of 2 nearby clusters (since the entries are not processed in the same order compared to the old mlpack implementation) --- CMakeLists.txt | 5 - Core/include/Acts/Utilities/DBScan.hpp | 233 ++++++++++++++++++ .../Algorithms/TrackFindingML/CMakeLists.txt | 14 +- .../AmbiguityDBScanClustering.hpp | 38 ++- .../SeedFilterDBScanClustering.hpp | 48 ++++ .../AmbiguityResolutionMLDBScanAlgorithm.cpp | 2 +- .../src/SeedFilterMLAlgorithm.cpp | 4 +- Examples/Python/CMakeLists.txt | 8 - .../python/acts/examples/onnx/mlpack.py | 9 - .../python/acts/examples/reconstruction.py | 2 +- Examples/Python/src/ModuleEntry.cpp | 2 - Examples/Python/src/Onnx.cpp | 13 + Examples/Python/src/OnnxMlpack.cpp | 38 --- Examples/Python/src/OnnxMlpackStub.cpp | 19 -- Plugins/CMakeLists.txt | 1 - Plugins/Mlpack/CMakeLists.txt | 29 --- .../Mlpack/SeedFilterDBScanClustering.hpp | 62 ----- Tests/UnitTests/Core/Utilities/CMakeLists.txt | 1 + .../UnitTests/Core/Utilities/DBScanTests.cpp | 108 ++++++++ cmake/Findmlpack.cmake | 16 -- docs/getting_started.md | 1 - 21 files changed, 425 insertions(+), 228 deletions(-) create mode 100644 Core/include/Acts/Utilities/DBScan.hpp rename 
{Plugins/Mlpack/include/Acts/Plugins/Mlpack => Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML}/AmbiguityDBScanClustering.hpp (66%) create mode 100644 Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterDBScanClustering.hpp delete mode 100644 Examples/Python/python/acts/examples/onnx/mlpack.py delete mode 100644 Examples/Python/src/OnnxMlpack.cpp delete mode 100644 Examples/Python/src/OnnxMlpackStub.cpp delete mode 100644 Plugins/Mlpack/CMakeLists.txt delete mode 100644 Plugins/Mlpack/include/Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp create mode 100644 Tests/UnitTests/Core/Utilities/DBScanTests.cpp delete mode 100644 cmake/Findmlpack.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 352c1303432..2db571156e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,6 @@ option(ACTS_BUILD_PLUGIN_JSON "Build json plugin" OFF) option(ACTS_USE_SYSTEM_NLOHMANN_JSON "Use nlohmann::json provided by the system instead of the bundled version" ${ACTS_USE_SYSTEM_LIBS}) option(ACTS_BUILD_PLUGIN_LEGACY "Build legacy plugin" OFF) option(ACTS_BUILD_PLUGIN_ONNX "Build ONNX plugin" OFF) -option(ACTS_BUILD_PLUGIN_MLPACK "Build MLpack plugin" OFF) option(ACTS_SETUP_VECMEM "Explicitly set up vecmem for the project" OFF) option(ACTS_USE_SYSTEM_VECMEM "Use a system-provided vecmem installation" ${ACTS_USE_SYSTEM_LIBS}) option(ACTS_BUILD_PLUGIN_SYCL "Build SYCL plugin" OFF) @@ -197,7 +196,6 @@ set(_acts_eigen3_version 3.3.7) set(_acts_hepmc3_version 3.2.1) set(_acts_nlohmanjson_version 3.2.0) set(_acts_onnxruntime_version 1.12.0) -set(_acts_mlpack_version 3.1.1) set(_acts_root_version 6.20) set(_acts_tbb_version 2020.1) @@ -309,9 +307,6 @@ if(ACTS_BUILD_PLUGIN_JSON) add_subdirectory(thirdparty/nlohmann_json) endif() endif() -if(ACTS_BUILD_PLUGIN_MLPACK) - find_package(mlpack ${_acts_mlpack_version} REQUIRED) -endif() if(ACTS_BUILD_PLUGIN_SYCL) find_package(SYCL REQUIRED) endif() diff --git 
a/Core/include/Acts/Utilities/DBScan.hpp b/Core/include/Acts/Utilities/DBScan.hpp new file mode 100644 index 00000000000..f978dde06a2 --- /dev/null +++ b/Core/include/Acts/Utilities/DBScan.hpp @@ -0,0 +1,233 @@ +// This file is part of the Acts project. +// +// Copyright (C) 2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#pragma once + +#include "Acts/Utilities/KDTree.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Acts { +/// @brief A general implementation of an N dimensional DBScan clustering algorithm. +/// +/// This is a general implementation of an N dimensional DBScan clustering +/// algorithm. The DBScan algorithm uses density information to cluster together +/// points that are close to each other. +/// +/// For each point, we will look for the neighbours that are within the epsilon +/// radius. If the number of neighbours is at least the minimum number of +/// points, we will start a new cluster and assign the current point to it. We +/// will then look for the neighbours of the neighbours and assign them to the +/// current cluster if they are not already assigned to a cluster. If a +/// neighbour itself has at least the minimum number of points as neighbours, +/// we will repeat the process on that neighbour. +/// +/// To speed up the search for the neighbours, we use the KDTree implemented in +/// ACTS. It performs a range search in the orthogonal hypercube with a length +/// of 2 epsilon. An extra cut is used to only keep the neighbours that are +/// within the epsilon radius. +/// +/// @tparam kDims The number of dimensions. +/// @tparam scalar_t The scalar type used to construct position vectors. +/// @tparam kLeafSize The maximum number of points in a leaf node of the KDTree. 
+template +class DBScan { + public: + // The type of coordinates for points. + using Point = std::array; + + // The type of a vector of coordinates. + using VectorPoints = std::vector; + + // The type to pair the points with an ID. + using Pair = std::pair; + + // The type of a vector of coordinate-ID pairs. + using VectorPairs = std::vector; + + // KDTree used before the DBScan algorithm to find the neighbours. + using Tree = KDTree; + + // Remove the default constructor. + DBScan() = delete; + + /// @brief Construct the DBScan algorithm with a given epsilon and minPoints. + /// + /// @param epsilon The epsilon radius used to find the neighbours. + /// @param minPoints The minimum number of points to form a cluster. + /// @param onePointCluster If true, all the noise points are considered as + /// individual one point clusters. + DBScan(scalar_t epsilon = 1.0, std::size_t minPoints = 1, + bool onePointCluster = false) + : m_eps(epsilon), + m_minPoints(minPoints), + m_onePointCluster(onePointCluster) {} + + /// @brief Cluster the input points. + /// + /// This function implements the main loop of the DBScan algorithm. + /// It loops over all the points and will try to start a new cluster + /// if it finds points that have yet to be clustered. + /// + /// @param inputPoints The input points to cluster. + /// @param clusteredPoints Output vector filled with the cluster ID of each point (-1 for noise). + /// @return The number of clusters (excluding noise if onePointCluster==False). + /// + int cluster(const VectorPoints& inputPoints, + std::vector& clusteredPoints) { + // Transform the initial vector of input points to a vector of pairs + // with the index of the point in the initial vector. + VectorPairs inputPointsWithIndex; + for (std::size_t id = 0; id < inputPoints.size(); id++) { + inputPointsWithIndex.push_back(std::make_pair(inputPoints[id], id)); + } + // Build the KDTree with the input points. + Tree tree = Tree(std::move(inputPointsWithIndex)); + + // Initialize the cluster ID to 0. 
+ int clusterID = 0; + // By default all the points are considered as noise. + clusteredPoints = std::vector(inputPoints.size(), -1); + + // Loop over all the points + for (std::size_t id = 0; id < inputPoints.size(); id++) { + // If the point is already assigned to a cluster, skip it. + if (clusteredPoints[id] != -1) { + continue; + } + // If not, we try to build a new cluster + std::vector pointToProcess{id}; + expandCluster(tree, inputPoints, pointToProcess, clusteredPoints, + clusterID); + // If the cluster has been created, increment the cluster ID. + if (clusteredPoints[id] != -1) { + clusterID++; + } + } + if (m_onePointCluster) { + // If noise is present and onePointCluster is true, all the noise points + // are considered as individual one point clusters. Loop over the + // cluster assignment of every point. + for (auto& cluster : clusteredPoints) { + // If the point is assigned to noise, assign it to a new cluster. + if (cluster == -1) { + cluster = clusterID; + clusterID++; + } + } + } + return clusterID; + } + + private: + /// @brief Extend the cluster. + /// + /// This function will extend the cluster by finding all the neighbours of the + /// current point and assigning them to the current cluster. + /// The KDTree is used to find the neighbours and an extra cut is used to only + /// keep the neighbours that are within the epsilon radius. + /// + /// @param tree The KDTree containing all the points. + /// @param inputPoints The vector containing the input points. + /// @param pointsToProcess The vector containing the ids of the points that need to be + /// processed. + /// @param clusteredPoints Vector containing the cluster ID of each point. + /// @param clusterID The ID of the current cluster. + /// + void expandCluster(const Tree& tree, const VectorPoints& inputPoints, + const std::vector& pointsToProcess, + std::vector& clusteredPoints, const int clusterID) { + // Loop over all the points that need to be processed. 
+ for (const auto id : pointsToProcess) { + // Let's look for the neighbours of the current point. + const Point currentPoint = inputPoints[id]; + std::vector neighbours; + // We create the range in which we will look for the neighbours (a + // hypercube with a length of 2 epsilon). + typename Tree::range_t range; + for (std::size_t dim = 0; dim < kDims; dim++) { + range[dim] = std::make_pair(currentPoint[dim] - m_eps, + currentPoint[dim] + m_eps); + } + // We use the KDTree to find the neighbours. + // An extra cut needs to be applied to only keep the neighbours that + // are within the epsilon radius. + tree.rangeSearchMapDiscard( + range, [this, &neighbours, currentPoint]( + const typename Tree::coordinate_t& pos, + const typename Tree::value_t& val) { + scalar_t distance = 0; + for (std::size_t dim = 0; dim < kDims; dim++) { + distance += (pos[dim] - currentPoint[dim]) * + (pos[dim] - currentPoint[dim]); + } + if (distance <= m_eps * m_eps) { + neighbours.push_back(val); + } + }); + std::size_t nNeighbours = neighbours.size(); + // If a cluster has already been started we add the neighbours to it + if (clusteredPoints[id] != -1) { + updateNeighbours(neighbours, clusteredPoints, clusterID); + } + if (nNeighbours >= m_minPoints) { + // If the cluster has not been started yet and we have enough + // neighbours, we start the cluster and assign the current point and its + // neighbours to it. + if (clusteredPoints[id] == -1) { + clusteredPoints[id] = clusterID; + updateNeighbours(neighbours, clusteredPoints, clusterID); + } + // Try to extend the cluster with the neighbours. + expandCluster(tree, inputPoints, neighbours, clusteredPoints, + clusterID); + } + } + } + + /// @brief Update the neighbours. + /// + /// This function will remove the neighbours that are already assigned to a + /// cluster and assign the remaining ones to the current cluster. + /// + /// @param neighbours The vector containing the ids of the neighbours. 
+ /// @param clusteredPoints Vector containing the cluster ID of each point. + /// @param clusterID The ID of the current cluster. + /// + void updateNeighbours(std::vector& neighbours, + std::vector& clusteredPoints, + const int clusterID) { + neighbours.erase(std::remove_if(neighbours.begin(), neighbours.end(), + [&clusteredPoints](int i) { + return clusteredPoints[i] != -1; + }), + neighbours.end()); + + for (const auto& neighbour : neighbours) { + clusteredPoints[neighbour] = clusterID; + } + } + + // The epsilon radius used to find the neighbours. + scalar_t m_eps; + // The minimum number of points to form a cluster. + std::size_t m_minPoints = 1; + // If true, all the noise points are considered as individual one point + // clusters. + bool m_onePointCluster = false; +}; + +} // namespace Acts diff --git a/Examples/Algorithms/TrackFindingML/CMakeLists.txt b/Examples/Algorithms/TrackFindingML/CMakeLists.txt index e517d0060e8..845a9d1ece6 100644 --- a/Examples/Algorithms/TrackFindingML/CMakeLists.txt +++ b/Examples/Algorithms/TrackFindingML/CMakeLists.txt @@ -1,15 +1,10 @@ set(SOURCES src/AmbiguityResolutionML.cpp src/AmbiguityResolutionMLAlgorithm.cpp + src/AmbiguityResolutionMLDBScanAlgorithm.cpp + src/SeedFilterMLAlgorithm.cpp ) -if(ACTS_BUILD_PLUGIN_MLPACK) - list(APPEND SOURCES - src/AmbiguityResolutionMLDBScanAlgorithm.cpp - src/SeedFilterMLAlgorithm.cpp - ) -endif() - add_library( ActsExamplesTrackFindingML SHARED ${SOURCES} @@ -28,11 +23,6 @@ target_link_libraries( ActsExamplesFramework ) -if(ACTS_BUILD_PLUGIN_MLPACK) - target_link_libraries( - ActsExamplesTrackFindingML PUBLIC ActsPluginMlpack) -endif() - install( TARGETS ActsExamplesTrackFindingML LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/Plugins/Mlpack/include/Acts/Plugins/Mlpack/AmbiguityDBScanClustering.hpp b/Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp similarity index 66% rename from 
Plugins/Mlpack/include/Acts/Plugins/Mlpack/AmbiguityDBScanClustering.hpp rename to Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp index d726a6db888..16a5801f971 100644 --- a/Plugins/Mlpack/include/Acts/Plugins/Mlpack/AmbiguityDBScanClustering.hpp +++ b/Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp @@ -1,6 +1,6 @@ // This file is part of the Acts project. // -// Copyright (C) 2023 CERN for the benefit of the Acts project +// Copyright (C) 2023-2024 CERN for the benefit of the Acts project // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -10,21 +10,20 @@ #include "Acts/EventData/TrackContainer.hpp" #include "Acts/TrackFinding/detail/AmbiguityTrackClustering.hpp" +#include "Acts/Utilities/DBScan.hpp" #include #include #include -#include "mlpack/methods/dbscan.hpp" - namespace Acts { /// Clusterise tracks based on shared hits /// -/// @param trackMap : Multimap storing pair of track ID and vector of measurement ID. The keys are the number of measurement and are just there to facilitate the ordering. -/// @param tracks : Track container with all the track to be clustered -/// @param epsilon : Maximum distance between 2 tracks to be clustered -/// @param minPoints : Minimum number of tracks to create a cluster +/// @param trackMap Multimap storing pair of track ID and vector of measurement ID. The keys are the number of measurement and are just there to facilitate the ordering. +/// @param tracks Track container with all the track to be clustered +/// @param epsilon Maximum distance between 2 tracks to be clustered +/// @param minPoints Minimum number of tracks to create a cluster /// @return an unordered map representing the clusters, the keys the ID of the primary track of each cluster and the store a vector of track IDs. 
template class holder_t> @@ -40,34 +39,29 @@ std::unordered_map> dbscanTrackClustering( // different clusters. std::unordered_map hitToTrack; - // DBSCAN algorithm from MLpack used in the track clustering - mlpack::DBSCAN dbscan(epsilon, minPoints); + // Initialize a DBScan of dimension 4 (phi, eta, z, Pt) + using DBSCAN = Acts::DBScan<4, double, 4>; + DBSCAN dbscan(epsilon, minPoints, true); - arma::mat data(2, trackMap.size()); + std::vector> data; std::size_t trackID = 0; - arma::Row assignments; + std::vector clusterAssignments; // Get the input feature of the network for all the tracks for (const auto& [key, val] : trackMap) { auto traj = tracks.getTrack(val.first); - data(0, trackID) = Acts::VectorHelpers::eta(traj.momentum()); - data(1, trackID) = Acts::VectorHelpers::phi(traj.momentum()); - trackID++; + data.push_back({Acts::VectorHelpers::eta(traj.momentum()), + Acts::VectorHelpers::phi(traj.momentum())}); } - std::size_t clusterNb = dbscan.Cluster(data, assignments); - trackID = 0; + std::size_t clusterNb = dbscan.cluster(data, clusterAssignments); // Cluster track with DBScan std::vector< std::multimap>>> dbscanClusters(clusterNb); for (const auto& [key, val] : trackMap) { - std::size_t clusterID = assignments(trackID); - if (assignments(trackID) == SIZE_MAX) { - cluster.emplace(val.first, std::vector(1, val.first)); - } else { - dbscanClusters[clusterID].emplace(key, val); - } + std::size_t clusterID = clusterAssignments[trackID]; + dbscanClusters[clusterID].emplace(key, val); trackID++; } diff --git a/Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterDBScanClustering.hpp b/Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterDBScanClustering.hpp new file mode 100644 index 00000000000..ab6d0aa0438 --- /dev/null +++ b/Examples/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterDBScanClustering.hpp @@ -0,0 +1,48 @@ +// This file is part of the Acts project. 
+// +// Copyright (C) 2023-2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#pragma once + +#include "Acts/Utilities/DBScan.hpp" + +#include +#include +#include + +namespace Acts { + +/// Clusters seeds based on their direction, their Z impact parameter and their +/// momentum using DBScan +/// +/// @param input Input parameters for the clustering (phi, eta, z, Pt) +/// @param epsilon Maximum distance between 2 seeds to be clustered +/// @param minPoints Minimum number of seeds to create a cluster +/// @return a vector of clusters, each cluster being the vector of the indices (positions in input) of the seeds assigned to it. +inline std::vector> dbscanSeedClustering( + const std::vector>& input, float epsilon = 0.03, + int minPoints = 2) { + // Initialize a DBScan of dimension 4 (phi, eta, z, Pt); noise points become one point clusters + using DBSCAN = Acts::DBScan<4, double, 4>; + DBSCAN dbscan(epsilon, minPoints, true); + + // Cluster the seeds with DBScan + std::vector clusterAssignments; + std::size_t clusterNb = dbscan.cluster(input, clusterAssignments); + + // Prepare the output + std::vector> cluster(clusterNb, + std::vector()); + for (std::size_t iD = 0; iD < input.size(); iD++) { + int clusterID = clusterAssignments[iD]; + cluster[clusterID].push_back(iD); + } + + return cluster; +} + +} // namespace Acts diff --git a/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp b/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp index 4c0601c4ece..cf552639486 100644 --- a/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp +++ b/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp @@ -8,9 +8,9 @@ #include "ActsExamples/TrackFindingML/AmbiguityResolutionMLDBScanAlgorithm.hpp" 
-#include "Acts/Plugins/Mlpack/AmbiguityDBScanClustering.hpp" #include "ActsExamples/Framework/ProcessCode.hpp" #include "ActsExamples/Framework/WhiteBoard.hpp" +#include "ActsExamples/TrackFindingML/AmbiguityDBScanClustering.hpp" #include #include diff --git a/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp b/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp index 05eb6a9a02e..fa784a258db 100644 --- a/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp +++ b/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp @@ -8,9 +8,9 @@ #include "ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp" -#include "Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp" #include "ActsExamples/Framework/ProcessCode.hpp" #include "ActsExamples/Framework/WhiteBoard.hpp" +#include "ActsExamples/TrackFindingML/SeedFilterDBScanClustering.hpp" #include #include @@ -50,7 +50,7 @@ ActsExamples::ProcessCode ActsExamples::SeedFilterMLAlgorithm::execute( Eigen::Array networkInput(seeds.size(), 14); - std::vector> clusteringParams; + std::vector> clusteringParams; // Loop over the seed and parameters to fill the input for the clustering // and the NN for (std::size_t i = 0; i < seeds.size(); i++) { diff --git a/Examples/Python/CMakeLists.txt b/Examples/Python/CMakeLists.txt index 8225d1db66a..282f4d73852 100644 --- a/Examples/Python/CMakeLists.txt +++ b/Examples/Python/CMakeLists.txt @@ -175,16 +175,8 @@ if(ACTS_BUILD_PLUGIN_ONNX) target_sources(ActsPythonBindings PRIVATE src/OnnxNeuralCalibrator.cpp) list(APPEND py_files examples/onnx/__init__.py) - if(ACTS_BUILD_PLUGIN_MLPACK) - target_sources(ActsPythonBindings PRIVATE src/OnnxMlpack.cpp) - list(APPEND py_files examples/onnx/mlpack.py) - else() - target_sources(ActsPythonBindings PRIVATE src/OnnxMlpackStub.cpp) - endif() - else() target_sources(ActsPythonBindings PRIVATE src/OnnxStub.cpp) - target_sources(ActsPythonBindings PRIVATE src/OnnxMlpackStub.cpp) 
target_sources(ActsPythonBindings PRIVATE src/OnnxNeuralCalibratorStub.cpp) endif() diff --git a/Examples/Python/python/acts/examples/onnx/mlpack.py b/Examples/Python/python/acts/examples/onnx/mlpack.py deleted file mode 100644 index 1bec62f9d9a..00000000000 --- a/Examples/Python/python/acts/examples/onnx/mlpack.py +++ /dev/null @@ -1,9 +0,0 @@ -from acts._adapter import _patch_config -from acts import ActsPythonBindings - -if not hasattr(ActsPythonBindings._examples, "_mlpack"): - raise ImportError("ActsPythonBindings._examples._mlpack not found") - -_patch_config(ActsPythonBindings._examples._mlpack) - -from acts.ActsPythonBindings._examples._mlpack import * diff --git a/Examples/Python/python/acts/examples/reconstruction.py b/Examples/Python/python/acts/examples/reconstruction.py index 8c52327175f..b1971f49e9b 100644 --- a/Examples/Python/python/acts/examples/reconstruction.py +++ b/Examples/Python/python/acts/examples/reconstruction.py @@ -951,7 +951,7 @@ def addSeedFilterML( outputDirCsv: Optional[Union[Path, str]] = None, ) -> None: customLogLevel = acts.examples.defaultLogging(s, logLevel)() - from acts.examples.onnx.mlpack import SeedFilterMLAlgorithm + from acts.examples.onnx import SeedFilterMLAlgorithm inputParticles = "particles" selectedParticles = "truth_seeds_selected" diff --git a/Examples/Python/src/ModuleEntry.cpp b/Examples/Python/src/ModuleEntry.cpp index 0c8f3f7ee41..3254c88fb6f 100644 --- a/Examples/Python/src/ModuleEntry.cpp +++ b/Examples/Python/src/ModuleEntry.cpp @@ -76,7 +76,6 @@ void addEDM4hep(Context& ctx); void addSvg(Context& ctx); void addObj(Context& ctx); void addOnnx(Context& ctx); -void addOnnxMlpack(Context& ctx); void addOnnxNeuralCalibrator(Context& ctx); } // namespace Acts::Python @@ -127,6 +126,5 @@ PYBIND11_MODULE(ActsPythonBindings, m) { addObj(ctx); addSvg(ctx); addOnnx(ctx); - addOnnxMlpack(ctx); addOnnxNeuralCalibrator(ctx); } diff --git a/Examples/Python/src/Onnx.cpp b/Examples/Python/src/Onnx.cpp index 
ee844586458..edc9c627efd 100644 --- a/Examples/Python/src/Onnx.cpp +++ b/Examples/Python/src/Onnx.cpp @@ -8,6 +8,8 @@ #include "Acts/Plugins/Python/Utilities.hpp" #include "ActsExamples/TrackFindingML/AmbiguityResolutionMLAlgorithm.hpp" +#include "ActsExamples/TrackFindingML/AmbiguityResolutionMLDBScanAlgorithm.hpp" +#include "ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp" #include #include @@ -28,5 +30,16 @@ void addOnnx(Context& ctx) { onnx, "AmbiguityResolutionMLAlgorithm", inputTracks, inputDuplicateNN, outputTracks, nMeasurementsMin); + + ACTS_PYTHON_DECLARE_ALGORITHM( + ActsExamples::AmbiguityResolutionMLDBScanAlgorithm, onnx, + "AmbiguityResolutionMLDBScanAlgorithm", inputTracks, inputDuplicateNN, + outputTracks, nMeasurementsMin, epsilonDBScan, minPointsDBScan); + + ACTS_PYTHON_DECLARE_ALGORITHM(ActsExamples::SeedFilterMLAlgorithm, onnx, + "SeedFilterMLAlgorithm", inputTrackParameters, + inputSimSeeds, inputSeedFilterNN, + outputTrackParameters, outputSimSeeds, + epsilonDBScan, minPointsDBScan, minSeedScore); } } // namespace Acts::Python diff --git a/Examples/Python/src/OnnxMlpack.cpp b/Examples/Python/src/OnnxMlpack.cpp deleted file mode 100644 index 2c2da4cfb7b..00000000000 --- a/Examples/Python/src/OnnxMlpack.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// This file is part of the Acts project. -// -// Copyright (C) 2023 CERN for the benefit of the Acts project -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#include "Acts/Plugins/Python/Utilities.hpp" -#include "ActsExamples/TrackFindingML/AmbiguityResolutionMLDBScanAlgorithm.hpp" -#include "ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp" - -#include -#include - -namespace py = pybind11; - -using namespace ActsExamples; -using namespace Acts; - -namespace Acts::Python { - -void addOnnxMlpack(Context& ctx) { - auto [m, mex, onnx] = ctx.get("main", "examples", "onnx"); - auto mlpack = mex.def_submodule("_mlpack"); - - ACTS_PYTHON_DECLARE_ALGORITHM( - ActsExamples::AmbiguityResolutionMLDBScanAlgorithm, mlpack, - "AmbiguityResolutionMLDBScanAlgorithm", inputTracks, inputDuplicateNN, - outputTracks, nMeasurementsMin, epsilonDBScan, minPointsDBScan); - - ACTS_PYTHON_DECLARE_ALGORITHM(ActsExamples::SeedFilterMLAlgorithm, mlpack, - "SeedFilterMLAlgorithm", inputTrackParameters, - inputSimSeeds, inputSeedFilterNN, - outputTrackParameters, outputSimSeeds, - epsilonDBScan, minPointsDBScan, minSeedScore); -} -} // namespace Acts::Python diff --git a/Examples/Python/src/OnnxMlpackStub.cpp b/Examples/Python/src/OnnxMlpackStub.cpp deleted file mode 100644 index 65d2f180403..00000000000 --- a/Examples/Python/src/OnnxMlpackStub.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// This file is part of the Acts project. -// -// Copyright (C) 2023 CERN for the benefit of the Acts project -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -namespace Acts { -namespace Python { -struct Context; -} // namespace Python -} // namespace Acts - -namespace Acts::Python { -void addOnnxMlpack(Context& /*unused*/) { - // dummy function -} -} // namespace Acts::Python diff --git a/Plugins/CMakeLists.txt b/Plugins/CMakeLists.txt index db2151753ec..36cb678b67f 100644 --- a/Plugins/CMakeLists.txt +++ b/Plugins/CMakeLists.txt @@ -10,7 +10,6 @@ add_component_if(Identification PluginIdentification ACTS_BUILD_PLUGIN_IDENTIFIC add_component_if(Json PluginJson ACTS_BUILD_PLUGIN_JSON) add_component_if(Legacy PluginLegacy ACTS_BUILD_PLUGIN_LEGACY) add_component_if(Onnx PluginOnnx ACTS_BUILD_PLUGIN_ONNX) -add_component_if(Mlpack PluginMlpack ACTS_BUILD_PLUGIN_MLPACK) add_component_if(Sycl PluginSycl ACTS_BUILD_PLUGIN_SYCL) add_component_if(ExaTrkX PluginExaTrkX ACTS_BUILD_PLUGIN_EXATRKX) diff --git a/Plugins/Mlpack/CMakeLists.txt b/Plugins/Mlpack/CMakeLists.txt deleted file mode 100644 index 132921eb814..00000000000 --- a/Plugins/Mlpack/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -add_library( - ActsPluginMlpack SHARED - # header files - include/Acts/Plugins/Mlpack/AmbiguityDBScanClustering.hpp - include/Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp) - -target_include_directories( - ActsPluginMlpack - PUBLIC - $ - $) - -target_link_libraries( - ActsPluginMlpack - PUBLIC ActsCore) - -target_include_directories(ActsPluginMlpack - SYSTEM INTERFACE ${mlpack_INCLUDE_DIR}) - -set_target_properties(ActsPluginMlpack PROPERTIES LINKER_LANGUAGE CXX) - -install( - TARGETS ActsPluginMlpack - EXPORT ActsPluginMlpackTargets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) - -install( - DIRECTORY include/Acts - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) diff --git a/Plugins/Mlpack/include/Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp b/Plugins/Mlpack/include/Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp deleted file mode 100644 index 369650ed3e5..00000000000 --- 
a/Plugins/Mlpack/include/Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// This file is part of the Acts project. -// -// Copyright (C) 2023 CERN for the benefit of the Acts project -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include -#include - -#include "mlpack/methods/dbscan.hpp" - -namespace Acts { - -/// Clusters seed based on their direction, their Z impact parameter and their -/// momentum using DBScan -/// -/// @param input : Input parameters for the clustering (phi, eta, z, Pt) -/// @param epsilon : Maximum distance between 2 tracks to be clustered -/// @param minPoints : Minimum number of tracks to create a cluster -/// @return an unordered map representing the clusters, the keys the ID of the primary seed of each cluster and the stored value a vector of seed IDs. 
-std::vector> dbscanSeedClustering( - const std::vector>& input, float epsilon = 0.03, - int minPoints = 2) { - // DBSCAN algorithm from MLpack used in the seed clustering - mlpack::DBSCAN dbscan(epsilon, minPoints); - - // Compute the space dimension of the input - int dim = input[0].size(); - - // Prepare the input for the DBScan - arma::mat data(dim, input.size()); - arma::Row assignments; - std::size_t trackID = 0; - for (const auto& param : input) { - for (int i = 0; i < dim; i++) { - data(i, trackID) = param[i]; - } - trackID++; - } - // Cluster track with DBScan - std::size_t clusterNb = dbscan.Cluster(data, assignments); - - // Prepare the output - std::vector> cluster(clusterNb, - std::vector()); - for (std::size_t iD = 0; iD < input.size(); iD++) { - std::size_t clusterID = assignments(iD); - if (assignments(iD) == SIZE_MAX) { - cluster.push_back(std::vector(1, iD)); - } else { - cluster[clusterID].push_back(iD); - } - } - return cluster; -} - -} // namespace Acts diff --git a/Tests/UnitTests/Core/Utilities/CMakeLists.txt b/Tests/UnitTests/Core/Utilities/CMakeLists.txt index 19c95b134c1..1a0a7f86a31 100644 --- a/Tests/UnitTests/Core/Utilities/CMakeLists.txt +++ b/Tests/UnitTests/Core/Utilities/CMakeLists.txt @@ -10,6 +10,7 @@ add_unittest(BinUtility BinUtilityTests.cpp) add_unittest(BoundingBox BoundingBoxTest.cpp) target_link_libraries(ActsUnitTestBoundingBox PRIVATE std::filesystem) +add_unittest(DBScan DBScanTests.cpp) add_unittest(Extendable ExtendableTests.cpp) add_unittest(FiniteStateMachine FiniteStateMachineTests.cpp) add_unittest(Frustum FrustumTest.cpp) diff --git a/Tests/UnitTests/Core/Utilities/DBScanTests.cpp b/Tests/UnitTests/Core/Utilities/DBScanTests.cpp new file mode 100644 index 00000000000..ab47c630d62 --- /dev/null +++ b/Tests/UnitTests/Core/Utilities/DBScanTests.cpp @@ -0,0 +1,108 @@ +// This file is part of the Acts project. 
+// +// Copyright (C) 2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include "Acts/Utilities/DBScan.hpp" + +#include +#include +#include +#include +#include +#include +#include + +namespace { +// Test vector of 2D points generated from the make_blobs function of +// scikit-learn to correspond to 4 clusters with a standard deviation of 0.3 +std::vector> test_vector{ + {-2.83739915, 2.62792556}, {-2.02847331, -1.90722196}, + {4.42609249, -2.42439165}, {-2.54167208, -1.31586441}, + {-2.74072011, 1.88175176}, {-2.44805173, -1.72270269}, + {4.32373114, -3.04946856}, {-3.02908065, 3.05502207}, + {4.21551681, -2.72367792}, {6.94454243, -8.26702198}, + {4.57729285, -2.98832874}, {-2.05999536, -1.60664607}, + {7.29942963, -7.49254664}, {-1.76560555, -1.94316957}, + {-3.08697607, 2.38012823}, {-2.68133439, -1.96863594}, + {-3.04707961, 2.42381653}, {-1.6693666, -1.98996212}, + {4.87565038, -2.42067792}, {6.57829525, -8.14269767}, + {-1.89777458, -1.71029565}, {-2.82010574, 2.27963425}, + {-1.8938416, -1.76676642}, {-2.8088788, 2.14373147}, + {-2.7111892, 2.7343114}, {5.00997563, -3.03311654}, + {-3.00272791, 1.59086316}, {-2.69800242, 2.19671366}, + {5.35757875, -2.98359632}, {6.41134781, -7.79582109}, + {5.06123223, -2.84952632}, {6.33969189, -7.83811637}, + {5.11101701, -2.80965778}, {7.01442234, -7.47047664}, + {6.82239627, -7.97467806}, {6.82647513, -7.64299033}, + {-2.02651791, -1.81791892}, {-2.53859699, -2.20157508}, + {5.07240334, -2.48183097}, {-1.58869273, -2.30974576}, + {5.24011121, -2.78045434}, {4.89256735, -2.98154234}, + {-2.61589554, -1.38994103}, {-2.37898031, 2.02633106}, + {6.71148996, -7.87697906}, {-2.24310299, -2.01958434}, + {4.80875851, -3.00716459}, {-2.20240163, -1.45942015}, + {5.0317719, -3.33571147}, {4.68497184, -2.2794554}, 
+ {6.57950453, -7.84613618}, {-2.39557904, -0.97990746}, + {4.89489222, -3.31597619}, {5.22670358, -2.79577779}, + {4.87625814, -2.70562793}, {5.37121464, -2.78439938}, + {6.48510206, -7.89652351}, {-2.78153003, 1.79940689}, + {6.80163025, -7.7267214}, {-2.42494396, -1.95543603}, + {7.01502605, -7.93922357}, {-2.00219795, -1.95198446}, + {-2.82690524, 1.83749478}, {-2.81153684, 2.30020325}, + {-1.46316156, -1.70854783}, {-2.36754202, -1.62836379}, + {-3.12179904, 1.86079695}, {-2.80228975, 2.16674687}, + {7.25447808, -7.87780152}, {6.34182023, -7.72244414}, + {6.85296593, -7.6565112}, {6.40782187, -7.95817435}, + {4.60981662, -2.6214774}, {6.82470403, -7.8453859}, + {-2.94909893, 2.4408267}, {6.48588252, -8.42529572}, + {6.55194867, -7.54354929}, {-2.64178285, 2.28031333}, + {-1.95664147, -2.44817923}, {-2.00957937, -2.01412199}, + {-2.24603999, 2.48964234}, {4.73274418, -2.89077558}, + {-2.47534453, 1.85935482}, {-2.35722712, -1.99652695}, + {5.15661108, -2.88549784}, {6.68114631, -7.73743642}, + {4.93268708, -2.97510717}, {6.54260932, -8.82618456}, + {-3.57448792, 2.06852256}, {6.63296723, -8.32752766}, + {-3.58610661, 2.2761471}, {-2.73077783, 1.8138345}, + {-2.14150912, 1.94984708}, {-2.27235876, -1.67574786}, + {6.92208545, -8.46326386}, {4.58953972, -3.22764749}, + {-3.36912131, 2.58470911}, {5.28526348, -2.55723196}, + {6.55276593, -7.81387909}, {-1.79854507, -2.10170986}}; +} // namespace + +namespace Acts { +namespace Test { + +BOOST_AUTO_TEST_CASE(ClusteringTest2D) { + using DBSCAN = Acts::DBScan<2, double, 4>; + DBSCAN dbscan(0.3, 3, false); + + DBSCAN dbscan_onePoint(0, 3, true); + + std::vector clusteredPoints; + + // Test the clustering we expect 4 clusters + int clusterNb = dbscan.cluster(test_vector, clusteredPoints); + BOOST_CHECK_EQUAL(clusterNb, 4); + + clusteredPoints.clear(); + + // Check that it works with empty input + std::vector> empty_vector; + clusterNb = dbscan.cluster(empty_vector, clusteredPoints); + BOOST_CHECK_EQUAL(clusterNb, 0); + 
+ clusteredPoints.clear(); + + clusterNb = dbscan_onePoint.cluster(test_vector, clusteredPoints); + BOOST_CHECK_EQUAL(clusterNb, test_vector.size()); + + clusteredPoints.clear(); +} + +} // namespace Test +} // namespace Acts diff --git a/cmake/Findmlpack.cmake b/cmake/Findmlpack.cmake deleted file mode 100644 index 3b594b34d80..00000000000 --- a/cmake/Findmlpack.cmake +++ /dev/null @@ -1,16 +0,0 @@ -# UNIX paths are standard, no need to specify them. -find_path(mlpack_INCLUDE_DIR - NAMES mlpack/core.hpp mlpack/prereqs.hpp - PATHS /opt/mlpack /usr/mlpack -) - -find_package_handle_standard_args(mlpack - REQUIRED_VARS mlpack_INCLUDE_DIR -) - -if(mlpack_FOUND) - set(mlpack_INCLUDE_DIRS ${mlpack_INCLUDE_DIR}) -endif() - -# Hide internal variables -mark_as_advanced(mlpack_INCLUDE_DIR) diff --git a/docs/getting_started.md b/docs/getting_started.md index aca3288c7ab..add9992b825 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -278,7 +278,6 @@ components. | ACTS_USE_SYSTEM_NLOHMANN_JSON | Use nlohmann::json provided by the
system instead of the bundled version
type: `bool`, default: `ACTS_USE_SYSTEM_LIBS -> OFF` | | ACTS_BUILD_PLUGIN_LEGACY | Build legacy plugin
type: `bool`, default: `OFF` | | ACTS_BUILD_PLUGIN_ONNX | Build ONNX plugin
type: `bool`, default: `OFF` | -| ACTS_BUILD_PLUGIN_MLPACK | Build MLpack plugin
type: `bool`, default: `OFF` | | ACTS_SETUP_VECMEM | Explicitly set up vecmem for the project
type: `bool`, default: `OFF` | | ACTS_USE_SYSTEM_VECMEM | Use a system-provided vecmem
installation
type: `bool`, default: `ACTS_USE_SYSTEM_LIBS -> OFF` | | ACTS_BUILD_PLUGIN_SYCL | Build SYCL plugin
type: `bool`, default: `OFF` |