feat: ML based seed filtering (#2709)

This pull request adds an ML-based seed selection that can be used to reduce the number of seeds after the seeding step. This PR is pending on the merging of #2690.
acts-project · Dec 14, 2023 · 61bc4e1 · 61bc4e1
1 parent 66c671c
commit 61bc4e1
Show file tree

Hide file tree

Showing 32 changed files with 1,532 additions and 84 deletions.
diff --git a/Core/include/Acts/TrackFinding/detail/AmbiguityTrackClustering.hpp b/Core/include/Acts/TrackFinding/detail/AmbiguityTrackClustering.hpp
@@ -20,8 +20,10 @@ namespace detail {
 ///
 /// @param trackMap : Multimap storing pair of track ID and vector of measurement ID. The keys are the number of measurement and are just there to facilitate the ordering.
 /// @return an unordered map representing the clusters: the keys are the IDs of the primary track of each cluster and the values store a vector of track IDs.
-std::unordered_map<int, std::vector<int>> clusterDuplicateTracks(
-    const std::multimap<int, std::pair<int, std::vector<int>>>& trackMap);
+std::unordered_map<std::size_t, std::vector<std::size_t>>
+clusterDuplicateTracks(
+    const std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>&
+        trackMap);
 
 }  // namespace detail
 }  // namespace Acts
diff --git a/Core/src/TrackFinding/AmbiguityTrackClustering.cpp b/Core/src/TrackFinding/AmbiguityTrackClustering.cpp
@@ -10,18 +10,20 @@
 
 #include <iterator>
 
-std::unordered_map<int, std::vector<int>> Acts::detail::clusterDuplicateTracks(
-    const std::multimap<int, std::pair<int, std::vector<int>>>& trackMap) {
+std::unordered_map<std::size_t, std::vector<std::size_t>>
+Acts::detail::clusterDuplicateTracks(
+    const std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>&
+        trackMap) {
   // Unordered map associating a vector with all the track ID of a cluster to
   // the ID of the first track of the cluster
-  std::unordered_map<int, std::vector<int>> cluster;
+  std::unordered_map<std::size_t, std::vector<std::size_t>> cluster;
   // Unordered map associating hits to the ID of the first track of the
   // different clusters.
-  std::unordered_map<int, int> hitToTrack;
+  std::unordered_map<std::size_t, std::size_t> hitToTrack;
 
   // Loop over all the tracks
   for (auto track = trackMap.rbegin(); track != trackMap.rend(); ++track) {
-    std::vector<int> hits = track->second.second;
+    std::vector<std::size_t> hits = track->second.second;
     auto matchedTrack = hitToTrack.end();
     // Loop over all the hits in the track
     for (auto hit = hits.begin(); hit != hits.end(); hit++) {
@@ -36,7 +38,7 @@ std::unordered_map<int, std::vector<int>> Acts::detail::clusterDuplicateTracks(
     // None of the hits have been matched to a track create a new cluster
     if (matchedTrack == hitToTrack.end()) {
       cluster.emplace(track->second.first,
-                      std::vector<int>(1, track->second.first));
+                      std::vector<std::size_t>(1, track->second.first));
       for (const auto& hit : hits) {
         // Add the hits of the new cluster to the hitToTrack
         hitToTrack.emplace(hit, track->second.first);

diff --git a/Examples/Algorithms/TrackFindingML/CMakeLists.txt b/Examples/Algorithms/TrackFindingML/CMakeLists.txt
@@ -6,6 +6,7 @@ set(SOURCES
 if(ACTS_BUILD_PLUGIN_MLPACK)
   list(APPEND SOURCES 
     src/AmbiguityResolutionMLDBScanAlgorithm.cpp   
+    src/SeedFilterMLAlgorithm.cpp
   )
 endif()
 

diff --git a/...s/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityResolutionML.hpp b/...s/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/AmbiguityResolutionML.hpp
@@ -33,15 +33,16 @@ class AmbiguityResolutionML : public IAlgorithm {
   /// @param tracks is the input track container
   /// @param nMeasurementsMin minimum number of measurement per track
   /// @return an ordered list containing pairs of track ID and associated measurement ID
-  std::multimap<int, std::pair<int, std::vector<int>>> mapTrackHits(
-      const ConstTrackContainer& tracks, int nMeasurementsMin) const;
+  std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
+  mapTrackHits(const ConstTrackContainer& tracks, int nMeasurementsMin) const;
 
   /// Prepare the output track container to be written
   ///
   /// @param tracks is the input track container
   /// @param goodTracks is list of the IDs of all the tracks we want to keep
-  ConstTrackContainer prepareOutputTrack(const ConstTrackContainer& tracks,
-                                         std::vector<int>& goodTracks) const;
+  ConstTrackContainer prepareOutputTrack(
+      const ConstTrackContainer& tracks,
+      std::vector<std::size_t>& goodTracks) const;
 };
 
 }  // namespace ActsExamples
diff --git a/...s/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp b/...s/Algorithms/TrackFindingML/include/ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp
@@ -0,0 +1,83 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2023 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/Plugins/Onnx/SeedClassifier.hpp"
+#include "ActsExamples/EventData/SimSeed.hpp"
+#include "ActsExamples/EventData/Track.hpp"
+#include "ActsExamples/Framework/DataHandle.hpp"
+#include "ActsExamples/TrackFindingML/AmbiguityResolutionML.hpp"
+
+#include <string>
+
+namespace ActsExamples {
+
+/// Removes seeds that seem to be duplicated and fake.
+///
+/// The implementation works as follows:
+///  1) Cluster together nearby seeds using a DBSCAN
+///  2) For each seed use a neural network to compute a score
+///  3) In each cluster keep the seed with the highest score
+class SeedFilterMLAlgorithm : public IAlgorithm {
+ public:
+  struct Config {
+    /// Input estimated track parameters collection.
+    std::string inputTrackParameters;
+    /// Input seeds collection.
+    std::string inputSimSeeds;
+    /// Path to the ONNX model of the seed filter neural network
+    std::string inputSeedFilterNN;
+    /// Output estimated track parameters collection.
+    std::string outputTrackParameters;
+    /// Output seeds collection.
+    std::string outputSimSeeds;
+    /// Maximum distance between 2 seeds to be clustered in the DBSCAN
+    float epsilonDBScan = 0.03;
+    /// Minimum number of seeds to create a cluster in the DBSCAN
+    int minPointsDBScan = 2;
+    /// Minimum score a seed needs to be selected
+    float minSeedScore = 0.1;
+    /// Weight the seed phi is divided by before the DBSCAN clustering
+    double clusteringWeighPhi = 1.0;
+    /// Weight the seed eta is divided by before the DBSCAN clustering
+    double clusteringWeighEta = 1.0;
+    /// Weight the seed z is divided by before the DBSCAN clustering
+    double clusteringWeighZ = 50.0;
+    /// Weight the seed pT is divided by before the DBSCAN clustering
+    double clusteringWeighPt = 1.0;
+  };
+
+  /// Construct the seed filter algorithm.
+  ///
+  /// @param cfg is the algorithm configuration
+  /// @param lvl is the logging level
+  SeedFilterMLAlgorithm(Config cfg, Acts::Logging::Level lvl);
+
+  /// Run the seed filter algorithm.
+  ///
+  /// @param ctx is the algorithm context with event information
+  /// @return a process code indicating success or failure
+  ProcessCode execute(const AlgorithmContext& ctx) const final;
+
+  /// Const access to the config
+  const Config& config() const { return m_cfg; }
+
+ private:
+  Config m_cfg;
+  // ONNX model used to score and select the seeds
+  Acts::SeedClassifier m_seedClassifier;
+  ReadDataHandle<TrackParametersContainer> m_inputTrackParameters{
+      this, "InputTrackParameters"};
+  ReadDataHandle<SimSeedContainer> m_inputSimSeeds{this, "InputSimSeeds"};
+  WriteDataHandle<TrackParametersContainer> m_outputTrackParameters{
+      this, "OutputTrackParameters"};
+  WriteDataHandle<SimSeedContainer> m_outputSimSeeds{this, "OutputSimSeeds"};
+};
+
+}  // namespace ActsExamples
diff --git a/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionML.cpp b/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionML.cpp
@@ -15,23 +15,24 @@ ActsExamples::AmbiguityResolutionML::AmbiguityResolutionML(
     std::string name, Acts::Logging::Level lvl)
     : ActsExamples::IAlgorithm(name, lvl) {}
 
-std::multimap<int, std::pair<int, std::vector<int>>>
+std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
 ActsExamples::AmbiguityResolutionML::mapTrackHits(
     const ActsExamples::ConstTrackContainer& tracks,
     int nMeasurementsMin) const {
-  std::multimap<int, std::pair<int, std::vector<int>>> trackMap;
+  std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>> trackMap;
   // Loop over all the trajectories in the events
   for (const auto& track : tracks) {
-    std::vector<int> hits;
+    std::vector<std::size_t> hits;
     int nbMeasurements = 0;
     // Store the hits id for the trajectory and compute the number of
     // measurement
     tracks.trackStateContainer().visitBackwards(
         track.tipIndex(), [&](const auto& state) {
           if (state.typeFlags().test(Acts::TrackStateFlag::MeasurementFlag)) {
-            int indexHit = state.getUncalibratedSourceLink()
-                               .template get<ActsExamples::IndexSourceLink>()
-                               .index();
+            std::size_t indexHit =
+                state.getUncalibratedSourceLink()
+                    .template get<ActsExamples::IndexSourceLink>()
+                    .index();
             hits.emplace_back(indexHit);
             ++nbMeasurements;
           }
@@ -47,7 +48,7 @@ ActsExamples::AmbiguityResolutionML::mapTrackHits(
 ActsExamples::ConstTrackContainer
 ActsExamples::AmbiguityResolutionML::prepareOutputTrack(
     const ActsExamples::ConstTrackContainer& tracks,
-    std::vector<int>& goodTracks) const {
+    std::vector<std::size_t>& goodTracks) const {
   std::shared_ptr<Acts::ConstVectorMultiTrajectory> trackStateContainer =
       tracks.trackStateContainerHolder();
   auto trackContainer = std::make_shared<Acts::VectorTrackContainer>();

diff --git a/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLAlgorithm.cpp b/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLAlgorithm.cpp
@@ -35,12 +35,12 @@ ActsExamples::ProcessCode ActsExamples::AmbiguityResolutionMLAlgorithm::execute(
   // Read input data
   const auto& tracks = m_inputTracks(ctx);
   // Associate measurement to their respective tracks
-  std::multimap<int, std::pair<int, std::vector<int>>> trackMap =
-      mapTrackHits(tracks, m_cfg.nMeasurementsMin);
+  std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
+      trackMap = mapTrackHits(tracks, m_cfg.nMeasurementsMin);
   auto cluster = Acts::detail::clusterDuplicateTracks(trackMap);
   // Select the ID of the track we want to keep
-  std::vector<int> goodTracks =
-      m_duplicateClassifier.solveAmbuguity(cluster, tracks);
+  std::vector<std::size_t> goodTracks =
+      m_duplicateClassifier.solveAmbiguity(cluster, tracks);
   // Prepare the output track collection from the IDs
   auto outputTracks = prepareOutputTrack(tracks, goodTracks);
   m_outputTracks(ctx, std::move(outputTracks));

diff --git a/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp b/Examples/Algorithms/TrackFindingML/src/AmbiguityResolutionMLDBScanAlgorithm.cpp
@@ -39,14 +39,14 @@ ActsExamples::AmbiguityResolutionMLDBScanAlgorithm::execute(
   // Read input data
   const auto& tracks = m_inputTracks(ctx);
   // Associate measurement to their respective tracks
-  std::multimap<int, std::pair<int, std::vector<int>>> trackMap =
-      mapTrackHits(tracks, m_cfg.nMeasurementsMin);
+  std::multimap<int, std::pair<std::size_t, std::vector<std::size_t>>>
+      trackMap = mapTrackHits(tracks, m_cfg.nMeasurementsMin);
   // Cluster the tracks using DBscan
   auto cluster = Acts::dbscanTrackClustering(
       trackMap, tracks, m_cfg.epsilonDBScan, m_cfg.minPointsDBScan);
   // Select the ID of the track we want to keep
-  std::vector<int> goodTracks =
-      m_duplicateClassifier.solveAmbuguity(cluster, tracks);
+  std::vector<std::size_t> goodTracks =
+      m_duplicateClassifier.solveAmbiguity(cluster, tracks);
   // Prepare the output track collection from the IDs
   auto outputTracks = prepareOutputTrack(tracks, goodTracks);
   m_outputTracks(ctx, std::move(outputTracks));

diff --git a/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp b/Examples/Algorithms/TrackFindingML/src/SeedFilterMLAlgorithm.cpp
@@ -0,0 +1,102 @@
+// This file is part of the Acts project.
+//
+// Copyright (C) 2023 CERN for the benefit of the Acts project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "ActsExamples/TrackFindingML/SeedFilterMLAlgorithm.hpp"
+
+#include "Acts/Plugins/Mlpack/SeedFilterDBScanClustering.hpp"
+#include "ActsExamples/Framework/ProcessCode.hpp"
+#include "ActsExamples/Framework/WhiteBoard.hpp"
+
+#include <iterator>
+#include <map>
+
+ActsExamples::SeedFilterMLAlgorithm::SeedFilterMLAlgorithm(
+    ActsExamples::SeedFilterMLAlgorithm::Config cfg, Acts::Logging::Level lvl)
+    : ActsExamples::IAlgorithm("SeedFilterMLAlgorithm", lvl),
+      m_cfg(std::move(cfg)),
+      m_seedClassifier(m_cfg.inputSeedFilterNN.c_str()) {
+  // Reject any configuration with an unset collection name
+  const auto require = [](const std::string& key, const char* what) {
+    if (key.empty()) {
+      throw std::invalid_argument(what);
+    }
+  };
+  require(m_cfg.inputTrackParameters,
+          "Missing track parameters input collection");
+  require(m_cfg.inputSimSeeds, "Missing seed input collection");
+  require(m_cfg.outputTrackParameters,
+          "Missing track parameters output collection");
+  require(m_cfg.outputSimSeeds, "Missing seed output collection");
+  m_inputTrackParameters.initialize(m_cfg.inputTrackParameters);
+  m_inputSimSeeds.initialize(m_cfg.inputSimSeeds);
+  m_outputTrackParameters.initialize(m_cfg.outputTrackParameters);
+  m_outputSimSeeds.initialize(m_cfg.outputSimSeeds);
+}
+
+ActsExamples::ProcessCode ActsExamples::SeedFilterMLAlgorithm::execute(
+    const AlgorithmContext& ctx) const {
+  // Read input data; seeds and track parameters are matched by index
+  const auto& seeds = m_inputSimSeeds(ctx);
+  const auto& params = m_inputTrackParameters(ctx);
+  if (seeds.size() != params.size()) {
+    throw std::invalid_argument(
+        "The number of seeds and track parameters is different");
+  }
+
+  // One row per seed: pT, eta, phi, 3 space points (x, y, z), z, quality
+  Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
+      networkInput(seeds.size(), 14);
+  std::vector<std::vector<double>> clusteringParams;
+  clusteringParams.reserve(seeds.size());
+  for (std::size_t i = 0; i < seeds.size(); i++) {
+    // Compute the track parameters
+    double pT = std::abs(1.0 / params[i].parameters()[Acts::eBoundQOverP]) *
+                std::sin(params[i].parameters()[Acts::eBoundTheta]);
+    double eta =
+        std::atanh(std::cos(params[i].parameters()[Acts::eBoundTheta]));
+    double phi = params[i].parameters()[Acts::eBoundPhi];
+
+    // Fill the clustering inputs, each divided by its configured weight
+    clusteringParams.push_back(
+        {phi / m_cfg.clusteringWeighPhi, eta / m_cfg.clusteringWeighEta,
+         seeds[i].z() / m_cfg.clusteringWeighZ, pT / m_cfg.clusteringWeighPt});
+
+    // Fill the NN input
+    networkInput.row(i) << pT, eta, phi, seeds[i].sp()[0]->x(),
+        seeds[i].sp()[0]->y(), seeds[i].sp()[0]->z(), seeds[i].sp()[1]->x(),
+        seeds[i].sp()[1]->y(), seeds[i].sp()[1]->z(), seeds[i].sp()[2]->x(),
+        seeds[i].sp()[2]->y(), seeds[i].sp()[2]->z(), seeds[i].z(),
+        seeds[i].seedQuality();
+  }
+
+  // Cluster the seeds using DBSCAN
+  auto cluster = Acts::dbscanSeedClustering(
+      clusteringParams, m_cfg.epsilonDBScan, m_cfg.minPointsDBScan);
+
+  // Select the IDs of the seeds we want to keep
+  std::vector<std::size_t> goodSeed = m_seedClassifier.solveAmbiguity(
+      cluster, networkInput, m_cfg.minSeedScore);
+
+  // Create the output seed and track parameters collections
+  SimSeedContainer outputSeeds;
+  outputSeeds.reserve(goodSeed.size());
+  TrackParametersContainer outputTrackParameters;
+  outputTrackParameters.reserve(goodSeed.size());
+
+  for (const std::size_t i : goodSeed) {
+    outputSeeds.push_back(seeds[i]);
+    outputTrackParameters.push_back(params[i]);
+  }
+
+  // Hand the containers over by move: they are not used afterwards, so the
+  // deep copies made by the former brace-initialised temporaries are avoided
+  m_outputSimSeeds(ctx, std::move(outputSeeds));
+  m_outputTrackParameters(ctx, std::move(outputTrackParameters));
+
+  return ActsExamples::ProcessCode::SUCCESS;
+}
diff --git a/Examples/Io/Csv/include/ActsExamples/Io/Csv/CsvSeedWriter.hpp b/Examples/Io/Csv/include/ActsExamples/Io/Csv/CsvSeedWriter.hpp
@@ -90,7 +90,7 @@ class CsvSeedWriter : public WriterT<TrackParametersContainer> {
   /// @brief Struct for brief seed summary info
   ///
   struct SeedInfo {
-    std::size_t seedId = 0;
+    std::size_t seedID = 0;
     ActsFatras::Barcode particleId;
     float seedPt = -1;
     float seedPhi = 0;

diff --git a/Examples/Io/Csv/include/ActsExamples/Io/Csv/CsvTrackWriter.hpp b/Examples/Io/Csv/include/ActsExamples/Io/Csv/CsvTrackWriter.hpp
@@ -89,7 +89,7 @@ class CsvTrackWriter : public WriterT<ConstTrackContainer> {
   ///
   struct TrackInfo : public Acts::MultiTrajectoryHelpers::TrajectoryState {
     std::size_t trackId = 0;
-    unsigned int seedId = 0;
+    unsigned int seedID = 0;
     ActsFatras::Barcode particleId;
     std::size_t nMajorityHits = 0;
     std::string trackType;

diff --git a/Examples/Io/Csv/src/CsvSeedWriter.cpp b/Examples/Io/Csv/src/CsvSeedWriter.cpp
@@ -147,7 +147,7 @@ ActsExamples::ProcessCode ActsExamples::CsvSeedWriter::writeT(
 
     // track info
     SeedInfo toAdd;
-    toAdd.seedId = iparams;
+    toAdd.seedID = iparams;
     toAdd.particleId = majorityParticleId;
     toAdd.seedPt = std::abs(1.0 / params[Acts::eBoundQOverP]) *
                    std::sin(params[Acts::eBoundTheta]);
@@ -160,7 +160,7 @@ ActsExamples::ProcessCode ActsExamples::CsvSeedWriter::writeT(
     toAdd.seedType = truthMatched ? "duplicate" : "fake";
     toAdd.measurementsID = ptrack;
 
-    infoMap[toAdd.seedId] = toAdd;
+    infoMap[toAdd.seedID] = toAdd;
   }
 
   mos << "seed_id,particleId,"
@@ -177,7 +177,7 @@ ActsExamples::ProcessCode ActsExamples::CsvSeedWriter::writeT(
       info.seedType = "good";
     }
     // write the track info
-    mos << info.seedId << ",";
+    mos << info.seedID << ",";
     mos << info.particleId << ",";
     mos << info.seedPt << ",";
     mos << info.seedEta << ",";