Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Cuda Plugin Improvements, master branch (2020.08.26.) #398

Merged
7 changes: 7 additions & 0 deletions Plugins/Cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,20 @@ add_library(
include/Acts/Plugins/Cuda/Seeding2/SeedFinder.ipp
include/Acts/Plugins/Cuda/Seeding2/TripletFilterConfig.hpp
include/Acts/Plugins/Cuda/Utilities/Arrays.hpp
include/Acts/Plugins/Cuda/Utilities/Info.hpp
include/Acts/Plugins/Cuda/Utilities/MemoryManager.hpp
include/Acts/Plugins/Cuda/Utilities/StreamWrapper.hpp
src/Seeding2/CountDublets.cu
src/Seeding2/FindDublets.cu
src/Seeding2/FindTriplets.cu
src/Utilities/Arrays.cu
src/Utilities/ErrorCheck.cuh
src/Utilities/ErrorCheck.cu
src/Utilities/Info.cu
src/Utilities/MatrixMacros.hpp
src/Utilities/MemoryManager.cu
src/Utilities/StreamHandlers.cuh
src/Utilities/StreamWrapper.cu
)
target_include_directories(
ActsPluginCuda2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// CUDA plugin include(s).
#include "Acts/Plugins/Cuda/Seeding2/Details/Types.hpp"
#include "Acts/Plugins/Cuda/Utilities/Arrays.hpp"
#include "Acts/Plugins/Cuda/Utilities/Info.hpp"

// System include(s).
#include <cstddef>
Expand All @@ -34,6 +35,7 @@ namespace Details {
/// that still need to be filtered through
/// @c Acts::SeedFilter::filterSeeds_1SpFixed before returning it to the user.
///
/// @param[in] device Properties of the device that the code will be running on
/// @param[in] maxBlockSize The maximum block size to use on the GPU
/// @param[in] dubletCounts The output object from
/// @c Acts::Cuda::Details::countDublets
Expand Down Expand Up @@ -70,11 +72,12 @@ namespace Details {
/// for each middle spacepoint
///
std::vector<std::vector<Triplet> > findTriplets(
std::size_t maxBlockSize, const DubletCounts& dubletCounts,
const SeedFilterConfig& seedConfig, const TripletFilterConfig& filterConfig,
std::size_t nBottomSPs, const device_array<SpacePoint>& bottomSPs,
std::size_t nMiddleSPs, const device_array<SpacePoint>& middleSPs,
std::size_t nTopSPs, const device_array<SpacePoint>& topSPs,
const Info::Device& device, std::size_t maxBlockSize,
const DubletCounts& dubletCounts, const SeedFilterConfig& seedConfig,
const TripletFilterConfig& filterConfig, std::size_t nBottomSPs,
const device_array<SpacePoint>& bottomSPs, std::size_t nMiddleSPs,
const device_array<SpacePoint>& middleSPs, std::size_t nTopSPs,
const device_array<SpacePoint>& topSPs,
const device_array<unsigned int>& middleBottomCounts,
const device_array<std::size_t>& middleBottomDublets,
const device_array<unsigned int>& middleTopCounts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

#pragma once

// System include(s).
#include <cstddef>

namespace Acts {
namespace Cuda {
namespace Details {
Expand All @@ -28,9 +25,9 @@ struct SpacePoint {
/// Helper struct summarising the results of the dublet search
struct DubletCounts {
/// The total number of dublets (M-B and M-T) found
std::size_t nDublets = 0;
unsigned int nDublets = 0;
/// The total number of triplet candidates found
std::size_t nTriplets = 0;
unsigned int nTriplets = 0;
/// The maximal number of middle-bottom dublets
unsigned int maxMBDublets = 0;
/// The maximal number of middle-top dublets
Expand All @@ -51,8 +48,9 @@ struct LinCircle {

/// Structure used in the CUDA-based triplet finding
struct Triplet {
std::size_t bottomIndex = static_cast<std::size_t>(-1);
std::size_t topIndex = static_cast<std::size_t>(-1);
unsigned int bottomIndex = static_cast<unsigned int>(-1);
unsigned int middleIndex = static_cast<unsigned int>(-1);
unsigned int topIndex = static_cast<unsigned int>(-1);
float impactParameter = 0.0f;
float invHelixDiameter = 0.0f;
float weight = 0.0f;
Expand Down
15 changes: 14 additions & 1 deletion Plugins/Cuda/include/Acts/Plugins/Cuda/Seeding2/SeedFinder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "Acts/Seeding/Seed.hpp"
#include "Acts/Seeding/SeedFilterConfig.hpp"
#include "Acts/Seeding/SeedfinderConfig.hpp"
#include "Acts/Utilities/Logger.hpp"

namespace Acts {
namespace Cuda {
Expand All @@ -26,9 +27,19 @@ class SeedFinder {
///////////////////////////////////////////////////////////////////

public:
/// Create a CUDA backed seed finder object
///
/// @param commonConfig Configuration shared with @c Acts::Seedfinder
/// @param seedFilterConfig Configuration shared with @c Acts::SeedFilter
/// @param tripletFilterConfig Configuration for the GPU based triplet
/// filtering
/// @param device The identifier of the CUDA device to run on
/// @param loggerLevel Output level of messages coming from the object
///
SeedFinder(SeedfinderConfig<external_spacepoint_t> commonConfig,
const SeedFilterConfig& seedFilterConfig,
const TripletFilterConfig& tripletFilterConfig);
const TripletFilterConfig& tripletFilterConfig, int device = 0,
Acts::Logging::Level loggerLevel = Acts::Logging::INFO);

/// Create all seeds from the space points in the three iterators.
/// Can be used to parallelize the seed creation
Expand All @@ -49,6 +60,8 @@ class SeedFinder {
SeedFilterConfig m_seedFilterConfig;
/// Configuration for the (device) triplet filter
TripletFilterConfig m_tripletFilterConfig;
/// CUDA device identifier
int m_device;
};

} // namespace Cuda
Expand Down
33 changes: 26 additions & 7 deletions Plugins/Cuda/include/Acts/Plugins/Cuda/Seeding2/SeedFinder.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include "Acts/Plugins/Cuda/Seeding2/Details/FindTriplets.hpp"
#include "Acts/Plugins/Cuda/Seeding2/Details/Types.hpp"
#include "Acts/Plugins/Cuda/Utilities/Arrays.hpp"
#include "Acts/Plugins/Cuda/Utilities/Info.hpp"
#include "Acts/Plugins/Cuda/Utilities/MemoryManager.hpp"

// Acts include(s).
#include "Acts/Seeding/InternalSeed.hpp"
Expand All @@ -30,10 +32,12 @@ template <typename external_spacepoint_t>
SeedFinder<external_spacepoint_t>::SeedFinder(
Acts::SeedfinderConfig<external_spacepoint_t> commonConfig,
const SeedFilterConfig& seedFilterConfig,
const TripletFilterConfig& tripletFilterConfig)
const TripletFilterConfig& tripletFilterConfig, int device,
Acts::Logging::Level loggerLevel)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you consider accepting a logger instance here, and storing it as a member variable? You can then default it to Acts::getDefaultLogger. This way, other logging backends (like the Athena logging, for example) can potentially be passed in.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing. I'll do that later today.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look around, the pattern we usually use is to accept a std::unique_ptr<Logger>, store it as a member, and then provide a const Logger& logger() method that the macros call. But I'm sure you figured that out anyway 😉

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Please have a look. 😄

: m_commonConfig(std::move(commonConfig)),
m_seedFilterConfig(seedFilterConfig),
m_tripletFilterConfig(tripletFilterConfig) {
m_tripletFilterConfig(tripletFilterConfig),
m_device(device) {
// calculation of scattering using the highland formula
// convert pT to p once theta angle is known
m_commonConfig.highland =
Expand All @@ -50,6 +54,17 @@ SeedFinder<external_spacepoint_t>::SeedFinder(
std::pow(m_commonConfig.minPt * 2 / m_commonConfig.pTPerHelixRadius, 2);
m_commonConfig.pT2perRadius =
std::pow(m_commonConfig.highland / m_commonConfig.pTPerHelixRadius, 2);

// Tell the user what CUDA device will be used by the object.
ACTS_LOCAL_LOGGER(
Acts::getDefaultLogger("Acts::Cuda::SeedFinder", loggerLevel));
if (static_cast<std::size_t>(m_device) < Info::instance().devices().size()) {
ACTS_DEBUG("Will be using device:\n"
<< Info::instance().devices()[m_device]);
} else {
ACTS_FATAL("Invalid CUDA device requested");
throw std::runtime_error("Invalid CUDA device requested");
}
}

template <typename external_spacepoint_t>
Expand Down Expand Up @@ -161,11 +176,12 @@ SeedFinder<external_spacepoint_t>::createSeedsForGroup(

// Launch the triplet finding code on all of the previously found dublets.
auto tripletCandidates = Details::findTriplets(
m_commonConfig.maxBlockSize, dubletCounts, m_seedFilterConfig,
m_tripletFilterConfig, bottomSPVec.size(), bottomSPDeviceArray,
middleSPVec.size(), middleSPDeviceArray, topSPVec.size(),
topSPDeviceArray, middleBottomCounts, middleBottomDublets,
middleTopCounts, middleTopDublets, m_commonConfig.maxScatteringAngle2,
Info::instance().devices()[m_device], m_commonConfig.maxBlockSize,
dubletCounts, m_seedFilterConfig, m_tripletFilterConfig,
bottomSPVec.size(), bottomSPDeviceArray, middleSPVec.size(),
middleSPDeviceArray, topSPVec.size(), topSPDeviceArray,
middleBottomCounts, middleBottomDublets, middleTopCounts,
middleTopDublets, m_commonConfig.maxScatteringAngle2,
m_commonConfig.sigmaScattering, m_commonConfig.minHelixDiameter2,
m_commonConfig.pT2perRadius, m_commonConfig.impactMax);
assert(tripletCandidates.size() == middleSPVec.size());
Expand All @@ -192,6 +208,9 @@ SeedFinder<external_spacepoint_t>::createSeedsForGroup(
m_commonConfig.seedFilter->filterSeeds_1SpFixed(seedsPerSPM, outputVec);
}

// Free up all allocated device memory.
MemoryManager::instance().reset(m_device);

// Return the collected spacepoints.
return outputVec;
}
Expand Down
13 changes: 13 additions & 0 deletions Plugins/Cuda/include/Acts/Plugins/Cuda/Utilities/Arrays.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

#pragma once

// CUDA plugin include(s).
#include "Acts/Plugins/Cuda/Utilities/StreamWrapper.hpp"

// System include(s).
#include <cstddef>
#include <memory>
Expand Down Expand Up @@ -58,10 +61,20 @@ template <typename T>
void copyToDevice(device_array<T>& dev, const host_array<T>& host,
std::size_t arraySize);

/// Copy one array from the host to the device asynchronously
///
/// @param dev The device array that the data is copied into
/// @param host The host array that the data is copied from
/// @param arraySize The size of the arrays (presumably the number of
///                  elements of type @c T -- to be confirmed against the
///                  implementation)
/// @param stream The stream wrapper associated with the copy (presumably the
///               copy is queued on this stream -- to be confirmed against
///               the implementation)
///
template <typename T>
void copyToDevice(device_array<T>& dev, const host_array<T>& host,
std::size_t arraySize, const StreamWrapper& stream);

/// Copy one array from the device to the host
///
/// @param host The host array that the data is copied into
/// @param dev The device array that the data is copied from
/// @param arraySize The size of the arrays (presumably the number of
///                  elements of type @c T -- to be confirmed against the
///                  implementation)
///
template <typename T>
void copyToHost(host_array<T>& host, const device_array<T>& dev,
std::size_t arraySize);

/// Copy one array from the device to the host asynchronously
///
/// @param host The host array that the data is copied into
/// @param dev The device array that the data is copied from
/// @param arraySize The size of the arrays (presumably the number of
///                  elements of type @c T -- to be confirmed against the
///                  implementation)
/// @param stream The stream wrapper associated with the copy (presumably the
///               copy is queued on this stream -- to be confirmed against
///               the implementation)
///
template <typename T>
void copyToHost(host_array<T>& host, const device_array<T>& dev,
std::size_t arraySize, const StreamWrapper& stream);

} // namespace Cuda
} // namespace Acts
73 changes: 73 additions & 0 deletions Plugins/Cuda/include/Acts/Plugins/Cuda/Utilities/Info.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// This file is part of the Acts project.
//
// Copyright (C) 2020 CERN for the benefit of the Acts project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#pragma once

// System include(s).
#include <iosfwd>
#include <string>
#include <vector>

namespace Acts {
namespace Cuda {

/// Singleton describing the CUDA devices that are available at runtime
///
/// The interface only uses standard library types, so that clients do not
/// pick up any CUDA dependencies by using it.
///
class Info {
 public:
  /// Description of a single available CUDA device
  struct Device {
    /// Identifier that CUDA knows this device by
    int id{-1};
    /// The name of this device
    std::string name;
    /// The maximal number of threads per block for this device
    int maxThreadsPerBlock{-1};
    /// Whether the device supports multiple kernel executions in parallel
    bool concurrentKernels{false};
    /// The total amount of (global) memory on the device
    std::size_t totalMemory{0};
  };  // struct Device

  /// Access the singleton object
  static Info& instance();

  /// Descriptions of all the available CUDA devices
  const std::vector<Device>& devices() const;

  /// @name The singleton object can neither be copied nor moved
  /// @{

  /// Copy construction is explicitly disabled
  Info(const Info&) = delete;
  /// Move construction is explicitly disabled
  Info(Info&&) = delete;
  /// Copy assignment is explicitly disabled
  Info& operator=(const Info&) = delete;
  /// Move assignment is explicitly disabled
  Info& operator=(Info&&) = delete;

  /// @}

 private:
  /// Private constructor, as required by the singleton behaviour
  Info();

  /// Descriptions of all available devices
  std::vector<Device> m_devices;

};  // class Info

/// Print operator for @c Acts::Cuda::Info::Device
///
/// @param out The output stream to print the device description to
/// @param device The device description to print
/// @return A reference to an output stream (presumably @c out itself, so that
///         calls can be chained -- the definition is not part of this header)
///
std::ostream& operator<<(std::ostream& out, const Info::Device& device);

} // namespace Cuda
} // namespace Acts
Loading