Skip to content

Commit

Permalink
Make CollectionIDs a 32bit hash value of the collection name (#412)
Browse files Browse the repository at this point in the history
* add hashing feature to CollectionID table

* move collectionID from mix of int/unsigned to consistent uint32_t

* fix collectionID in SIOBlock

* use 32 bit hash; use murmurhash3

* Ignore tests in UB sanitizer runs

* Add standalone executable for collision detection

* protect frame against double insert

---------

Co-authored-by: tmadlener <thomas.madlener@desy.de>
  • Loading branch information
hegner and tmadlener committed Jun 8, 2023
1 parent e7e70c6 commit ac086e0
Show file tree
Hide file tree
Showing 30 changed files with 691 additions and 71 deletions.
4 changes: 2 additions & 2 deletions include/podio/CollectionBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ class CollectionBase {
virtual bool setReferences(const ICollectionProvider* collectionProvider) = 0;

/// set collection ID
virtual void setID(unsigned id) = 0;
virtual void setID(uint32_t id) = 0;

/// get collection ID
virtual unsigned getID() const = 0;
virtual uint32_t getID() const = 0;

/// Get the collection buffers for this collection
virtual podio::CollectionWriteBuffers getBuffers() = 0;
Expand Down
15 changes: 8 additions & 7 deletions include/podio/CollectionIDTable.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef PODIO_COLLECTIONIDTABLE_H
#define PODIO_COLLECTIONIDTABLE_H

#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
Expand All @@ -20,15 +21,15 @@ class CollectionIDTable {
CollectionIDTable& operator=(CollectionIDTable&&) = default;

/// constructor from existing ID:name mapping
CollectionIDTable(std::vector<int>&& ids, std::vector<std::string>&& names);
CollectionIDTable(std::vector<uint32_t>&& ids, std::vector<std::string>&& names);

CollectionIDTable(const std::vector<int>& ids, const std::vector<std::string>& names);
CollectionIDTable(const std::vector<uint32_t>& ids, const std::vector<std::string>& names);

/// return collection ID for given name
int collectionID(const std::string& name) const;
uint32_t collectionID(const std::string& name) const;

/// return name for given collection ID
const std::string name(int collectionID) const;
const std::string name(uint32_t collectionID) const;

/// Check if collection name is known
bool present(const std::string& name) const;
Expand All @@ -39,13 +40,13 @@ class CollectionIDTable {
};

/// return the ids
const std::vector<int>& ids() const {
const std::vector<uint32_t>& ids() const {
return m_collectionIDs;
}

/// register new name to the table
/// returns assigned collection ID
int add(const std::string& name);
uint32_t add(const std::string& name);

/// Prints collection information
void print() const;
Expand All @@ -56,7 +57,7 @@ class CollectionIDTable {
}

private:
std::vector<int> m_collectionIDs{};
std::vector<uint32_t> m_collectionIDs{};
std::vector<std::string> m_names{};
mutable std::unique_ptr<std::mutex> m_mutex{nullptr};
};
Expand Down
12 changes: 3 additions & 9 deletions include/podio/EventStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro
template <typename T>
bool get(const std::string& name, const T*& collection);

/// fast access to cached collections
CollectionBase* getFast(int id) const {
return (m_cachedCollections.size() > (unsigned)id ? m_cachedCollections[id] : nullptr);
}

/// access a collection by ID. returns true if successful
bool get(int id, CollectionBase*& coll) const final;
bool get(uint32_t id, CollectionBase*& coll) const final;

/// access a collection by name
/// returns a collection w/ setting isValid to true if successful
Expand Down Expand Up @@ -96,7 +91,7 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro
GenericParameters& getRunMetaData(int runID) override;

/// return the collection meta data for the given colID
GenericParameters& getCollectionMetaData(int colID) override;
GenericParameters& getCollectionMetaData(uint32_t colID) override;

RunMDMap* getRunMetaDataMap() {
return &m_runMDMap;
Expand All @@ -118,9 +113,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro
}

// members
mutable std::set<int> m_retrievedIDs{};
mutable std::set<uint32_t> m_retrievedIDs{};
mutable CollContainer m_collections{};
mutable std::vector<CollectionBase*> m_cachedCollections{};
IReader* m_reader{nullptr};
std::shared_ptr<CollectionIDTable> m_table;

Expand Down
11 changes: 7 additions & 4 deletions include/podio/Frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <mutex>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
Expand Down Expand Up @@ -120,7 +121,7 @@ class Frame {
return *m_parameters;
};

bool get(int collectionID, podio::CollectionBase*& collection) const override;
bool get(uint32_t collectionID, podio::CollectionBase*& collection) const override;

podio::CollectionIDTable getIDTable() const override {
// Make a copy
Expand All @@ -140,8 +141,8 @@ class Frame {
mutable std::unique_ptr<std::mutex> m_dataMtx{nullptr}; ///< The mutex for guarding the raw data
podio::CollectionIDTable m_idTable{}; ///< The collection ID table
std::unique_ptr<podio::GenericParameters> m_parameters{nullptr}; ///< The generic parameter store for this frame
mutable std::set<int> m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet put
///< into the map)
mutable std::set<uint32_t> m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet
///< put into the map)
};

std::unique_ptr<FrameConcept> m_self; ///< The internal concept pointer through which all the work is done
Expand Down Expand Up @@ -386,7 +387,7 @@ podio::CollectionBase* Frame::FrameModel<FrameDataT>::doGet(const std::string& n
}

template <typename FrameDataT>
bool Frame::FrameModel<FrameDataT>::get(int collectionID, CollectionBase*& collection) const {
bool Frame::FrameModel<FrameDataT>::get(uint32_t collectionID, CollectionBase*& collection) const {
const auto& name = m_idTable.name(collectionID);
const auto& [_, inserted] = m_retrievedIDs.insert(collectionID);

Expand Down Expand Up @@ -420,6 +421,8 @@ const podio::CollectionBase* Frame::FrameModel<FrameDataT>::put(std::unique_ptr<
// collisions from collections that are potentially present from rawdata?
it->second->setID(m_idTable.add(name));
return it->second.get();
} else {
throw std::invalid_argument("An object with key " + name + " already exists in the frame");
}
}

Expand Down
4 changes: 3 additions & 1 deletion include/podio/ICollectionProvider.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef PODIO_ICOLLECTIONPROVIDER_H
#define PODIO_ICOLLECTIONPROVIDER_H

#include <cstdint>

namespace podio {

class CollectionBase;
Expand All @@ -10,7 +12,7 @@ class ICollectionProvider {
/// destructor
virtual ~ICollectionProvider() = default;
/// access a collection by ID. returns true if successful
virtual bool get(int collectionID, CollectionBase*& collection) const = 0;
virtual bool get(uint32_t collectionID, CollectionBase*& collection) const = 0;
};

} // namespace podio
Expand Down
2 changes: 1 addition & 1 deletion include/podio/IMetaDataProvider.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DEPR_EVTSTORE IMetaDataProvider {
virtual GenericParameters& getRunMetaData(int runID) = 0;

/// return the collection meta data for the given colID
virtual GenericParameters& getCollectionMetaData(int colID) = 0;
virtual GenericParameters& getCollectionMetaData(uint32_t colID) = 0;
};

} // namespace podio
Expand Down
4 changes: 3 additions & 1 deletion include/podio/ObjectID.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef PODIO_OBJECTID_H
#define PODIO_OBJECTID_H

#include <cstdint>

namespace podio {

class ObjectID {
Expand All @@ -9,7 +11,7 @@ class ObjectID {
/// index of object in collection
int index;
/// ID of the collection
int collectionID;
uint32_t collectionID;

/// not part of a collection
static const int untracked = -1;
Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTFrameWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class ROOTFrameWriter {

// collectionID, collectionType, subsetCollection
// NOTE: same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<int, std::string, bool, unsigned int>;
using CollectionInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

/**
* Helper struct to group together all necessary state to write / process a
Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool, unsigned int>>& collInfo);
void createCollectionBranches(const std::vector<std::tuple<uint32_t, std::string, bool, unsigned int>>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class ROOTReader : public IReader {
std::map<int, GenericParameters>* readRunMetaData() override;

private:
void createCollectionBranches(const std::vector<std::tuple<int, std::string, bool, unsigned int>>& collInfo);
void createCollectionBranches(const std::vector<std::tuple<uint32_t, std::string, bool, unsigned int>>& collInfo);

std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);
// Information about the data vector as wall as the collection class type
Expand Down
6 changes: 3 additions & 3 deletions include/podio/SIOBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ class SIOCollectionIDTableBlock : public sio::block {

SIOCollectionIDTableBlock(podio::EventStore* store);

SIOCollectionIDTableBlock(std::vector<std::string>&& names, std::vector<int>&& ids, std::vector<std::string>&& types,
std::vector<short>&& isSubsetColl) :
SIOCollectionIDTableBlock(std::vector<std::string>&& names, std::vector<uint32_t>&& ids,
std::vector<std::string>&& types, std::vector<short>&& isSubsetColl) :
sio::block("CollectionIDs", sio::version::encode_version(0, 3)),
_names(std::move(names)),
_ids(std::move(ids)),
Expand All @@ -131,7 +131,7 @@ class SIOCollectionIDTableBlock : public sio::block {

private:
std::vector<std::string> _names{};
std::vector<int> _ids{};
std::vector<uint32_t> _ids{};
std::vector<std::string> _types{};
std::vector<short> _isSubsetColl{};
};
Expand Down
6 changes: 3 additions & 3 deletions include/podio/UserDataCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class UserDataCollection : public CollectionBase {
// simpler move-semantics this will be set and properly initialized on
// demand during the call to getBuffers
std::vector<BasicType>* _vecPtr{nullptr};
int m_collectionID{0};
uint32_t m_collectionID{0};
CollRefCollection m_refCollections{};
VectorMembersInfo m_vecmem_info{};

Expand Down Expand Up @@ -107,12 +107,12 @@ class UserDataCollection : public CollectionBase {
}

/// set collection ID
void setID(unsigned id) override {
void setID(uint32_t id) override {
m_collectionID = id;
}

/// get collection ID
unsigned getID() const override {
uint32_t getID() const override {
return m_collectionID;
}

Expand Down
10 changes: 5 additions & 5 deletions python/templates/Collection.h.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public:
{{ class.bare_type }}Collection({{ class.bare_type }}Collection&&) = default;
{{ class.bare_type }}Collection& operator=({{ class.bare_type }}Collection&&) = default;

// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, int collectionID);
// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, uint32_t collectionID);
~{{ class.bare_type }}Collection();

void clear() final;
Expand Down Expand Up @@ -116,17 +116,17 @@ public:
/// Get the collection buffers for this collection
podio::CollectionWriteBuffers getBuffers() final;

void setID(unsigned ID) final {
void setID(uint32_t ID) final {
m_collectionID = ID;
if (!m_isSubsetColl) {
std::for_each(m_storage.entries.begin(), m_storage.entries.end(),
[ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast<int>(ID)}; }
[ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast<uint32_t>(ID)}; }
);
}
m_isValid = true;
};

unsigned getID() const final {
uint32_t getID() const final {
return m_collectionID;
}

Expand Down Expand Up @@ -163,7 +163,7 @@ private:
bool m_isValid{false};
mutable bool m_isPrepared{false};
bool m_isSubsetColl{false};
int m_collectionID{0};
uint32_t m_collectionID{0};
mutable std::unique_ptr<std::mutex> m_storageMtx{nullptr};
mutable {{ class.bare_type }}CollectionData m_storage{};
};
Expand Down
2 changes: 1 addition & 1 deletion python/templates/CollectionData.cc.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ const auto {{ member.name }}_size = std::accumulate(entries.begin(), entries.end
{% endfor %}
}

void {{ class_type }}::prepareAfterRead(int collectionID) {
void {{ class_type }}::prepareAfterRead(uint32_t collectionID) {
int index = 0;
for (auto& data : *m_data) {
auto obj = new {{ class.bare_type }}Obj({index, collectionID}, data);
Expand Down
2 changes: 1 addition & 1 deletion python/templates/CollectionData.h.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public:

void prepareForWrite(bool isSubsetColl);

void prepareAfterRead(int collectionID);
void prepareAfterRead(uint32_t collectionID);

void makeSubsetCollection();

Expand Down
4 changes: 2 additions & 2 deletions python/templates/Obj.cc.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
{{ utils.namespace_open(class.namespace) }}
{% with obj_type = class.bare_type + 'Obj' %}
{{ obj_type }}::{{ obj_type }}() :
{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %},
{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %},
data(){{ single_relations_initialize(OneToOneRelations) }}
{%- for relation in OneToManyRelations + VectorMembers %},
m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>())
Expand All @@ -29,7 +29,7 @@
{ }

{{ obj_type }}::{{ obj_type }}(const {{ obj_type }}& other) :
{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %},
{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %},
data(other.data){{ single_relations_initialize(OneToOneRelations) }}
{%- for relation in OneToManyRelations + VectorMembers %},
m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>(*(other.m_{{ relation.name }})))
Expand Down
2 changes: 1 addition & 1 deletion python/templates/macros/collections.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ std::vector<{{ member.full_type }}> {{ class.bare_type }}Collection::{{ member.n
if (obj->m_{{ relation.name }}) {
m_refCollections[{{ real_index }}]->emplace_back(obj->m_{{ relation.name }}->getObjectID());
} else {
m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, podio::ObjectID::invalid});
m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, 0});
}
}
{% endmacro %}
Expand Down
2 changes: 1 addition & 1 deletion python/templates/macros/implementations.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ const podio::ObjectID {{ full_type }}::getObjectID() const {
if (m_obj) {
return m_obj->id;
}
return podio::ObjectID{podio::ObjectID::invalid, podio::ObjectID::invalid};
return podio::ObjectID{podio::ObjectID::invalid, 0};
}

{% set inverse_type = class.bare_type if prefix else 'Mutable' + class.bare_type %}
Expand Down
8 changes: 8 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ SET(core_sources
DatamodelRegistryIOHelpers.cc
UserDataCollection.cc
CollectionBufferFactory.cc
MurmurHash3.cpp
)

SET(core_headers
Expand Down Expand Up @@ -164,3 +165,10 @@ if (ENABLE_SIO)
DESTINATION "${CMAKE_INSTALL_LIBDIR}"
)
endif()

add_executable(podio_test_hashes test_hashes.cpp)
target_link_libraries(podio_test_hashes PRIVATE podio::podio)
install(TARGETS podio_test_hashes
EXPORT podioTargets
DESTINATION "${CMAKE_INSTALL_BINDIR}"
)
Loading

0 comments on commit ac086e0

Please sign in to comment.