diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index 45f179e96..d502c124b 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -41,10 +41,10 @@ class CollectionBase { virtual bool setReferences(const ICollectionProvider* collectionProvider) = 0; /// set collection ID - virtual void setID(unsigned id) = 0; + virtual void setID(uint32_t id) = 0; /// get collection ID - virtual unsigned getID() const = 0; + virtual uint32_t getID() const = 0; /// Get the collection buffers for this collection virtual podio::CollectionWriteBuffers getBuffers() = 0; diff --git a/include/podio/CollectionIDTable.h b/include/podio/CollectionIDTable.h index 47d51251b..39b947c2e 100644 --- a/include/podio/CollectionIDTable.h +++ b/include/podio/CollectionIDTable.h @@ -1,6 +1,7 @@ #ifndef PODIO_COLLECTIONIDTABLE_H #define PODIO_COLLECTIONIDTABLE_H +#include #include #include #include @@ -20,15 +21,15 @@ class CollectionIDTable { CollectionIDTable& operator=(CollectionIDTable&&) = default; /// constructor from existing ID:name mapping - CollectionIDTable(std::vector&& ids, std::vector&& names); + CollectionIDTable(std::vector&& ids, std::vector&& names); - CollectionIDTable(const std::vector& ids, const std::vector& names); + CollectionIDTable(const std::vector& ids, const std::vector& names); /// return collection ID for given name - int collectionID(const std::string& name) const; + uint32_t collectionID(const std::string& name) const; /// return name for given collection ID - const std::string name(int collectionID) const; + const std::string name(uint32_t collectionID) const; /// Check if collection name is known bool present(const std::string& name) const; @@ -39,13 +40,13 @@ class CollectionIDTable { }; /// return the ids - const std::vector& ids() const { + const std::vector& ids() const { return m_collectionIDs; } /// register new name to the table /// returns assigned collection ID - int add(const std::string& name); + uint32_t add(const std::string& name); /// Prints collection information void print() const; @@ -56,7 +57,7 @@ class CollectionIDTable { } private: - std::vector m_collectionIDs{}; + std::vector m_collectionIDs{}; std::vector m_names{}; mutable std::unique_ptr m_mutex{nullptr}; }; diff --git a/include/podio/EventStore.h b/include/podio/EventStore.h index f8ee70dc4..05ae58b38 100644 --- a/include/podio/EventStore.h +++ b/include/podio/EventStore.h @@ -58,13 +58,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro template bool get(const std::string& name, const T*& collection); - /// fast access to cached collections - CollectionBase* getFast(int id) const { - return (m_cachedCollections.size() > (unsigned)id ? m_cachedCollections[id] : nullptr); - } - /// access a collection by ID. returns true if successful - bool get(int id, CollectionBase*& coll) const final; + bool get(uint32_t id, CollectionBase*& coll) const final; /// access a collection by name /// returns a collection w/ setting isValid to true if successful @@ -96,7 +91,7 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro GenericParameters& getRunMetaData(int runID) override; /// return the collection meta data for the given colID - GenericParameters& getCollectionMetaData(int colID) override; + GenericParameters& getCollectionMetaData(uint32_t colID) override; RunMDMap* getRunMetaDataMap() { return &m_runMDMap; @@ -118,9 +113,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro } // members - mutable std::set m_retrievedIDs{}; + mutable std::set m_retrievedIDs{}; mutable CollContainer m_collections{}; - mutable std::vector m_cachedCollections{}; IReader* m_reader{nullptr}; std::shared_ptr m_table; diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 4943c4160..56ffe9cc2 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ class Frame { return *m_parameters; }; - bool get(int collectionID, podio::CollectionBase*& collection) const override; + bool get(uint32_t collectionID, podio::CollectionBase*& collection) const override; podio::CollectionIDTable getIDTable() const override { // Make a copy @@ -140,8 +141,8 @@ class Frame { mutable std::unique_ptr m_dataMtx{nullptr}; ///< The mutex for guarding the raw data podio::CollectionIDTable m_idTable{}; ///< The collection ID table std::unique_ptr m_parameters{nullptr}; ///< The generic parameter store for this frame - mutable std::set m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet put - ///< into the map) + mutable std::set m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet + ///< put into the map) }; std::unique_ptr m_self; ///< The internal concept pointer through which all the work is done @@ -386,7 +387,7 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n } template -bool Frame::FrameModel::get(int collectionID, CollectionBase*& collection) const { +bool Frame::FrameModel::get(uint32_t collectionID, CollectionBase*& collection) const { const auto& name = m_idTable.name(collectionID); const auto& [_, inserted] = m_retrievedIDs.insert(collectionID); @@ -420,6 +421,8 @@ const podio::CollectionBase* Frame::FrameModel::put(std::unique_ptr< // collisions from collections that are potentially present from rawdata? it->second->setID(m_idTable.add(name)); return it->second.get(); + } else { + throw std::invalid_argument("An object with key " + name + " already exists in the frame"); } } diff --git a/include/podio/ICollectionProvider.h b/include/podio/ICollectionProvider.h index 1d5bfc53f..3a724f26a 100644 --- a/include/podio/ICollectionProvider.h +++ b/include/podio/ICollectionProvider.h @@ -1,6 +1,8 @@ #ifndef PODIO_ICOLLECTIONPROVIDER_H #define PODIO_ICOLLECTIONPROVIDER_H +#include + namespace podio { class CollectionBase; @@ -10,7 +12,7 @@ class ICollectionProvider { /// destructor virtual ~ICollectionProvider() = default; /// access a collection by ID. returns true if successful - virtual bool get(int collectionID, CollectionBase*& collection) const = 0; + virtual bool get(uint32_t collectionID, CollectionBase*& collection) const = 0; }; } // namespace podio diff --git a/include/podio/IMetaDataProvider.h b/include/podio/IMetaDataProvider.h index 20f1941b0..b8c662277 100644 --- a/include/podio/IMetaDataProvider.h +++ b/include/podio/IMetaDataProvider.h @@ -23,7 +23,7 @@ class DEPR_EVTSTORE IMetaDataProvider { virtual GenericParameters& getRunMetaData(int runID) = 0; /// return the collection meta data for the given colID - virtual GenericParameters& getCollectionMetaData(int colID) = 0; + virtual GenericParameters& getCollectionMetaData(uint32_t colID) = 0; }; } // namespace podio diff --git a/include/podio/ObjectID.h b/include/podio/ObjectID.h index fc6037c47..4347a5ba9 100644 --- a/include/podio/ObjectID.h +++ b/include/podio/ObjectID.h @@ -1,6 +1,8 @@ #ifndef PODIO_OBJECTID_H #define PODIO_OBJECTID_H +#include + namespace podio { class ObjectID { @@ -9,7 +11,7 @@ class ObjectID { /// index of object in collection int index; /// ID of the collection - int collectionID; + uint32_t collectionID; /// not part of a collection static const int untracked = -1; diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h index 3b0fde4ba..535b84025 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -54,7 +54,7 @@ class ROOTFrameWriter { // collectionID, collectionType, subsetCollection // NOTE: same as in rootUtils.h private header! - using CollectionInfoT = std::tuple; + using CollectionInfoT = std::tuple; /** * Helper struct to group together all necessary state to write / process a diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index 4b52b91c6..2a2a8b621 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -91,7 +91,7 @@ class ROOTLegacyReader { private: std::pair getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 03a5d5557..757791cf0 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -85,7 +85,7 @@ class ROOTReader : public IReader { std::map* readRunMetaData() override; private: - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); std::pair getLocalTreeAndEntry(const std::string& treename); // Information about the data vector as wall as the collection class type diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 5834a9b5b..e7c917d27 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -104,8 +104,8 @@ class SIOCollectionIDTableBlock : public sio::block { SIOCollectionIDTableBlock(podio::EventStore* store); - SIOCollectionIDTableBlock(std::vector&& names, std::vector&& ids, std::vector&& types, - std::vector&& isSubsetColl) : + SIOCollectionIDTableBlock(std::vector&& names, std::vector&& ids, + std::vector&& types, std::vector&& isSubsetColl) : sio::block("CollectionIDs", sio::version::encode_version(0, 3)), _names(std::move(names)), _ids(std::move(ids)), @@ -131,7 +131,7 @@ class SIOCollectionIDTableBlock : public sio::block { private: std::vector _names{}; - std::vector _ids{}; + std::vector _ids{}; std::vector _types{}; std::vector _isSubsetColl{}; }; diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 4fe575996..dcf726806 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -75,7 +75,7 @@ class UserDataCollection : public CollectionBase { // simpler move-semantics this will be set and properly initialized on // demand during the call to getBuffers std::vector* _vecPtr{nullptr}; - int m_collectionID{0}; + uint32_t m_collectionID{0}; CollRefCollection m_refCollections{}; VectorMembersInfo m_vecmem_info{}; @@ -107,12 +107,12 @@ class UserDataCollection : public CollectionBase { } /// set collection ID - void setID(unsigned id) override { + void setID(uint32_t id) override { m_collectionID = id; } /// get collection ID - unsigned getID() const override { + uint32_t getID() const override { return m_collectionID; } diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index b70e93b12..cbc3945ba 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -59,7 +59,7 @@ public: {{ class.bare_type }}Collection({{ class.bare_type }}Collection&&) = default; {{ class.bare_type }}Collection& operator=({{ class.bare_type }}Collection&&) = default; -// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, int collectionID); +// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, uint32_t collectionID); ~{{ class.bare_type }}Collection(); void clear() final; @@ -116,17 +116,17 @@ public: /// Get the collection buffers for this collection podio::CollectionWriteBuffers getBuffers() final; - void setID(unsigned ID) final { + void setID(uint32_t ID) final { m_collectionID = ID; if (!m_isSubsetColl) { std::for_each(m_storage.entries.begin(), m_storage.entries.end(), - [ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast(ID)}; } + [ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast(ID)}; } ); } m_isValid = true; }; - unsigned getID() const final { + uint32_t getID() const final { return m_collectionID; } @@ -163,7 +163,7 @@ private: bool m_isValid{false}; mutable bool m_isPrepared{false}; bool m_isSubsetColl{false}; - int m_collectionID{0}; + uint32_t m_collectionID{0}; mutable std::unique_ptr m_storageMtx{nullptr}; mutable {{ class.bare_type }}CollectionData m_storage{}; }; diff --git a/python/templates/CollectionData.cc.jinja2 b/python/templates/CollectionData.cc.jinja2 index ddc6f29dd..3ae5d3a80 100644 --- a/python/templates/CollectionData.cc.jinja2 +++ b/python/templates/CollectionData.cc.jinja2 @@ -139,7 +139,7 @@ const auto {{ member.name }}_size = std::accumulate(entries.begin(), entries.end {% endfor %} } -void {{ class_type }}::prepareAfterRead(int collectionID) { +void {{ class_type }}::prepareAfterRead(uint32_t collectionID) { int index = 0; for (auto& data : *m_data) { auto obj = new {{ class.bare_type }}Obj({index, collectionID}, data); diff --git a/python/templates/CollectionData.h.jinja2 b/python/templates/CollectionData.h.jinja2 index 50ae8dd02..0eee7dabe 100644 --- a/python/templates/CollectionData.h.jinja2 +++ b/python/templates/CollectionData.h.jinja2 @@ -65,7 +65,7 @@ public: void prepareForWrite(bool isSubsetColl); - void prepareAfterRead(int collectionID); + void prepareAfterRead(uint32_t collectionID); void makeSubsetCollection(); diff --git a/python/templates/Obj.cc.jinja2 b/python/templates/Obj.cc.jinja2 index 39829310c..58574057d 100644 --- a/python/templates/Obj.cc.jinja2 +++ b/python/templates/Obj.cc.jinja2 @@ -16,7 +16,7 @@ {{ utils.namespace_open(class.namespace) }} {% with obj_type = class.bare_type + 'Obj' %} {{ obj_type }}::{{ obj_type }}() : -{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %}, +{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, data(){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>()) @@ -29,7 +29,7 @@ { } {{ obj_type }}::{{ obj_type }}(const {{ obj_type }}& other) : -{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %}, +{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, data(other.data){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>(*(other.m_{{ relation.name }}))) diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index f91dfbedf..d07abad19 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -48,7 +48,7 @@ std::vector<{{ member.full_type }}> {{ class.bare_type }}Collection::{{ member.n if (obj->m_{{ relation.name }}) { m_refCollections[{{ real_index }}]->emplace_back(obj->m_{{ relation.name }}->getObjectID()); } else { - m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, podio::ObjectID::invalid}); + m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, 0}); } } {% endmacro %} diff --git a/python/templates/macros/implementations.jinja2 b/python/templates/macros/implementations.jinja2 index 821056fba..0ddb29e39 100644 --- a/python/templates/macros/implementations.jinja2 +++ b/python/templates/macros/implementations.jinja2 @@ -164,7 +164,7 @@ const podio::ObjectID {{ full_type }}::getObjectID() const { if (m_obj) { return m_obj->id; } - return podio::ObjectID{podio::ObjectID::invalid, podio::ObjectID::invalid}; + return podio::ObjectID{podio::ObjectID::invalid, 0}; } {% set inverse_type = class.bare_type if prefix else 'Mutable' + class.bare_type %} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ae5f0f984..7f71864e3 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ SET(core_sources DatamodelRegistryIOHelpers.cc UserDataCollection.cc CollectionBufferFactory.cc + MurmurHash3.cpp ) SET(core_headers @@ -164,3 +165,10 @@ if (ENABLE_SIO) DESTINATION "${CMAKE_INSTALL_LIBDIR}" ) endif() + +add_executable(podio_test_hashes test_hashes.cpp) +target_link_libraries(podio_test_hashes PRIVATE podio::podio) +install(TARGETS podio_test_hashes + EXPORT podioTargets + DESTINATION "${CMAKE_INSTALL_BINDIR}" +) diff --git a/src/CollectionIDTable.cc b/src/CollectionIDTable.cc index e5ca3d15d..fa97a4bbb 100644 --- a/src/CollectionIDTable.cc +++ b/src/CollectionIDTable.cc @@ -3,27 +3,29 @@ #include #include +#include "MurmurHash3.h" + namespace podio { CollectionIDTable::CollectionIDTable() : m_mutex(std::make_unique()) { } -CollectionIDTable::CollectionIDTable(std::vector&& ids, std::vector&& names) : +CollectionIDTable::CollectionIDTable(std::vector&& ids, std::vector&& names) : m_collectionIDs(std::move(ids)), m_names(std::move(names)), m_mutex(std::make_unique()) { } -CollectionIDTable::CollectionIDTable(const std::vector& ids, const std::vector& names) : +CollectionIDTable::CollectionIDTable(const std::vector& ids, const std::vector& names) : m_collectionIDs(ids), m_names(names), m_mutex(std::make_unique()) { } -const std::string CollectionIDTable::name(int ID) const { +const std::string CollectionIDTable::name(uint32_t ID) const { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_collectionIDs), end(m_collectionIDs), ID); const auto index = std::distance(m_collectionIDs.begin(), result); return m_names[index]; } -int CollectionIDTable::collectionID(const std::string& name) const { +uint32_t CollectionIDTable::collectionID(const std::string& name) const { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); const auto index = std::distance(m_names.begin(), result); @@ -44,13 +46,13 @@ bool CollectionIDTable::present(const std::string& name) const { return result != end(m_names); } -int CollectionIDTable::add(const std::string& name) { +uint32_t CollectionIDTable::add(const std::string& name) { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); - int ID = 0; + uint32_t ID = 0; if (result == m_names.end()) { m_names.emplace_back(name); - ID = m_names.size(); + MurmurHash3_x86_32(name.c_str(), name.size(), 0, &ID); m_collectionIDs.emplace_back(ID); } else { const auto index = std::distance(m_names.begin(), result); diff --git a/src/EventStore.cc b/src/EventStore.cc index 85dfb3a09..947d83fff 100644 --- a/src/EventStore.cc +++ b/src/EventStore.cc @@ -7,7 +7,6 @@ namespace podio { EventStore::EventStore() : m_table(new CollectionIDTable()) { - m_cachedCollections.resize(128); // allow for a sufficiently large initial number of collections } EventStore::~EventStore() { @@ -16,24 +15,13 @@ EventStore::~EventStore() { } } -bool EventStore::get(int id, CollectionBase*& collection) const { - // see if we have a cached collection - if ((collection = getFast(id)) != nullptr) { - return true; - } - +bool EventStore::get(uint32_t id, CollectionBase*& collection) const { auto val = m_retrievedIDs.insert(id); bool success = false; if (val.second == true) { // collection not yet retrieved in recursive-call auto name = m_table->name(id); success = doGet(name, collection, true); - if (collection != nullptr) { // cache the collection for faster retreaval later - if (m_cachedCollections.size() < (unsigned)id + 1) { - m_cachedCollections.resize(id + 1); - } - m_cachedCollections[id] = collection; - } } else { // collection already requested in recursive call // do not set the references to break collection dependency-cycle @@ -106,7 +94,7 @@ GenericParameters& EventStore::getRunMetaData(int runID) { return m_runMDMap[runID]; } -GenericParameters& EventStore::getCollectionMetaData(int colID) { +GenericParameters& EventStore::getCollectionMetaData(uint32_t colID) { if (m_colMDMap.empty() && m_reader != nullptr) { ColMDMap* tmp = m_reader->readCollectionMetaData(); @@ -135,8 +123,6 @@ void EventStore::clear() { void EventStore::clearCaches() { m_collections.clear(); - m_cachedCollections.clear(); - m_cachedCollections.resize(128); m_retrievedIDs.clear(); } diff --git a/src/MurmurHash3.cpp b/src/MurmurHash3.cpp new file mode 100644 index 000000000..a782eeeaa --- /dev/null +++ b/src/MurmurHash3.cpp @@ -0,0 +1,442 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + + #define FORCE_INLINE __forceinline + + #include + + #define ROTL32(x, y) _rotl(x, y) + #define ROTL64(x, y) _rotl64(x, y) + + #define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + + #define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32(uint32_t x, int8_t r) { + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64(uint64_t x, int8_t r) { + return (x << r) | (x >> (64 - r)); +} + + #define ROTL32(x, y) rotl32(x, y) + #define ROTL64(x, y) rotl64(x, y) + + #define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { + return p[i]; +} + +FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +FORCE_INLINE uint64_t fmix64(uint64_t k) { + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128(const void* key, const int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 16); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i * 4 + 0); + uint32_t k2 = getblock32(blocks, i * 4 + 1); + uint32_t k3 = getblock32(blocks, i * 4 + 2); + uint32_t k4 = getblock32(blocks, i * 4 + 3); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL32(h1, 19); + h1 += h2; + h1 = h1 * 5 + 0x561ccd1b; + + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + + h2 = ROTL32(h2, 17); + h2 += h3; + h2 = h2 * 5 + 0x0bcaa747; + + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + + h3 = ROTL32(h3, 15); + h3 += h4; + h3 = h3 * 5 + 0x96cd1c35; + + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + + h4 = ROTL32(h4, 13); + h4 += h1; + h4 = h4 * 5 + 0x32ac3b17; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: + k4 ^= tail[14] << 16; + [[fallthrough]]; + case 14: + k4 ^= tail[13] << 8; + [[fallthrough]]; + case 13: + k4 ^= tail[12] << 0; + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + [[fallthrough]]; + + case 12: + k3 ^= tail[11] << 24; + [[fallthrough]]; + case 11: + k3 ^= tail[10] << 16; + [[fallthrough]]; + case 10: + k3 ^= tail[9] << 8; + [[fallthrough]]; + case 9: + k3 ^= tail[8] << 0; + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + [[fallthrough]]; + + case 8: + k2 ^= tail[7] << 24; + [[fallthrough]]; + case 7: + k2 ^= tail[6] << 16; + [[fallthrough]]; + case 6: + k2 ^= tail[5] << 8; + [[fallthrough]]; + case 5: + k2 ^= tail[4] << 0; + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + [[fallthrough]]; + + case 4: + k1 ^= tail[3] << 24; + [[fallthrough]]; + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0] << 0; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + h3 ^= len; + h4 ^= len; + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128(const void* key, const int len, const uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const auto blocks = (const uint64_t*)(data); + + for (int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks, i * 2 + 0); + uint64_t k2 = getblock64(blocks, i * 2 + 1); + + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + + h2 = ROTL64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: + k2 ^= ((uint64_t)tail[14]) << 48; + [[fallthrough]]; + case 14: + k2 ^= ((uint64_t)tail[13]) << 40; + [[fallthrough]]; + case 13: + k2 ^= ((uint64_t)tail[12]) << 32; + [[fallthrough]]; + case 12: + k2 ^= ((uint64_t)tail[11]) << 24; + [[fallthrough]]; + case 11: + k2 ^= ((uint64_t)tail[10]) << 16; + [[fallthrough]]; + case 10: + k2 ^= ((uint64_t)tail[9]) << 8; + [[fallthrough]]; + case 9: + k2 ^= ((uint64_t)tail[8]) << 0; + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + [[fallthrough]]; + + case 8: + k1 ^= ((uint64_t)tail[7]) << 56; + [[fallthrough]]; + case 7: + k1 ^= ((uint64_t)tail[6]) << 48; + [[fallthrough]]; + case 6: + k1 ^= ((uint64_t)tail[5]) << 40; + [[fallthrough]]; + case 5: + k1 ^= ((uint64_t)tail[4]) << 32; + [[fallthrough]]; + case 4: + k1 ^= ((uint64_t)tail[3]) << 24; + [[fallthrough]]; + case 3: + k1 ^= ((uint64_t)tail[2]) << 16; + [[fallthrough]]; + case 2: + k1 ^= ((uint64_t)tail[1]) << 8; + [[fallthrough]]; + case 1: + k1 ^= ((uint64_t)tail[0]) << 0; + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- diff --git a/src/MurmurHash3.h b/src/MurmurHash3.h new file mode 100644 index 000000000..e73990396 --- /dev/null +++ b/src/MurmurHash3.h @@ -0,0 +1,37 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards +#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + + // Other compilers + +#else // defined(_MSC_VER) + + #include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x86_128(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x64_128(const void* key, int len, uint32_t seed, void* out); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/src/rootUtils.h b/src/rootUtils.h index 7c6311c83..507d24b15 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -135,7 +135,7 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB // A collection of additional information that describes the collection: the // collectionID, the collection (data) type, whether it is a subset // collection, and its schema version -using CollectionInfoT = std::tuple; +using CollectionInfoT = std::tuple; // for backwards compatibility using CollectionInfoWithoutSchemaT = std::tuple; diff --git a/src/selection.xml b/src/selection.xml index b1a9694c1..4daca9192 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -15,6 +15,9 @@ + + + diff --git a/src/sioUtils.h b/src/sioUtils.h index 204867eaf..82297456b 100644 --- a/src/sioUtils.h +++ b/src/sioUtils.h @@ -46,7 +46,7 @@ namespace sio_utils { subsetColl.reserve(collections.size()); std::vector names; names.reserve(collections.size()); - std::vector ids; + std::vector ids; ids.reserve(collections.size()); for (const auto& [name, coll] : collections) { diff --git a/src/test_hashes.cpp b/src/test_hashes.cpp new file mode 100644 index 000000000..41168ae58 --- /dev/null +++ b/src/test_hashes.cpp @@ -0,0 +1,126 @@ +#include "MurmurHash3.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +auto readCollNames(const std::string& fileName) { + std::vector collNames{}; + + std::ifstream inputFile(fileName); + if (!inputFile.is_open()) { + std::cerr << "Failed to open file \'" << fileName << "\' for reading collection names" << std::endl; + return collNames; + } + + std::string name; + while (inputFile >> name) { + collNames.emplace_back(std::move(name)); + } + + return collNames; +} + +/// Hash all passed strings using the passed in HashFunc with an interface like +/// the MurmurHash3 methods +template +auto hashStrings(const std::vector& strings, HashFunc hashFunc) { + std::vector hashes; + hashes.reserve(strings.size()); + + for (const auto& s : strings) { + HashT id = 0; + hashFunc(s.c_str(), s.size(), 0, &id); + hashes.emplace_back(id); + } + + return hashes; +} + +/// Hash all the passed in strings and check for collisions. Returns a vector of +/// Hashes and the corresponding colliding strings. Empty vector corresponds to +/// no collisions +template +auto getCollisions(const std::vector& strings, HashFunc hashFunc) { + auto hashes = hashStrings(strings, hashFunc); + + // Use a multimap for collision detection + std::multimap hashMap{}; + for (size_t i = 0; i < hashes.size(); ++i) { + hashMap.emplace(hashes[i], strings[i]); + } + + std::vector>> collidingStrings; + auto firstIt = hashMap.begin(); + while (firstIt != hashMap.end()) { + auto rangeIts = hashMap.equal_range(firstIt->first); + if (std::distance(rangeIts.first, rangeIts.second) != 1) { + std::vector names; + names.reserve(2); // Most likely case hopefully + for (auto it = rangeIts.first; it != rangeIts.second; ++it) { + names.emplace_back(it->second); + } + + collidingStrings.emplace_back(rangeIts.first->first, std::move(names)); + } + + firstIt = rangeIts.second; + } + + return collidingStrings; +} + +template +std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + os << '['; + if (!vec.empty()) { + os << vec[0]; + } + for (size_t i = 1; i < vec.size(); ++i) { + os << ", " << vec[i]; + } + return os << ']'; +} + +constexpr static auto usage = R"USAGE(usage: podio_test_hashes [-h] collNameFile)USAGE"; +constexpr static auto help = R"HELP( +Check if any of the collection names provided lead to a collision in the collection IDs + +positional arguments: + collNameFile a text file containing all collection names to be checked + +optional arguments: + -h, --help show this help message and exit +)HELP"; + +int main(int argc, char* argv[]) { + if (argc == 1) { + std::cerr << usage << std::endl; + return 1; + } + if (argc == 2 && (argv[1] == std::string("-h") || argv[1] == std::string("--help"))) { + std::cerr << usage << '\n' << help << std::endl; + return 0; + } + + const auto collNames = readCollNames(argv[1]); + const auto collisions = getCollisions(collNames, MurmurHash3_x86_32); + + if (!collisions.empty()) { + std::cerr << "Found collisions between names" << std::endl; + std::cout << "hash: " << '\n'; + for (const auto& [hash, colls] : collisions) { + std::cout << std::hex << std::setw(8) << std::setfill('0') << hash << ": " << colls << '\n'; + } + + return 1; + } + + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 344d910db..9683e6d31 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -237,6 +237,8 @@ if (NOT FORCE_RUN_ALL_TESTS) set(filter_tests "~[LEAK-FAIL]") elseif(USE_SANITIZER MATCHES "Thread") set(filter_tests "~[THREAD-FAIL]") + elseif(USE_SANITIZER MATCHES "Undefined") + set(filter_tests "~[UBSAN-FAIL]") endif() endif() diff --git a/tests/frame.cpp b/tests/frame.cpp index 4d36909f1..e52965a07 100644 --- a/tests/frame.cpp +++ b/tests/frame.cpp @@ -365,3 +365,15 @@ TEST_CASE("Frame parameters multithread insert and read", "[frame][basics][multi REQUIRE(frame.getParameter(makeName("string", i)) == std::to_string(i)); } } + +TEST_CASE("Frame double insert", "[frame][basics]") { + auto event = podio::Frame(); + auto clusters = ExampleClusterCollection(); + clusters.create(3.14f); + clusters.create(42.0f); + auto other_clusters = ExampleClusterCollection(); + other_clusters.create(23.0f); + + event.put(std::move(clusters), "clusters"); + REQUIRE_THROWS_AS(event.put(std::move(other_clusters), "clusters"), std::invalid_argument); +} diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 77e14727f..6903a2e2b 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -79,7 +79,7 @@ TEST_CASE("Assignment-operator ref count", "[basics][memory-management]") { } } -TEST_CASE("Clearing", "[ASAN-FAIL][THREAD-FAIL][basics][memory-management]") { +TEST_CASE("Clearing", "[UBSAN-FAIL][ASAN-FAIL][THREAD-FAIL][basics][memory-management]") { bool success = true; auto store = podio::EventStore(); auto& hits = store.create("hits");