Skip to content

Commit

Permalink
Add standalone executable for collision detection
Browse files Browse the repository at this point in the history
  • Loading branch information
tmadlener committed Jun 5, 2023
1 parent 9d1038b commit aee75ee
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,10 @@ if (ENABLE_SIO)
DESTINATION "${CMAKE_INSTALL_LIBDIR}"
)
endif()

# Build the podio_test_hashes executable from test_hashes.cpp and link it
# against the podio::podio target
add_executable(podio_test_hashes test_hashes.cpp)
target_link_libraries(podio_test_hashes PRIVATE podio::podio)
# Install the executable into the binary directory and register it in the
# podioTargets export set
install(TARGETS podio_test_hashes
EXPORT podioTargets
DESTINATION "${CMAKE_INSTALL_BINDIR}"
)
126 changes: 126 additions & 0 deletions src/test_hashes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include "MurmurHash3.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

/// Read all whitespace-separated collection names from a text file.
///
/// @param fileName path of the file to read
/// @returns the names in the order in which they appear in the file. If the
///          file cannot be opened a message is printed to stderr and an empty
///          vector is returned
auto readCollNames(const std::string& fileName) {
  std::ifstream nameStream(fileName);
  if (nameStream.is_open()) {
    // Formatted extraction splits on any whitespace, so names may be separated
    // by blanks, tabs or newlines
    return std::vector<std::string>(std::istream_iterator<std::string>(nameStream),
                                    std::istream_iterator<std::string>());
  }

  std::cerr << "Failed to open file \'" << fileName << "\' for reading collection names" << std::endl;
  return std::vector<std::string>{};
}

/// Compute one hash value per input string.
///
/// @tparam HashFunc callable that is invoked as
///         hashFunc(data, length, seed, out), i.e. with an interface like the
///         MurmurHash3 functions
/// @tparam HashT type that holds a single hash value (and that hashFunc writes
///         to via the out pointer)
/// @param strings the strings to hash
/// @param hashFunc the hash function to apply; it is always called with seed 0
/// @returns the hashes in the same order as the input strings
template <typename HashFunc, typename HashT = uint32_t>
auto hashStrings(const std::vector<std::string>& strings, HashFunc hashFunc) {
  // Start from zeroed slots and let the hash function fill each one in place
  std::vector<HashT> hashes(strings.size());

  for (size_t idx = 0; idx != strings.size(); ++idx) {
    const auto& str = strings[idx];
    hashFunc(str.c_str(), str.size(), 0, &hashes[idx]);
  }

  return hashes;
}

/// Hash all the passed in strings and check for collisions, i.e. for distinct
/// strings that end up with the same hash value.
///
/// A string that simply appears more than once in the input is not a
/// collision with itself; repeated occurrences are considered only once.
///
/// @tparam HashFunc callable with an interface like the MurmurHash3 functions
///         (see hashStrings)
/// @tparam HashT type that holds a single hash value
/// @param strings the strings to check
/// @param hashFunc the hash function to apply to each string
/// @returns a vector of (hash, colliding strings) tuples, ordered by ascending
///          hash value. The colliding strings keep the order of their first
///          appearance in the input. An empty vector means no collisions
template <typename HashFunc, typename HashT = uint32_t>
auto getCollisions(const std::vector<std::string>& strings, HashFunc hashFunc) {
  const auto hashes = hashStrings<HashFunc, HashT>(strings, hashFunc);

  // Group the distinct names by their hash value. The per-hash lists are
  // expected to be tiny, so a linear search for duplicates is sufficient
  std::map<HashT, std::vector<std::string>> namesByHash{};
  for (size_t i = 0; i < hashes.size(); ++i) {
    auto& names = namesByHash[hashes[i]];
    if (std::find(names.begin(), names.end(), strings[i]) == names.end()) {
      names.emplace_back(strings[i]);
    }
  }

  // Every hash that is shared by more than one distinct name is a collision
  std::vector<std::tuple<HashT, std::vector<std::string>>> collidingStrings;
  for (auto& [hash, names] : namesByHash) {
    if (names.size() > 1) {
      collidingStrings.emplace_back(hash, std::move(names));
    }
  }

  return collidingStrings;
}

/// Stream a vector as a comma separated list of its elements enclosed in
/// square brackets, e.g. "[a, b, c]". An empty vector is printed as "[]".
///
/// @param os the stream to write to
/// @param vec the vector to print; its elements must be streamable themselves
/// @returns the stream that was passed in
template <typename T, typename A>
std::ostream& operator<<(std::ostream& os, const std::vector<T, A>& vec) {
  os << '[';

  auto elemIt = vec.begin();
  const auto endIt = vec.end();
  if (elemIt != endIt) {
    // The first element is the only one without a leading separator
    os << *elemIt;
    ++elemIt;
  }
  for (; elemIt != endIt; ++elemIt) {
    os << ", " << *elemIt;
  }

  os << ']';
  return os;
}

/// One-line synopsis of the command line interface
static constexpr const char* usage = "usage: podio_test_hashes [-h] collNameFile";

/// Description of the program and its arguments. The text starts with an
/// empty line and ends with a newline
static constexpr const char* help =
    "\n"
    "Check if any of the collection names provided lead to a collision in the collection IDs\n"
    "positional arguments:\n"
    "collNameFile a text file containing all collection names to be checked\n"
    "optional arguments:\n"
    "-h, --help show this help message and exit\n";

/// Check the collection names listed in a text file for hash collisions.
///
/// Expects the name of the text file as first argument. Extra arguments after
/// the file name are ignored. Passing -h or --help as the only argument prints
/// the usage and help texts instead.
///
/// @returns 0 if no collisions are found (or if help was requested), 1 if the
///          arguments are missing, if no collection names could be read from
///          the file or if at least one collision has been found
int main(int argc, char* argv[]) {
  // argv[1] must exist before it is inspected below
  if (argc < 2) {
    std::cerr << usage << std::endl;
    return 1;
  }
  if (argc == 2 && (argv[1] == std::string("-h") || argv[1] == std::string("--help"))) {
    std::cerr << usage << '\n' << help << std::endl;
    return 0;
  }

  const auto collNames = readCollNames(argv[1]);
  if (collNames.empty()) {
    // readCollNames also returns an empty list if the file cannot be opened.
    // Exiting with success here would make a wrong file name look like a
    // successful check without collisions
    std::cerr << "No collection names could be read from \'" << argv[1] << "\'" << std::endl;
    return 1;
  }

  const auto collisions = getCollisions(collNames, MurmurHash3_x86_32);

  if (!collisions.empty()) {
    std::cerr << "Found collisions between names" << std::endl;
    std::cout << "hash: " << '\n';
    for (const auto& [hash, colls] : collisions) {
      // Hashes are printed as zero padded, 8 digit hexadecimal numbers
      std::cout << std::hex << std::setw(8) << std::setfill('0') << hash << ": " << colls << '\n';
    }

    return 1;
  }

  return 0;
}

0 comments on commit aee75ee

Please sign in to comment.