Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use bit manipulation with the UniqueIDGenSvc and add a check for repeated numbers #247

Merged
merged 11 commits into from
Oct 15, 2024
52 changes: 36 additions & 16 deletions k4FWCore/components/UniqueIDGenSvc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,47 @@
*/
#include "UniqueIDGenSvc.h"

DECLARE_COMPONENT(UniqueIDGenSvc)
#include <cstdint>
#include <string>

// Constructor: forwards the service name and the service locator to base_class; no other work is done.
UniqueIDGenSvc::UniqueIDGenSvc(const std::string& name, ISvcLocator* svcLoc) : base_class(name, svcLoc) {}

// Initializes the service by delegating to Service::initialize() and returning its
// StatusCode unchanged; no service-specific setup is performed here.
StatusCode UniqueIDGenSvc::initialize() {
StatusCode sc = Service::initialize();
return sc;
}

// Bit widths of the integer types that getUniqueID packs into one wide bitset.
// std::numeric_limits<T>::digits is the number of value bits of T, which for these
// unsigned types is the full width of the type.
// NOTE(review): each name is defined twice below (const, then constexpr). This looks like
// the before/after sides of the diff shown without +/- markers — confirm which set the
// real file keeps; both cannot coexist in one translation unit.
const size_t bits32 = std::numeric_limits<uint32_t>::digits;
const size_t bits64 = std::numeric_limits<uint64_t>::digits;
const size_t bitsSizeT = std::numeric_limits<size_t>::digits;
constexpr size_t bits32 = std::numeric_limits<uint32_t>::digits;
constexpr size_t bits64 = std::numeric_limits<uint64_t>::digits;
constexpr size_t bitsSizeT = std::numeric_limits<size_t>::digits;

// Computes an ID from the service seed (m_seed), the event number, the run number and a name.
// The four inputs are placed side by side in one wide bitset, ordered from the highest bits
// to the lowest as: seed | event number | run number | std::hash of the name. The returned
// value is std::hash of that combined bitset.
// The hash is also recorded in m_uniqueIDs under m_mutex; when the same hash is produced
// again a warning is logged, and the hash is returned in either case.
// NOTE(review): several steps below appear twice (constructor-style vs. copy-initialization,
// string concatenation vs. per-bit copy, plain return vs. stored hash). This looks like the
// removed and added sides of the diff rendered without +/- markers — confirm against the repository.
size_t UniqueIDGenSvc::getUniqueID(uint32_t evt_num, uint32_t run_num, const std::string& name) const {
// Fixed-width bit views of the seed, the two numbers and the hash of the name.
std::bitset<bits64> seed_bits(this->m_seed);
std::bitset<bits32> event_num_bits(evt_num), run_num_bits(run_num);
size_t str_hash = std::hash<std::string>{}(name);
std::bitset<bitsSizeT> name_bits(str_hash);
std::bitset<bits64> seed_bits = this->m_seed.value();
std::bitset<bits32> event_num_bits = evt_num, run_num_bits = run_num;
size_t str_hash = std::hash<std::string>{}(name);
std::bitset<bitsSizeT> name_bits = str_hash;

// Wide bitset with room for all four inputs; default-constructed, so every bit starts at zero.
std::bitset<bits64 + bits32 + bits32 + bitsSizeT> combined_bits;

// String-based variant: concatenates the textual bit patterns, seed first. The first
// character of the string maps to the highest bit, so the seed ends up in the top bits.
std::bitset<bits64 + bits32 + bits32 + bitsSizeT> combined_bits(seed_bits.to_string() + event_num_bits.to_string() +
run_num_bits.to_string() + name_bits.to_string());
// Bit-copy variant: each loop copies one input to its offset in combined_bits.
// Seed: the highest bits64 positions.
for (size_t i = 0; i < bits64; i++) {
combined_bits[i + bits32 + bits32 + bitsSizeT] = seed_bits[i];
}
// Event number: the bits32 positions directly below the seed.
for (size_t i = 0; i < bits32; i++) {
combined_bits[i + bits32 + bitsSizeT] = event_num_bits[i];
}
// Run number: the bits32 positions directly above the name hash.
for (size_t i = 0; i < bits32; i++) {
combined_bits[i + bitsSizeT] = run_num_bits[i];
}
// Name hash: the lowest bitsSizeT positions.
for (size_t i = 0; i < bitsSizeT; i++) {
combined_bits[i] = name_bits[i];
}
jmcarcell marked this conversation as resolved.
Show resolved Hide resolved

// Reduce the combined bitset to a single size_t.
return std::hash<std::bitset<bits64 + bits32 + bits32 + bitsSizeT>>{}(combined_bits);
auto hash = std::hash<std::bitset<bits64 + bits32 + bits32 + bitsSizeT>>{}(combined_bits);
// Take m_mutex before reading or modifying m_uniqueIDs; the lock is held until the function returns.
std::lock_guard<std::mutex> lock(m_mutex);
// A hash that was produced before only triggers a warning; it is not inserted again.
if (m_uniqueIDs.contains(hash)) {
warning() << "Event number " << evt_num << ", run number " << run_num << " and algorithm name \"" << name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be fatal, with an option to turn it into a warning? Nobody reads warnings.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible/likely to enter this condition 'by chance' while everything is actually sane? If not then I would be in favor

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible hash collision, but it shouldn't be likely. And if it happens too often, we have a problem we should know about.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the default to throw whenever the same ID is asked, with an option not to if needed. Indeed if this is buried in thousands of lines of logs it's not very useful. This will throw if someone runs the same algorithm multiple times with the same name; for example, if digitizing multiple collections with the same digitizer and running it multiple times.

<< "\" have already been used. Please check the uniqueness of the event number, run number and name."
<< endmsg;
} else {
// First occurrence of this hash: remember it so a later repeat can be detected.
m_uniqueIDs.insert(hash);
}
jmcarcell marked this conversation as resolved.
Show resolved Hide resolved

// The hash is returned whether or not it had been seen before.
return hash;
}

DECLARE_COMPONENT(UniqueIDGenSvc)
15 changes: 9 additions & 6 deletions k4FWCore/components/UniqueIDGenSvc.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
#ifndef FWCORE_UNIQUEIDGENSVC_H
#define FWCORE_UNIQUEIDGENSVC_H

#include "GaudiKernel/Service.h"
#include "k4Interface/IUniqueIDGenSvc.h"

#include <cstdint>
#include <functional>
#include <string>

#include <GaudiKernel/Service.h>
#include "k4Interface/IUniqueIDGenSvc.h"
#include <unordered_set>

/** @class UniqueIDGenSvc
* Generate unique, reproducible numbers using
Expand All @@ -34,11 +36,12 @@
// Service that implements IUniqueIDGenSvc on top of Service (via extends<Service, IUniqueIDGenSvc>).
// NOTE(review): getUniqueID and m_seed are each declared twice below. This looks like the
// before/after sides of the diff rendered without +/- markers — confirm which lines the real header keeps.
class UniqueIDGenSvc : public extends<Service, IUniqueIDGenSvc> {
public:
// Takes the service name and the service locator.
UniqueIDGenSvc(const std::string& name, ISvcLocator* svcLoc);
StatusCode initialize() override;
// Returns a size_t ID for the given event number, run number and name.
// Declared const, which is why the bookkeeping members below are mutable.
size_t getUniqueID(uint32_t evt_num, uint32_t run_num, const std::string& name) const override;
size_t getUniqueID(uint32_t evt_num, uint32_t run_num, const std::string& name) const override;

private:
// Configurable "Seed" property with default value 123456789 (shown with int64_t and with uint64_t).
Gaudi::Property<int64_t> m_seed{this, "Seed", {123456789}};
Gaudi::Property<uint64_t> m_seed{this, "Seed", {123456789}};
// Set of size_t values with std::identity supplied as the hasher, so each stored value
// is used directly as its own hash.
mutable std::unordered_set<size_t, std::identity> m_uniqueIDs;
// mutable so that it can be locked from const member functions.
mutable std::mutex m_mutex;
};

#endif
3 changes: 0 additions & 3 deletions k4Interface/include/k4Interface/IUniqueIDGenSvc.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
#ifndef FWCORE_IUNIQUEIDGENSVC_H
#define FWCORE_IUNIQUEIDGENSVC_H

#include <bitset>
#include <cstdint>
#include <functional>
#include <limits>
#include <string>

#include <GaudiKernel/IInterface.h>
Expand Down
1 change: 1 addition & 0 deletions test/k4FWCoreTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ add_test(NAME checkKeepDropSwitch
set_test_env(checkKeepDropSwitch)
set_property(TEST checkKeepDropSwitch APPEND PROPERTY DEPENDS ReadExampleEventData)
add_test_with_env(TestUniqueIDGenSvc options/TestUniqueIDGenSvc.py)
# Runs the same options file as TestUniqueIDGenSvc, but with PASS_REGULAR_EXPRESSION set:
# CTest then reports success only when the test output matches the duplicate-ID warning text.
# NOTE(review): assumes add_test_with_env forwards the arguments after PROPERTIES to the
# test's properties — confirm against its definition.
add_test_with_env(TestUniqueIDGenSvcRepeated options/TestUniqueIDGenSvc.py PROPERTIES PASS_REGULAR_EXPRESSION "WARNING *Event number 4, run number 3 and algorithm name \"Some algorithm name\" have already been used. Please check the uniqueness of the event number, run number and name.")
add_test_with_env(TestEventHeaderFiller options/createEventHeader.py)
# EventHeaderCheck declares a dependency on TestEventHeaderFiller.
add_test_with_env(EventHeaderCheck options/runEventHeaderCheck.py PROPERTIES DEPENDS TestEventHeaderFiller)
add_test(NAME TestExec WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} COMMAND python options/TestExec.py)
Expand Down
Loading