From a46fc3766148b92633f3fd7c2d06ea96acf6d2f4 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 6 May 2024 15:37:10 +0200 Subject: [PATCH] Changes to use podio::ROOTDataSource --- CMakeLists.txt | 4 - analyzers/dataframe/src/SmearObjects.cc | 6 +- e4hsource/CMakeLists.txt | 25 -- .../EDM4hepDataSource/EDM4hepDataSource.hxx | 105 ----- e4hsource/include/EDM4hepDataSource/LinkDef.h | 16 - e4hsource/src/EDM4hepDataSource.cxx | 388 ------------------ examples/data_source/analysis_final.py | 50 +++ examples/data_source/analysis_plots.py | 49 +++ .../data_source}/analysis_stage1.py | 47 ++- examples/data_source/analysis_stage2.py | 107 +++++ .../data_source}/histmaker_source.py | 0 .../data_source}/stages_source.py | 0 .../data_source}/standalone.py | 21 +- python/run_analysis.py | 34 +- tests/CMakeLists.txt | 35 ++ tests/data_source/test_low_level.cxx | 110 +++++ .../test_low_level_associations.cxx | 77 ++++ tests/data_source/test_source.cxx | 152 +++++++ .../data_source/test_source_associations.cxx | 283 +++++++++++++ 19 files changed, 925 insertions(+), 584 deletions(-) delete mode 100644 e4hsource/CMakeLists.txt delete mode 100644 e4hsource/include/EDM4hepDataSource/EDM4hepDataSource.hxx delete mode 100644 e4hsource/include/EDM4hepDataSource/LinkDef.h delete mode 100644 e4hsource/src/EDM4hepDataSource.cxx create mode 100644 examples/data_source/analysis_final.py create mode 100644 examples/data_source/analysis_plots.py rename {e4hsource/test => examples/data_source}/analysis_stage1.py (79%) create mode 100644 examples/data_source/analysis_stage2.py rename {e4hsource/test => examples/data_source}/histmaker_source.py (100%) rename {e4hsource/test => examples/data_source}/stages_source.py (100%) rename {e4hsource/test => examples/data_source}/standalone.py (75%) create mode 100644 tests/data_source/test_low_level.cxx create mode 100644 tests/data_source/test_low_level_associations.cxx create mode 100644 tests/data_source/test_source.cxx create mode 100644 tests/data_source/test_source_associations.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 8de1cad5ec..4600f34442 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,10 +150,6 @@ install(DIRECTORY examples DESTINATION ${CMAKE_INSTALL_PREFIX}/share/examples) #--- Descend into subdirectories ---------------------------------------------- -if(WITH_SOURCE) - add_subdirectory(e4hsource) -endif() - set(ADDONS_LIBRARIES CACHE STRING "List of external libraries the RDF utilities will be linked against") add_subdirectory(addons) add_subdirectory(analyzers/dataframe) diff --git a/analyzers/dataframe/src/SmearObjects.cc b/analyzers/dataframe/src/SmearObjects.cc index 27e7d29477..7c3a5056f1 100644 --- a/analyzers/dataframe/src/SmearObjects.cc +++ b/analyzers/dataframe/src/SmearObjects.cc @@ -151,7 +151,8 @@ ROOT::VecOps::RVec SmearedTracks::operator()( smeared_track.covMatrix = covMatrix_edm4hep; - if (m_debug) { + // if (m_debug) { + if (false) { std::cout << std::endl << "Original track " << track.D0 << " " << track.phi << " " << track.omega << " " << track.Z0 << " " << track.tanLambda @@ -292,7 +293,8 @@ TVectorD CovSmear(TVectorD x, TMatrixDSym C, TRandom *ran, bool debug = false) { TVectorD r(Nvec); for (Int_t i = 0; i < Nvec; i++) r(i) = ran->Gaus(0.0, 1.0); // Array of normal random numbers - if (debug) + // if (debug) + if (false) std::cout << " random nb " << ran->Gaus(0.0, 1.0) << std::endl; TVectorD xOut = x + DCv * (Ut * r); // Observed parameter vector // diff --git a/e4hsource/CMakeLists.txt b/e4hsource/CMakeLists.txt deleted file mode 100644 index 7baddb41fe..0000000000 --- a/e4hsource/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -add_library(e4hsource SHARED src/EDM4hepDataSource.cxx) -target_include_directories(e4hsource PUBLIC include) -target_link_libraries(e4hsource PUBLIC ROOT::RIO - ROOT::ROOTDataFrame - podio::podioRootIO -) - -ROOT_GENERATE_DICTIONARY(G__FCCAnalyses_EDM4hepDataSource - include/EDM4hepDataSource/EDM4hepDataSource.hxx - MODULE e4hsource - LINKDEF include/EDM4hepDataSource/LinkDef.h -) - -install(TARGETS e4hsource - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -install(FILES "${PROJECT_BINARY_DIR}/e4hsource/libe4hsource.rootmap" - DESTINATION "${CMAKE_INSTALL_LIBDIR}" -) - -install(FILES "${PROJECT_BINARY_DIR}/e4hsource/libe4hsource_rdict.pcm" - DESTINATION "${CMAKE_INSTALL_LIBDIR}" -) diff --git a/e4hsource/include/EDM4hepDataSource/EDM4hepDataSource.hxx b/e4hsource/include/EDM4hepDataSource/EDM4hepDataSource.hxx deleted file mode 100644 index 7bdb5416ae..0000000000 --- a/e4hsource/include/EDM4hepDataSource/EDM4hepDataSource.hxx +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef EDM4HEP_DATASOURCE_H__ -#define EDM4HEP_DATASOURCE_H__ - -// STL -#include -#include -#include -#include - -// ROOT -#include -#include - -// Podio -#include -#include -#include - -bool loadEDM4hepDataSource(); - -namespace FCCAnalyses { - using Record_t = std::vector; - - class EDM4hepDataSource final : public ROOT::RDF::RDataSource { - public: - EDM4hepDataSource(const std::string& filePath, int nEvents = -1); - EDM4hepDataSource(const std::vector& filePathList, - int nEvents = -1); - - void SetNSlots(unsigned int nSlots); - - template - std::vector GetColumnReaders(std::string_view columnName); - - void Initialize(); - - std::vector> GetEntryRanges(); - - void InitSlot(unsigned int slot, ULong64_t firstEntry); - - bool SetEntry(unsigned int slot, ULong64_t entry); - - void FinalizeSlot(unsigned int slot); - - void Finalize(); - - const std::vector& GetColumnNames() const; - - bool HasColumn(std::string_view columnName) const; - - std::string GetTypeName(std::string_view columnName) const; - - protected: - Record_t GetColumnReadersImpl (std::string_view name, - const std::type_info& typeInfo); - - std::string AsString() { return "Edm4hep data source"; }; - - private: - /// Number of slots/threads - unsigned int m_nSlots; - /// Input filename - std::vector m_filePathList; - /// Total number of events - unsigned int m_nEvents; - /// Ranges of events available to be processed - std::vector> m_rangesAvailable; - /// Ranges of events available ever created - std::vector> m_rangesAll; - /// Column names - std::vector m_columnNames; - /// Column types - std::vector m_columnTypes; - /// Collections, m_Collections[columnIndex][slotIndex] - std::vector> m_Collections; - /// Active collections - std::vector m_activeCollections; - /// Root podio readers - std::vector> m_podioReaders; - /// Podio frames - std::vector> m_frames; - /// Setup input - void SetupInput(int nEvents); - }; - - - /** - * \brief Retrieve from EDM4hepDataSource per-thread readers for the - * desired columns. - */ - template - std::vector - EDM4hepDataSource::GetColumnReaders(std::string_view columnName) { - std::cout << "EDM4hepDataSource: Getting column readers for column: " - << columnName << std::endl; - - std::vector readers; - - return readers; - } - - ROOT::RDataFrame FromEDM4hep(const std::vector& filePathList); -} - -#endif /* EDM4HEP_DATASOURCE_H__ */ diff --git a/e4hsource/include/EDM4hepDataSource/LinkDef.h b/e4hsource/include/EDM4hepDataSource/LinkDef.h deleted file mode 100644 index 81e5097aa6..0000000000 --- a/e4hsource/include/EDM4hepDataSource/LinkDef.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifdef __CINT__ - -//Globals -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; -#pragma link C++ nestedclasses; - -// Load function -#pragma link C++ function loadEDM4hepDataSource; - -// Source -#pragma link C++ class FCCAnalyses::EDM4hepDataSource; -#pragma link C++ function FCCAnalyses::FromEDM4hep; - -#endif diff --git a/e4hsource/src/EDM4hepDataSource.cxx b/e4hsource/src/EDM4hepDataSource.cxx deleted file mode 100644 index a222d9a38b..0000000000 --- a/e4hsource/src/EDM4hepDataSource.cxx +++ /dev/null @@ -1,388 +0,0 @@ -#include "EDM4hepDataSource/EDM4hepDataSource.hxx" - -// STL -#include -#include -#include -#include -#include -#include - -// ROOT -#include -#include -#include - -bool loadEDM4hepDataSource() { - return true; -} - -namespace FCCAnalyses { - /** - * \brief Construct the EDM4hepDataSource from the provided file. - */ - EDM4hepDataSource::EDM4hepDataSource(const std::string& filePath, - int nEvents) : m_nSlots{1} { - m_filePathList.emplace_back(filePath); - SetupInput(nEvents); - } - - /** - * \brief Construct the EDM4hepDataSource from the provided file list. - */ - EDM4hepDataSource::EDM4hepDataSource( - const std::vector& filePathList, - int nEvents) : m_nSlots{1}, - m_filePathList{filePathList} { - SetupInput(nEvents); - } - - /** - * \brief Setup input for the EDM4hepDataSource. - */ - void EDM4hepDataSource::SetupInput(int nEvents) { - // std::cout << "EDM4hepDataSource: Constructing the source ..." << std::endl; - - if (m_filePathList.empty()) { - throw std::runtime_error("EDM4hepDataSource: No input files provided!"); - } - - for (const auto& filePath : m_filePathList) { - // Check if file exists - // Warning: file can be coming from web or eos - // if (!std::filesystem::exists(filePath)) { - // throw std::runtime_error("EDM4hepDataSource: Provided file \"" - // + filePath + "\" does not exist!"); - // } - - // Check if the provided file contains required metadata - std::unique_ptr inFile(TFile::Open(filePath.data(), "READ")); - auto metadata = inFile->Get("podio_metadata"); - if (!metadata) { - throw std::runtime_error( - "EDM4hepDataSource: Provided file is missing podio metadata!"); - } - } - - // Create probing frame - podio::Frame frame; - unsigned int nEventsInFiles = 0; - podio::ROOTReader podioReader; - podioReader.openFiles(m_filePathList); - nEventsInFiles = podioReader.getEntries("events"); - frame = podio::Frame(podioReader.readEntry("events", 0)); - - // Determine over how many events to run - if (nEventsInFiles > 0) { - /* - std::cout << "EDM4hepDataSource: Found " << nEventsInFiles - << " events in files: \n"; - for (const auto& filePath : m_filePathList) { - std::cout << " - " << filePath << "\n"; - } - */ - } else { - throw std::runtime_error("EDM4hepDataSource: No events found!"); - } - - if (nEvents < 0) { - m_nEvents = nEventsInFiles; - } else if (nEvents == 0) { - throw std::runtime_error( - "EDM4hepDataSource: Requested to run over zero events!"); - } else { - m_nEvents = nEvents; - } - if (nEventsInFiles < m_nEvents) { - m_nEvents = nEventsInFiles; - } - - // std::cout << "EDM4hepDataSource: Running over " << m_nEvents << " events." - // << std::endl; - - // Get collections stored in the files - std::vector collNames = frame.getAvailableCollections(); - // std::cout << "EDM4hepDataSource: Found following collections:\n"; - for (auto& collName: collNames) { - const podio::CollectionBase* coll = frame.get(collName); - if (coll->isValid()) { - m_columnNames.emplace_back(collName); - m_columnTypes.emplace_back(coll->getValueTypeName()); - // std::cout << " - " << collName << "\n"; - } - } - } - - - /** - * \brief Inform the EDM4hepDataSource of the desired level of parallelism. - */ - void - EDM4hepDataSource::SetNSlots(unsigned int nSlots) { - // std::cout << "EDM4hepDataSource: Setting num. of slots to: " << nSlots - // << std::endl; - m_nSlots = nSlots; - - if (m_nSlots > m_nEvents) { - throw std::runtime_error("EDM4hepDataSource: Number of events too small!"); - } - - int eventsPerSlot = m_nEvents / m_nSlots; - for (size_t i = 0; i < (m_nSlots - 1); ++i) { - m_rangesAll.emplace_back(eventsPerSlot * i, eventsPerSlot * (i + 1)); - } - m_rangesAll.emplace_back(eventsPerSlot * (m_nSlots - 1), m_nEvents); - m_rangesAvailable = m_rangesAll; - - // Initialize set of addresses needed - m_Collections.resize( - m_columnNames.size(), - std::vector(m_nSlots, nullptr)); - - // Initialize podio readers - for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders.emplace_back(std::make_unique()); - } - - for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders[i]->openFiles(m_filePathList); - } - - for (size_t i = 0; i < m_nSlots; ++i) { - m_frames.emplace_back( - std::make_unique( - podio::Frame(m_podioReaders[i]->readEntry("events", 0)))); - } - } - - - /** - * \brief Inform RDataSource that an event-loop is about to start. - */ - void - EDM4hepDataSource::Initialize() { - // std::cout << "EDM4hepDataSource: Initializing the source ..." << std::endl; - } - - - /** - * \brief Retrieve from EDM4hepDataSource a set of ranges of entries that can be - * processed concurrently. - */ - std::vector> - EDM4hepDataSource::GetEntryRanges() { - // std::cout << "EDM4hepDataSource: Getting entry ranges ..." << std::endl; - - std::vector> rangesToBeProcessed; - for (auto& range: m_rangesAvailable) { - rangesToBeProcessed.emplace_back( - std::pair{range.first, range.second} - ); - if (rangesToBeProcessed.size() >= m_nSlots) { - break; - } - } - - if (m_rangesAvailable.size() > m_nSlots) { - m_rangesAvailable.erase(m_rangesAvailable.begin(), - m_rangesAvailable.begin() + m_nSlots); - } else { - m_rangesAvailable.erase(m_rangesAvailable.begin(), - m_rangesAvailable.end()); - } - - - /* - std::cout << "EDM4hepDataSource: Ranges to be processed:\n"; - for (auto& range: rangesToBeProcessed) { - std::cout << " {" << range.first << ", " << range.second - << "}\n"; - } - - if (m_rangesAvailable.size() > 0) { - - std::cout << "EDM4hepDataSource: Ranges remaining:\n"; - for (auto& range: m_rangesAvailable) { - std::cout << " {" << range.first << ", " << range.second - << "}\n"; - } - } else { - std::cout << "EDM4hepDataSource: No more remaining ranges.\n"; - } - */ - - return rangesToBeProcessed; - } - - - /** - * \brief Inform EDM4hepDataSource that a certain thread is about to start working - * on a certain range of entries. - */ - void - EDM4hepDataSource::InitSlot(unsigned int slot, ULong64_t firstEntry) { - // std::cout << "EDM4hepDataSource: Initializing slot: " << slot - // << " with first entry " << firstEntry << std::endl; - } - - - /** - * \brief Inform EDM4hepDataSource that a certain thread is about to start working - * on a certain entry. - */ - bool - EDM4hepDataSource::SetEntry(unsigned int slot, ULong64_t entry) { - // std::cout << "EDM4hepDataSource: In slot: " << slot << ", setting entry: " - // << entry << std::endl; - - m_frames[slot] = std::make_unique( - podio::Frame(m_podioReaders[slot]->readEntry("events", entry))); - - for (auto& collectionIndex: m_activeCollections) { - m_Collections[collectionIndex][slot] = - m_frames[slot]->get(m_columnNames.at(collectionIndex)); - /* - std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - if (m_Collections[collectionIndex][slot]->isValid()) { - std::cout << "Collection valid\n"; - } - */ - } - - return true; - } - - - /** - * \brief Inform EDM4hepDataSource that a certain thread finished working on a - * certain range of entries. - */ - void - EDM4hepDataSource::FinalizeSlot(unsigned int slot) { - // std::cout << "EDM4hepDataSource: Finalizing slot: " << slot << std::endl; - // std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; - - // for (auto& collectionIndex: m_activeCollections) { - // std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - // std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - // if (m_Collections[collectionIndex][slot]->isValid()) { - // std::cout << "Collection valid\n"; - // } - // std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - // } - } - - - /** - * \brief Inform RDataSource that an event-loop finished. - */ - void - EDM4hepDataSource::Finalize() { - // std::cout << "EDM4hepDataSource: Finalizing ..." << std::endl; - } - - - /** - * \brief Type-erased vector of pointers to pointers to column values --- one - * per slot - */ - Record_t - EDM4hepDataSource::GetColumnReadersImpl(std::string_view columnName, - const std::type_info& typeInfo) { - /* - std::cout << "EDM4hepDataSource: Getting column reader implementation for column:\n" - << " " << columnName - << "\n with type: " << typeInfo.name() << std::endl; - */ - - auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), - columnName); - if (itr == m_columnNames.end()) { - std::string errMsg = "EDM4hepDataSource: Can't find requested column \""; - errMsg += columnName; - errMsg += "\"!"; - throw std::runtime_error(errMsg); - } - auto columnIndex = std::distance(m_columnNames.begin(), itr); - m_activeCollections.emplace_back(columnIndex); - /* - std::cout << "EDM4hepDataSource: Active collections so far:\n" - << " "; - for (auto& i: m_activeCollections) { - std::cout << i << ", "; - } - std::cout << std::endl; - */ - - Record_t columnReaders(m_nSlots); - for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { - // std::cout << " Column index: " << columnIndex << "\n"; - // std::cout << " Slot index: " << slotIndex << "\n"; - // std::cout << " Address: " - // << &m_Collections[columnIndex][slotIndex] - // << std::endl; - columnReaders[slotIndex] = (void*) &m_Collections[columnIndex][slotIndex]; - } - - return columnReaders; - } - - - /** - * \brief Returns a reference to the collection of the dataset's column names - */ - const std::vector& - EDM4hepDataSource::GetColumnNames() const { - // std::cout << "EDM4hepDataSource: Looking for column names" << std::endl; - - return m_columnNames; - } - - /** - * \brief Checks if the dataset has a certain column. - */ - bool - EDM4hepDataSource::HasColumn(std::string_view columnName) const { - // std::cout << "EDM4hepDataSource: Looking for column: " << columnName - // << std::endl; - - if (std::find(m_columnNames.begin(), - m_columnNames.end(), - columnName) != m_columnNames.end()) { - return true; - } - - return false; - } - - - /** - * \brief Type of a column as a string. Required for JITting. - */ - std::string - EDM4hepDataSource::GetTypeName(std::string_view columnName) const { - // std::cout << "EDM4hepDataSource: Looking for type name of column: " - // << columnName << std::endl; - - auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), - columnName); - if (itr != m_columnNames.end()) { - auto i = std::distance(m_columnNames.begin(), itr); - // std::cout << "EDM4hepDataSource: Found type name: " - // << m_columnTypes.at(i) << std::endl; - - return m_columnTypes.at(i) + "Collection"; - } - - return "float"; - } - - ROOT::RDataFrame - FromEDM4hep(const std::vector& filePathList) { - ROOT::RDataFrame rdf(std::make_unique(filePathList)); - - return rdf; - } -} diff --git a/examples/data_source/analysis_final.py b/examples/data_source/analysis_final.py new file mode 100644 index 0000000000..2bec5a829a --- /dev/null +++ b/examples/data_source/analysis_final.py @@ -0,0 +1,50 @@ +#Input directory where the files produced at the pre-selection level are +inputDir = "outputs/data_source/stage2/" + +#Input directory where the files produced at the pre-selection level are +outputDir = "outputs/data_source/final/" + +processList = { + 'p8_ee_ZZ_ecm240':{},#Run over the full statistics from stage2 input file /p8_ee_ZZ_ecm240.root. Keep the same output name as input + 'p8_ee_WW_ecm240':{}, #Run over the statistics from stage2 input files /p8_ee_WW_ecm240_out/*.root. Keep the same output name as input + 'MySample_p8_ee_ZH_ecm240':{} #Run over the full statistics from stage2 input file /p8_ee_ZH_ecm240_out.root. Change the output name to MySample_p8_ee_ZH_ecm240 +} + +#Link to the dictonary that contains all the cross section informations etc... +procDict = "FCCee_procDict_spring2021_IDEA.json" + +#Add MySample_p8_ee_ZH_ecm240 as it is not an offical process +procDictAdd={"MySample_p8_ee_ZH_ecm240":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 0.201868, "kfactor": 1.0, "matchingEfficiency": 1.0}} + +#Number of CPUs to use +nCPUS = 2 + +#produces ROOT TTrees, default is False +doTree = False + +saveTabular = True + +###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file +cutList = {"sel0":"Zcand_q == 0", + "sel1":"Zcand_q == -1 || Zcand_q == 1", + "sel2":"Zcand_m > 80 && Zcand_m < 100", + "sel3":"MyFilter==true && (Zcand_m < 80 || Zcand_m > 100)" + } + + +#Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. +histoList = { + "mz":{"name":"Zcand_m","title":"m_{Z} [GeV]","bin":125,"xmin":0,"xmax":250}, + "mz_zoom":{"name":"Zcand_m","title":"m_{Z} [GeV]","bin":40,"xmin":80,"xmax":100}, + "leptonic_recoil_m":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":0,"xmax":200}, + "leptonic_recoil_m_zoom":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":200,"xmin":80,"xmax":160}, + "leptonic_recoil_m_zoom1":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":120,"xmax":140}, + "leptonic_recoil_m_zoom2":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":200,"xmin":120,"xmax":140}, + "leptonic_recoil_m_zoom3":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":400,"xmin":120,"xmax":140}, + "leptonic_recoil_m_zoom4":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":800,"xmin":120,"xmax":140}, + "leptonic_recoil_m_zoom5":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":2000,"xmin":120,"xmax":140}, + "leptonic_recoil_m_zoom6":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":130.3,"xmax":132.5}, + "mz_1D":{"cols":["Zcand_m"],"title":"m_{Z} [GeV]", "bins": [(40,80,100)]}, # 1D histogram (alternative syntax) + "mz_recoil_2D":{"cols":["Zcand_m", "Zcand_recoil_m"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram + "mz_recoil_3D":{"cols":["Zcand_m", "Zcand_recoil_m", "Zcand_recoil_m"],"title":"m_{Z} - leptonic recoil - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140), (100,120,140)]}, # 3D histogram +} diff --git a/examples/data_source/analysis_plots.py b/examples/data_source/analysis_plots.py new file mode 100644 index 0000000000..5a41842da6 --- /dev/null +++ b/examples/data_source/analysis_plots.py @@ -0,0 +1,49 @@ +import ROOT + +# global parameters +intLumi = 5.0e+06 #in pb-1 +ana_tex = 'e^{+}e^{-} #rightarrow ZH #rightarrow #mu^{+}#mu^{-} + X' +delphesVersion = '3.4.2' +energy = 240.0 +collider = 'FCC-ee' +inputDir = 'outputs/data_source/final/' +formats = ['png','pdf'] +yaxis = ['lin','log'] +stacksig = ['stack','nostack'] +outdir = 'outputs/data_source/plots/' +plotStatUnc = True + +variables = ['mz','mz_zoom','leptonic_recoil_m','leptonic_recoil_m_zoom','leptonic_recoil_m_zoom2'] +rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional) + +###Dictonnary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection +selections = {} +selections['ZH'] = ["sel0","sel1"] +selections['ZH_2'] = ["sel0","sel1"] + +extralabel = {} +extralabel['sel0'] = "Selection: N_{Z} = 1" +extralabel['sel1'] = "Selection: N_{Z} = 1; 80 GeV < m_{Z} < 100 GeV" + +colors = {} +colors['ZH'] = ROOT.kRed +colors['WW'] = ROOT.kBlue+1 +colors['ZZ'] = ROOT.kGreen+2 +colors['VV'] = ROOT.kGreen+3 + +plots = {} +plots['ZH'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']}, + 'backgrounds':{'WW':['p8_ee_WW_ecm240'], + 'ZZ':['p8_ee_ZZ_ecm240']} + } + + +plots['ZH_2'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']}, + 'backgrounds':{'VV':['p8_ee_WW_ecm240','p8_ee_ZZ_ecm240']} + } + +legend = {} +legend['ZH'] = 'ZH' +legend['WW'] = 'WW' +legend['ZZ'] = 'ZZ' +legend['VV'] = 'VV boson' diff --git a/e4hsource/test/analysis_stage1.py b/examples/data_source/analysis_stage1.py similarity index 79% rename from e4hsource/test/analysis_stage1.py rename to examples/data_source/analysis_stage1.py index aeccc7e399..9240b3d860 100644 --- a/e4hsource/test/analysis_stage1.py +++ b/examples/data_source/analysis_stage1.py @@ -1,29 +1,37 @@ ''' -First stage +First analysis stage ''' # Mandatory: -# List of processes used in the analysis +# List of samples(processes) used in the analysis processList = { # Run over the full statistics and save it to one output file named # /.root - 'p8_ee_ZZ_ecm240': {'fraction': 0.005}, + # 'p8_ee_ZZ_ecm240': {'fraction': 0.005}, + 'p8_ee_ZZ_ecm240': {'fraction': 1.}, # Run over 50% of the statistics and save output into two files named # /p8_ee_WW_ecm240/chunk.root - 'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, + # 'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, + 'p8_ee_WW_ecm240': {'fraction': 1.}, # Run over 20% of the statistics and save output into one file named # /p8_ee_ZH_ecm240_out.root - 'p8_ee_ZH_ecm240': {'fraction': 0.2, 'output': 'p8_ee_ZH_ecm240_out'} + # 'p8_ee_ZH_ecm240': {'fraction': 0.2, 'output': 'p8_ee_ZH_ecm240_out'} + 'p8_ee_ZH_ecm240': {'fraction': 1.} } -# Mandatory: +# Mandatory: prodTag or inputDir # Production tag when running over EDM4Hep centrally produced events, this -# points to the yaml files for getting sample statistics -prodTag = "FCCee/spring2021/IDEA/" +# points to the yaml files for getting sample file list. +# prodTag = "FCCee/spring2021/IDEA/" + +# Mandatory: prodTag or inputDir +# Input directory when not running over centrally produced EDM4hep events. +# It can still be EDM4hep files produced standalone. +inputDir = "/home/jsmiesko/gen" # Optional: # Output directory, default is local running directory -outputDir = "outputs/source" +outputDir = "outputs/data_source/stage1" # Optional: # Name of the analysis, default is "" @@ -31,7 +39,7 @@ # Optional: # Number of threads to run on, default is 4 -# nCPUS = 4 +nCPUS = 32 # Optional: # Optional running on HTCondor, default is False @@ -113,6 +121,7 @@ def analysers(dframe): # Filter on at least one candidate .Filter("zed_leptonic_recoil_m.size() > 0") ) + return dframe2 # Mandatory: @@ -123,14 +132,14 @@ def output(): Which columns to snapshot. ''' branchList = [ - "selected_muons_pt", - "selected_muons_y", - "selected_muons_p", - "selected_muons_e", - "zed_leptonic_pt", - "zed_leptonic_m", - "zed_leptonic_charge", - "zed_leptonic_recoil_m" - ] + "selected_muons_pt", + "selected_muons_y", + "selected_muons_p", + "selected_muons_e", + "zed_leptonic_pt", + "zed_leptonic_m", + "zed_leptonic_charge", + "zed_leptonic_recoil_m" + ] return branchList diff --git a/examples/data_source/analysis_stage2.py b/examples/data_source/analysis_stage2.py new file mode 100644 index 0000000000..90493ec828 --- /dev/null +++ b/examples/data_source/analysis_stage2.py @@ -0,0 +1,107 @@ +''' +Analysis stage 2 +''' + +# Mandatory: +# List of samples used in the analysis +processList = { + # Run over the full statistics from stage1 input files + # /p8_ee_ZZ_ecm240.root. + # Keep the same output name as input + 'p8_ee_ZZ_ecm240': {}, + # Run over the statistics from stage1 input files + # /p8_ee_WW_ecm240_out/*.root. + # Keep the same output name as input + 'p8_ee_WW_ecm240': {}, + # Run over the full statistics from stage1 input file + # /p8_ee_ZH_ecm240_out.root. + # Change the output name to MySample_p8_ee_ZH_ecm240 + 'p8_ee_ZH_ecm240_out': {'output': 'MySample_p8_ee_ZH_ecm240'} +} + +# Mandatory: +# Input directory when not running over centrally produced EDM4hep events. +# It can still be EDM4hep files produced standalone or files from a first +# analysis step (this is the case in this example it runs over the files +# produced from analysis.py) +inputDir = "outputs/data_source/stage1" + +# Optional: +# Output directory, default is local dir +outputDir = "outputs/data_source/stage2" + +# Optional: +# Number of threads, default is 4 +nCPUS = 2 + +# Optional: +# Running on HTCondor, default is False +runBatch = False + +# USER DEFINED CODE +import ROOT +ROOT.gInterpreter.Declare(""" +bool myFilter(ROOT::VecOps::RVec mass) { + for (size_t i = 0; i < mass.size(); ++i) { + if (mass.at(i)>80. && mass.at(i)<100.) + return true; + } + return false; +} +""") +# END USER DEFINED CODE + +# Mandatory: +# RDFanalysis class where the use defines the operations on the TTree +class RDFanalysis(): + ''' + Analysis class. + ''' + + # Mandatory: + # Analysers function to define the analysers to process, please make sure + # you return the last dataframe, in this example it is df2 + def analysers(dframe): + ''' + Defining operations on the dataframe. + ''' + dframe2 = ( + dframe + # Filter to have exactly one Z candidate + .Filter("zed_leptonic_m.size() == 1") + # Define Z candidate mass + .Define("Zcand_m", "zed_leptonic_m[0]") + # Define Z candidate recoil mass + .Define("Zcand_recoil_m", "zed_leptonic_recoil_m[0]") + # Define Z candidate pt + .Define("Zcand_pt", "zed_leptonic_pt[0]") + # Define Z candidate charge + .Define("Zcand_q", "zed_leptonic_charge[0]") + # Define new var rdf entry (example) + .Define("entry", "rdfentry_") + # Define a weight based on entry + # (inline example of possible operations) + .Define("weight", "return 1./(entry+1)") + # Define a variable based on a custom filter + .Define("MyFilter", "myFilter(zed_leptonic_m)") + ) + return dframe2 + + # Mandatory: + # Output function, please make sure you return the branchlist as a python + # list. + def output(): + ''' + Which columns to snapshot. + ''' + branchList = [ + "Zcand_m", + "Zcand_pt", + "Zcand_q", + "MyFilter", + "Zcand_recoil_m", + "entry", + "weight" + ] + + return branchList diff --git a/e4hsource/test/histmaker_source.py b/examples/data_source/histmaker_source.py similarity index 100% rename from e4hsource/test/histmaker_source.py rename to examples/data_source/histmaker_source.py diff --git a/e4hsource/test/stages_source.py b/examples/data_source/stages_source.py similarity index 100% rename from e4hsource/test/stages_source.py rename to examples/data_source/stages_source.py diff --git a/e4hsource/test/standalone.py b/examples/data_source/standalone.py similarity index 75% rename from e4hsource/test/standalone.py rename to examples/data_source/standalone.py index c18927bd76..54ac698dc4 100644 --- a/e4hsource/test/standalone.py +++ b/examples/data_source/standalone.py @@ -5,20 +5,25 @@ import ROOT ROOT.gROOT.SetBatch(True) + def main(): + ''' + Main entry point for the standalone analysis. + ''' input_list = ['https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/' 'p8_ee_WW_ecm240_edm4hep.root'] - print("----> Info: Loading analyzers from libFCCAnalyses... ",) ROOT.gSystem.Load("libFCCAnalyses") - _fcc = ROOT.dummyLoader + if ROOT.dummyLoader: + print('----> Debug: Found FCCAnalyses library.') + print("----> Info: Loading analyzers from libFCCAnalyses... ",) + if ROOT.podio.ROOTReader(): + print('----> Debug: Found Podio ROOT I/O.') print('----> Info: Loading events through EDM4hep RDataSource...') - ROOT.gSystem.Load("libe4hsource") - if ROOT.loadEDM4hepDataSource(): - print('----> Debug: EDM4hep RDataSource loaded') + try: - dframe = ROOT.FCCAnalyses.FromEDM4hep(input_list) + dframe = ROOT.podio.CreateDataFrame(input_list) except TypeError as excp: print('----> Error: Unable to build dataframe!') print(excp) @@ -35,10 +40,12 @@ def main(): count = dframe4.Count() dframe4.Snapshot("events", "output.root", {"electron_truth_pt"}) - # dframe4.Snapshot("events", "output.root", {"electron_truth", "electron_truth_pt"}) + # dframe4.Snapshot("events", "output.root", {"electron_truth", + # "electron_truth_pt"}) print("---------------------") print("Nuber of events: ", count.GetValue()) + if __name__ == "__main__": main() diff --git a/python/run_analysis.py b/python/run_analysis.py index 024f70de36..3653f1e615 100644 --- a/python/run_analysis.py +++ b/python/run_analysis.py @@ -316,19 +316,18 @@ def run_rdf(rdf_module, Create RDataFrame and snapshot it. ''' if args.use_data_source: - LOGGER.info('Loading events through EDM4hep RDataSource...') - if ROOT.gSystem.Load("libe4hsource") < 0: - LOGGER.error('Unable to load EDM4hep RDataSource library!\n' - 'Aborting...') + if ROOT.podio.ROOTReader(): + LOGGER.debug('Found Podio ROOT I/O.') + else: + LOGGER.error('Podio ROOT I/O library not found!\nAborting...') sys.exit(3) + LOGGER.info('Loading events through podio::ROOTDataSource...') - if ROOT.loadEDM4hepDataSource(): - LOGGER.debug('EDM4hep RDataSource loaded.') try: - dframe = ROOT.FCCAnalyses.FromEDM4hep(input_list) + dframe = ROOT.podio.CreateDataFrame(input_list) except TypeError as excp: - LOGGER.error('Unable to build dataframe using RDataSource!\n%s', - excp) + LOGGER.error('Unable to build dataframe using' + 'podio::RDataSource!\n%s', excp) sys.exit(3) else: dframe = ROOT.RDataFrame("events", input_list) @@ -477,7 +476,7 @@ def run_local(rdf_module, infile_list, args): Run analysis locally. ''' # Create list of files to be processed - info_msg = 'Creating dataframe object from files:\n\t' + info_msg = 'Creating dataframe object from files:\n' file_list = ROOT.vector('string')() # Amount of events processed in previous stage (= 0 if it is the first # stage) @@ -490,7 +489,7 @@ def run_local(rdf_module, infile_list, args): filepath = apply_filepath_rewrites(filepath) file_list.push_back(filepath) - info_msg += f'- {filepath}\t\n' + info_msg += f'\t- {filepath}\n' try: infile = ROOT.TFile.Open(filepath, 'READ') except OSError as excp: @@ -799,16 +798,15 @@ def run_histmaker(args, rdf_module, anapath): LOGGER.info(info_msg) if args.use_data_source: - LOGGER.info('Loading events through EDM4hep RDataSource...') - if ROOT.gSystem.Load("libe4hsource") < 0: - LOGGER.error('Unable to load EDM4hep RDataSource library!\n' - 'Aborting...') + if ROOT.podio.ROOTReader(): + LOGGER.debug('Found Podio ROOT I/O.') + else: + LOGGER.error('Podio ROOT I/O library not found!\nAborting...') sys.exit(3) + LOGGER.info('Loading events through podio::ROOTDataSource...') - if ROOT.loadEDM4hepDataSource(): - LOGGER.debug('EDM4hep RDataSource loaded.') try: - dframe = ROOT.FCCAnalyses.FromEDM4hep(file_list_root) + dframe = ROOT.podio.CreateDataFrame(file_list_root) except TypeError as excp: LOGGER.error('Unable to build dataframe using EDM4hep ' 'RDataSource!\n%s', excp) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3bb8b1b524..0f4b2dd807 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,5 +16,40 @@ endif() add_standalone_test("examples/FCCee/fullSim/caloNtupleizer/analysis.py") +# C++ Analyses + +# podio::ROOTDataSource +add_executable(test_data_source ${CMAKE_SOURCE_DIR}/tests/data_source/test_source.cxx) +target_include_directories(test_data_source PRIVATE ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(test_data_source podio::podioRootIO + EDM4HEP::edm4hep +) + +add_executable(test_source_associations + ${CMAKE_SOURCE_DIR}/tests/data_source/test_source_associations.cxx) +target_include_directories(test_source_associations PRIVATE + ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(test_source_associations podio::podioRootIO + EDM4HEP::edm4hep + FCCAnalyses +) + +# Low level access +add_executable(test_low_level ${CMAKE_SOURCE_DIR}/tests/data_source/test_low_level.cxx) +target_link_libraries(test_low_level ROOT::ROOTDataFrame + podio::podioRootIO + EDM4HEP::edm4hep + EDM4HEP::edm4hepDict +) + +add_executable(test_low_level_associations + ${CMAKE_SOURCE_DIR}/tests/data_source/test_low_level_associations.cxx) +target_link_libraries(test_low_level_associations ROOT::ROOTDataFrame + podio::podioRootIO + EDM4HEP::edm4hep + EDM4HEP::edm4hepDict + FCCAnalyses +) + # TODO: make this test run in the spack build environment #add_generic_test(build_new_case_study "tests/build_new_case_study.sh") diff --git a/tests/data_source/test_low_level.cxx b/tests/data_source/test_low_level.cxx new file mode 100644 index 0000000000..79baa8b6bd --- /dev/null +++ b/tests/data_source/test_low_level.cxx @@ -0,0 +1,110 @@ +// ROOT +#include +#include +#include +#include +// EDM4hep +#include +#include + +ROOT::VecOps::RVec selElectrons(ROOT::VecOps::RVec& inParticles) { + ROOT::VecOps::RVec electrons; + for (size_t i = 0; i < inParticles.size(); ++i) { + if (inParticles[i].PDG == 11) { + electrons.emplace_back(inParticles[i]); + } + } + + return electrons; +} + +struct selPDG { + selPDG(int pdg = 11, bool chargeConjugateAllowed = true); + const int m_pdg; + const bool m_chargeConjugateAllowed; + ROOT::VecOps::RVec operator() (ROOT::VecOps::RVec& inParticles); +}; + +selPDG::selPDG(int pdg, + bool chargeConjugateAllowed) : m_pdg(pdg), + m_chargeConjugateAllowed(chargeConjugateAllowed) {}; + +ROOT::VecOps::RVec selPDG::operator() (ROOT::VecOps::RVec& inParticles) { + ROOT::VecOps::RVec result; + for (size_t i = 0; i < inParticles.size(); ++i) { + auto &particle = inParticles[i]; + if (m_chargeConjugateAllowed) { + if (std::abs(particle.PDG ) == std::abs(m_pdg)) { + result.emplace_back(particle); + } + } + else { + if(particle.PDG == m_pdg) { + result.emplace_back(particle); + } + } + } + + return result; +} + +ROOT::VecOps::RVec getPx(ROOT::VecOps::RVec inParticles) { + ROOT::VecOps::RVec result; + for (auto& p: inParticles) { + result.push_back(p.momentum.x); + } + + return result; +} + + +int main(int argc, char *argv[]) { + // auto verbosity = ROOT::Experimental::RLogScopedVerbosity(ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + int nCPU = 4; + if (argc > 1) { + nCPU = atoi(argv[1]); + } + + std::vector filePathList; + std::string filePathBase = "/home/jsmiesko/source/FCCAnalyses/inputs/"; + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_10.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_11.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_12.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_1.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_2.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_3.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_4.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_5.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_6.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_7.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_8.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_9.edm4hep.root"); + + ROOT::EnableImplicitMT(nCPU); + + ROOT::RDataFrame rdf("events", filePathList); + + // rdf.Describe().Print(); + std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"}); + // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"}); + auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"}); + auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"}); + auto h_particles_px = rdf4.Histo1D("particles_px"); + auto h_electrons_px = rdf4.Histo1D("electrons_px"); + + h_particles_px->Print(); + h_electrons_px->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_particles_px->Draw(); + canvas->Print("low_level_particles_px.pdf"); + h_electrons_px->Draw(); + canvas->Print("low_level_electrons_px.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/data_source/test_low_level_associations.cxx b/tests/data_source/test_low_level_associations.cxx new file mode 100644 index 0000000000..b21ffabe1e --- /dev/null +++ b/tests/data_source/test_low_level_associations.cxx @@ -0,0 +1,77 @@ +// ROOT +#include "ROOT/RVec.hxx" +#include "ROOT/RDataFrame.hxx" +#include "ROOT/RLogger.hxx" +#include "TCanvas.h" +// EDM4hep +#include "edm4hep/MCParticleData.h" +#include "edm4hep/SimCalorimeterHitData.h" +// FCCAnalyses +#include "FCCAnalyses/SmearObjects.h" +#include "FCCAnalyses/ReconstructedParticle2MC.h" + + +int main(int argc, char *argv[]) { + // auto verbosity = ROOT::Experimental::RLogScopedVerbosity(ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\""); + if (!success) { + std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!" + << std::endl; + return EXIT_FAILURE; + } + + int nCPU = 4; + if (argc > 1) { + nCPU = atoi(argv[1]); + } + + std::vector filePathList; + std::string filePathBase = "/home/jsmiesko/source/FCCAnalyses/inputs/"; + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_10.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_11.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_12.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_1.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_2.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_3.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_4.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_5.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_6.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_7.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_8.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_9.edm4hep.root"); + + ROOT::EnableImplicitMT(nCPU); + + ROOT::RDataFrame rdf("events", filePathList); + + // rdf.Describe().Print(); + // std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Alias("MCRecoAssociations0", "_MCRecoAssociations_rec.index"); + auto rdf3 = rdf2.Alias("MCRecoAssociations1", "_MCRecoAssociations_sim.index"); + auto rdf4 = rdf3.Define( + "RP_MC_index", + FCCAnalyses::ReconstructedParticle2MC::getRP2MC_index, + {"MCRecoAssociations0", "MCRecoAssociations1", "ReconstructedParticles"} + ); + auto rdf5 = rdf4.Define( + "SmearedTracks", + FCCAnalyses::SmearObjects::SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, true), + {"ReconstructedParticles", "_EFlowTrack_trackStates", "RP_MC_index", "Particle"} + ); + auto rdf6 = rdf5.Define("smearTrack_omega", "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;"); + // auto rdf6 = rdf5.Define("smearTrack_omega", "return RP_MC_index[0];"); + auto h_smeared_tracks_omega = rdf6.Histo1D("smearTrack_omega"); + + + h_smeared_tracks_omega->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_smeared_tracks_omega->Draw(); + canvas->Print("low_level_smeared_tracks_omega.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/data_source/test_source.cxx b/tests/data_source/test_source.cxx new file mode 100644 index 0000000000..b355ed51bf --- /dev/null +++ b/tests/data_source/test_source.cxx @@ -0,0 +1,152 @@ +// ROOT +#include +#include +#include +#include + +// PODIO +#include + +// EDM4hep +#include +#include +#include +#include +#include + +edm4hep::MCParticleCollection selElectrons(edm4hep::MCParticleCollection& inParticles) { + edm4hep::MCParticleCollection electrons; + electrons.setSubsetCollection(); + for (auto particle: inParticles) { + if (particle.getPDG() == 11) { + auto electron = edm4hep::MCParticle(particle); + electrons.push_back(electron); + } + } + + return electrons; +} + +struct selPDG { + selPDG(int pdg = 11, bool chargeConjugateAllowed = true); + const int m_pdg; + const bool m_chargeConjugateAllowed; + edm4hep::MCParticleCollection operator() (edm4hep::MCParticleCollection& inParticles); +}; + +selPDG::selPDG(int pdg, + bool chargeConjugateAllowed) : m_pdg(pdg), + m_chargeConjugateAllowed(chargeConjugateAllowed) {}; + +edm4hep::MCParticleCollection selPDG::operator() (edm4hep::MCParticleCollection& inParticles) { + edm4hep::MCParticleCollection result; + result.setSubsetCollection(); + for (auto particle: inParticles) { + if (m_chargeConjugateAllowed) { + if (std::abs(particle.getPDG() ) == std::abs(m_pdg)) { + result.push_back(particle); + } + } + else { + if(particle.getPDG() == m_pdg) { + result.push_back(particle); + } + } + } + + return result; +} + +ROOT::VecOps::RVec getPx(edm4hep::MCParticleCollection& inParticles) { + ROOT::VecOps::RVec result; + for (auto particle: inParticles) { + result.push_back(particle.getMomentum().x); + } + + return result; +} + +edm4hep::MCParticleCollection get_stable_particles_from_decay(edm4hep::MCParticle in) { + edm4hep::MCParticleCollection result; + result.setSubsetCollection(); + + auto daughters = in.getDaughters(); + if (daughters.size() != 0) { // particle is unstable + for (const auto& daughter : daughters) { + auto stable_daughters = get_stable_particles_from_decay(daughter); + for (const auto& stable_daughter : stable_daughters) { + result.push_back(stable_daughter); + } + } + } else { + result.push_back(in); + } + + return result; +} + +edm4hep::MCParticle get_mcParticle( + const edm4hep::ReconstructedParticle& recoParticle, + const edm4hep::MCRecoParticleAssociationCollection& assocColl) { + edm4hep::MCParticle no_result; + + for (const auto& assoc: assocColl) { + if (assoc.getRec() == recoParticle) { + return assoc.getSim(); + } + } + + return no_result; +} + + +int main(int argc, char *argv[]) { + // auto verbosity = ROOT::Experimental::RLogScopedVerbosity(ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + int nCPU = 4; + if (argc > 1) { + nCPU = atoi(argv[1]); + } + + std::vector filePathList; + std::string filePathBase = "/home/jsmiesko/source/FCCAnalyses/inputs/"; + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_10.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_11.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_12.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_1.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_2.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_3.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_4.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_5.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_6.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_7.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_8.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_9.edm4hep.root"); + + ROOT::EnableImplicitMT(nCPU); + + ROOT::RDataFrame rdf(std::make_unique(filePathList)); + + // rdf.Describe().Print(); + std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"}); + // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"}); + auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"}); + auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"}); + auto h_particles_px = rdf4.Histo1D("particles_px"); + auto h_electrons_px = rdf4.Histo1D("electrons_px"); + + h_particles_px->Print(); + h_electrons_px->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_particles_px->Draw(); + canvas->Print("source_particles_px.pdf"); + h_electrons_px->Draw(); + canvas->Print("source_electrons_px.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/data_source/test_source_associations.cxx b/tests/data_source/test_source_associations.cxx new file mode 100644 index 0000000000..792874a1e7 --- /dev/null +++ b/tests/data_source/test_source_associations.cxx @@ -0,0 +1,283 @@ +// std +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include +#include +#include + +// PODIO +#include + +// EDM4hep +#include +#include +#include +#include "edm4hep/TrackState.h" +#include "edm4hep/TrackCollection.h" +#include "edm4hep/ReconstructedParticleCollection.h" +#include "edm4hep/MCRecoParticleAssociationCollection.h" + +// FCCAnalyses +#include "FCCAnalyses/SmearObjects.h" +#include "FCCAnalyses/VertexingUtils.h" + + +/// for a given MC particle, returns a "track state", i.e. a vector of 5 helix +/// parameters, in Delphes convention +TVectorD TrackParamFromMC_DelphesConv(edm4hep::MCParticle aMCParticle) { + + TVector3 p(aMCParticle.getMomentum().x, + aMCParticle.getMomentum().y, + aMCParticle.getMomentum().z); + TVector3 x(1e-3 * aMCParticle.getVertex().x, + 1e-3 * aMCParticle.getVertex().y, + 1e-3 * aMCParticle.getVertex().z); // mm to m + float Q = aMCParticle.getCharge(); + TVectorD param = FCCAnalyses::VertexingUtils::XPtoPar(x, p, Q); // convention Franco + + return param; +} + + +TMatrixDSym get_trackCov(const edm4hep::TrackState &atrack, bool Units_mm) { + auto covMatrix = atrack.covMatrix; + + TMatrixDSym covM = FCCAnalyses::VertexingUtils::Edm4hep2Delphes_TrackCovMatrix(covMatrix, Units_mm); + + return covM; +} + + +/** + * \brief Generates new track states, by rescaling the covariance matrix of the + * tracks. + */ +struct SmearedTracks { + bool m_debug; + TRandom m_random; + float m_smear_parameters[5]; + SmearedTracks(float smear_d0, float smear_phi, float smear_omega, + float smear_z0, float smear_tlambda, bool debug); + ROOT::VecOps::RVec + operator()( + const edm4hep::MCRecoParticleAssociationCollection &mcRecoAssoc); +}; + + +SmearedTracks::SmearedTracks(float smear_d0, float smear_phi, float smear_omega, + float smear_z0, float smear_tlambda, + bool debug = false) { + + m_smear_parameters[0] = smear_d0; + m_smear_parameters[1] = smear_phi; + m_smear_parameters[2] = smear_omega; + m_smear_parameters[3] = smear_z0; + m_smear_parameters[4] = smear_tlambda; + + m_debug = debug; +} + + +ROOT::VecOps::RVec +SmearedTracks::operator() ( + const edm4hep::MCRecoParticleAssociationCollection &mcRecoAssoc) { + + // returns a vector of TrackStates that is parallel to the collection of full + // tracks (alltracks), i.e. same number of entries, same order. The method + // retrieve the MC particle that is associated to a track, and builds a "track + // state" out of the MC particle (i.e. it determines the corresponding (d0, + // phi, omega, z0, tanlambda). From this vector, and from the covariance + // matrix of the track, which is scaled by the user, new track states are + // generated. + + ROOT::VecOps::RVec result; + + TVectorD zero(5); + for (int k = 0; k < 5; k++) { + zero(k) = 0.; + } + + for (const auto &assoc : mcRecoAssoc) { + const auto &recoParticle = assoc.getRec(); + const auto &mcParticle = assoc.getSim(); + + if (recoParticle.getTracks().size() < 1) { + continue; + } + + if (recoParticle.getTracks().size() > 1) { + std::cout << "More than one track for one reco particle encountered!" + << std::endl; + } + + const edm4hep::Track &track = recoParticle.getTracks().at(0); + + // the MC-truth track parameters, in Delphes's comvention + TVectorD mcTrackParam = TrackParamFromMC_DelphesConv(mcParticle); + // and in edm4hep convention + TVectorD mcTrackParam_edm4hep = + FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam(mcTrackParam, + false); + + if (track.getTrackStates().size() < 1) { + continue; + } + + if (track.getTrackStates().size() > 1) { + std::cout << "More than one track state for one track encountered!" + << std::endl; + } + + const edm4hep::TrackState &trackState = track.getTrackStates().at(0); + + // the covariance matrix of the track, in Delphes's convention + TMatrixDSym Cov = get_trackCov(trackState, false); + + // if the covMat of the track is pathological (numerical precision issue, + // fraction of tracks = 5e-6): return original track + if (Cov.Determinant() <= 0) { + result.emplace_back(trackState); + continue; + } + + // scale the covariance matrix + for (int j = 0; j < 5; j++) { + for (int k = 0; k < 5; k++) { + Cov[j][k] = Cov[j][k] * (m_smear_parameters[j] * m_smear_parameters[k]); + } + } + // if (m_debug) { + // Cov.Print(); + //} + + // generate a new track state (in Delphes's convention) + TVectorD smeared_param_delphes = + FCCAnalyses::SmearObjects::CovSmear(mcTrackParam, Cov, &m_random, m_debug); + + if (smeared_param_delphes == zero) { // Cholesky decomposition failed + result.emplace_back(trackState); + continue; + } + + // back to the edm4hep conventions.. + TVectorD smeared_param = FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam( + smeared_param_delphes, false); + + auto smearedTrackState = trackState; + if (smeared_param.GetNoElements() > 4) { + smearedTrackState.D0 = smeared_param[0]; + smearedTrackState.phi = smeared_param[1]; + smearedTrackState.omega = smeared_param[2]; + smearedTrackState.Z0 = smeared_param[3]; + smearedTrackState.tanLambda = smeared_param[4]; + } else { + result.emplace_back(trackState); + continue; + } + + // transform rescaled cov matrix from Delphes convention to EDM4hep + // convention + std::array covMatrix_edm4hep = + FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackCovMatrix(Cov, false); + + smearedTrackState.covMatrix = covMatrix_edm4hep; + + if (m_debug) { + std::cout << std::endl + << "Original track " << trackState.D0 << " " << trackState.phi << " " + << trackState.omega << " " << trackState.Z0 << " " << trackState.tanLambda + << std::endl; + std::cout << "Smeared track " << smearedTrackState.D0 << " " + << smearedTrackState.phi << " " << smearedTrackState.omega << " " + << smearedTrackState.Z0 << " " << smearedTrackState.tanLambda + << std::endl; + std::cout << "MC particle " << mcTrackParam_edm4hep[0] << " " + << mcTrackParam_edm4hep[1] << " " << mcTrackParam_edm4hep[2] + << " " << mcTrackParam_edm4hep[3] << " " + << mcTrackParam_edm4hep[4] << std::endl; + for (int j = 0; j < 15; j++) + std::cout << "smeared cov matrix(" << j + << "): " << smearedTrackState.covMatrix[j] << ", scale factor: " + << smearedTrackState.covMatrix[j] / trackState.covMatrix[j] + << std::endl; + } + /* + unsigned int recoIndex = assoc.getRec().getObjectID().index; + unsigned int mcIndex = assoc.getSim().getObjectID().index; + std::cout << "Reco: " << recoIndex << " ---> MC: " << mcIndex << std::endl; + */ + + result.emplace_back(smearedTrackState); + } + + return result; +} + + + + +int main(int argc, char *argv[]) { + // auto verbosity = ROOT::Experimental::RLogScopedVerbosity(ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\""); + if (!success) { + std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!" + << std::endl; + return EXIT_FAILURE; + } + + int nCPU = 4; + if (argc > 1) { + nCPU = atoi(argv[1]); + } + + std::vector filePathList; + std::string filePathBase = "/home/jsmiesko/source/FCCAnalyses/inputs/"; + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_10.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_11.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_12.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_1.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_2.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_3.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_4.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_5.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_6.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_7.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_8.edm4hep.root"); + filePathList.emplace_back(filePathBase + "p8_ee_WW_ecm240/p8_ee_WW_ecm240_9.edm4hep.root"); + + ROOT::EnableImplicitMT(nCPU); + + ROOT::RDataFrame rdf(std::make_unique(filePathList)); + + // rdf.Describe().Print(); + std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Define( + "SmearedTracks", + SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, false), + {"MCRecoAssociations"} + ); + + auto rdf3 = rdf2.Define("smearTrack_omega", "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;"); + auto h_smeared_tracks_omega = rdf3.Histo1D("smearTrack_omega"); + + + h_smeared_tracks_omega->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_smeared_tracks_omega->Draw(); + canvas->Print("source_smeared_tracks_omega.pdf"); + + return EXIT_SUCCESS; +}