Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add a c++ implementation for podio-dump #620

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ if(BUILD_TESTING)
include(cmake/podioTest.cmake)
add_subdirectory(tests)
endif()

find_package(fmt REQUIRED)

add_subdirectory(tools)
add_subdirectory(python)

Expand Down
30 changes: 30 additions & 0 deletions include/podio/utilities/MiscHelpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef PODIO_UTILITIES_MISCHELPERS_H
#define PODIO_UTILITIES_MISCHELPERS_H

#include <algorithm>
#include <string>
#include <vector>

namespace podio::utils {

/**
 * Sort the input vector of strings alphabetically, case insensitive.
 *
 * The comparison is done character by character on the lower-cased values as
 * produced by std::tolower.
 *
 * @param strings The strings to sort. Taken by value, so the caller's vector
 *                is left untouched.
 * @returns The sorted strings
 */
inline std::vector<std::string> sortAlphabeticaly(std::vector<std::string> strings) {
  // Obviously there is no tolower(std::string) in c++, so this is slightly more
  // involved and we make use of the fact that lexicographical_compare works on
  // ranges and the fact that we can feed it a dedicated comparison function,
  // where we convert the strings to lower case char-by-char. The alternative is
  // to make string copies inside the first lambda, transform them to lowercase
  // and then use operator< of std::string, which would be effectively
  // hand-writing what is happening below.
  std::sort(strings.begin(), strings.end(), [](const auto& lhs, const auto& rhs) {
    return std::lexicographical_compare(
        lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
        // std::tolower has undefined behavior for arguments that are neither
        // EOF nor representable as unsigned char. Taking the characters as
        // unsigned char keeps bytes >= 0x80 well defined on platforms where
        // plain char is signed.
        [](const unsigned char cl, const unsigned char cr) { return std::tolower(cl) < std::tolower(cr); });
  });
  return strings;
}
} // namespace podio::utils

#endif // PODIO_UTILITIES_MISCHELPERS_H
2 changes: 1 addition & 1 deletion src/RNTupleWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat
const bool new_category = (catInfo.writer == nullptr);
if (new_category) {
// This is the minimal information that we need for now
catInfo.names = root_utils::sortAlphabeticaly(collsToWrite);
catInfo.names = podio::utils::sortAlphabeticaly(collsToWrite);
}

std::vector<root_utils::StoreCollection> collections;
Expand Down
2 changes: 1 addition & 1 deletion src/ROOTWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego
// been initialized
if (catInfo.tree == nullptr) {
catInfo.idTable = frame.getCollectionIDTableForWrite();
catInfo.collsToWrite = root_utils::sortAlphabeticaly(collsToWrite);
catInfo.collsToWrite = podio::utils::sortAlphabeticaly(collsToWrite);
catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str());
catInfo.tree->SetDirectory(m_file.get());
}
Expand Down
20 changes: 1 addition & 19 deletions src/rootUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy

#include "podio/CollectionIDTable.h"
#include "podio/utilities/MiscHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TBranch.h"
Expand Down Expand Up @@ -279,25 +280,6 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable
return collInfo;
}

/**
 * Sort the input vector of strings alphabetically, case insensitive.
 *
 * The comparison is done character by character on the lower-cased values as
 * produced by std::tolower.
 *
 * @param strings The strings to sort. Taken by value, so the caller's vector
 *                is left untouched.
 * @returns The sorted strings
 */
inline std::vector<std::string> sortAlphabeticaly(std::vector<std::string> strings) {
  // Obviously there is no tolower(std::string) in c++, so this is slightly more
  // involved and we make use of the fact that lexicographical_compare works on
  // ranges and the fact that we can feed it a dedicated comparison function,
  // where we convert the strings to lower case char-by-char. The alternative is
  // to make string copies inside the first lambda, transform them to lowercase
  // and then use operator< of std::string, which would be effectively
  // hand-writing what is happening below.
  std::sort(strings.begin(), strings.end(), [](const auto& lhs, const auto& rhs) {
    return std::lexicographical_compare(
        lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
        // std::tolower has undefined behavior for arguments that are neither
        // EOF nor representable as unsigned char. Taking the characters as
        // unsigned char keeps bytes >= 0x80 well defined on platforms where
        // plain char is signed.
        [](const unsigned char cl, const unsigned char cr) { return std::tolower(cl) < std::tolower(cr); });
  });
  return strings;
}

/**
* Check whether existingColls and candidateColls both contain the same
* collection names. Returns false if the two vectors differ in content. Inputs
Expand Down
4 changes: 4 additions & 0 deletions tools/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# C++ implementation of podio-dump. Links against the podio core and I/O
# libraries as well as fmt.
add_executable(podio-dump-cpp src/podio-dump.cpp)
target_link_libraries(podio-dump-cpp PRIVATE podio::podio podio::podioIO fmt::fmt)

# Install the executable into the same directory as the other podio tools
install(TARGETS podio-dump-cpp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executable needs to be installed.


install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR})
install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR})
if(ENABLE_RNTUPLE)
Expand Down
54 changes: 54 additions & 0 deletions tools/src/argparseUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef PODIO_TOOLS_ARGPARSEUTILS_H
#define PODIO_TOOLS_ARGPARSEUTILS_H

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/**
 * Find the first element of argv that is equal to any of the passed flags.
 *
 * @param argv  The arguments to search through
 * @param flags The flag spellings to look for; each must be comparable to a
 *              std::string via operator==
 * @returns An iterator to the first matching element, or argv.end() if none of
 *          the flags is present
 */
template <typename... Flags>
auto findFlags(const std::vector<std::string>& argv, Flags... flags) {
  // Fold over all flags; short-circuits on the first spelling that matches
  const auto matchesAnyFlag = [&](const auto& arg) { return (... || (arg == flags)); };
  return std::find_if(argv.begin(), argv.end(), matchesAnyFlag);
}

/**
 * Split a string at every occurrence of the delimiter.
 *
 * Empty tokens (leading, trailing or consecutive delimiters) are dropped, so
 * the result never contains empty strings.
 *
 * @param str       The string to split
 * @param delimiter The character at which to split
 * @returns The non-empty tokens in the order in which they appear in str
 */
inline std::vector<std::string> splitString(const std::string& str, const char delimiter) {
  std::vector<std::string> pieces;
  std::string::size_type tokenBegin = 0;
  while (tokenBegin < str.size()) {
    auto tokenEnd = str.find(delimiter, tokenBegin);
    if (tokenEnd == std::string::npos) {
      // No further delimiter, the rest of the string is the last token
      tokenEnd = str.size();
    }
    if (tokenEnd > tokenBegin) {
      pieces.emplace_back(str, tokenBegin, tokenEnd - tokenBegin);
    }
    // Continue right after the delimiter
    tokenBegin = tokenEnd + 1;
  }
  return pieces;
}

/**
 * Parse the complete string into a non-negative integer number or exit the
 * program.
 *
 * The whole string has to be consumed by the conversion. Trailing characters,
 * negative numbers and numbers that do not fit into a long long are all
 * treated as errors.
 *
 * @param str The string to parse
 * @returns The parsed number. In case of an error a message is printed to
 *          stderr and the program exits with exit code 1.
 */
inline size_t parseSizeOrExit(const std::string& str) {
  const auto parseError = [&str]() {
    std::cerr << "'" << str << "' cannot be parsed into a non-negative integer number" << std::endl;
    std::exit(1);
  };

  try {
    std::size_t pos{};
    const auto number = std::stoll(str, &pos);
    // Reject partially consumed input (e.g. "12abc") and negative numbers,
    // which would otherwise silently wrap around to a huge size_t
    if (pos == str.size() && number >= 0) {
      return static_cast<size_t>(number);
    }
  } catch (const std::logic_error&) {
    // std::stoll throws std::invalid_argument or std::out_of_range, which both
    // derive from std::logic_error. Handled by the common error path below
  }

  parseError();
  return 0; // never reached, parseError exits
}

#endif // PODIO_TOOLS_ARGPARSEUTILS_H
239 changes: 239 additions & 0 deletions tools/src/podio-dump.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
#include "argparseUtils.h"
#include "tabulate.h"

#include "podio/Frame.h"
#include "podio/Reader.h"
#include "podio/podioVersion.h"
#include "podio/utilities/MiscHelpers.h"

#include <fmt/core.h>
#include <fmt/ostream.h>
#include <fmt/ranges.h>

#include <algorithm>
#include <iostream>
#include <iterator>
#include <numeric>
#include <string>
#include <tuple>

// Make podio::version::Version usable in fmt format strings by deriving the
// formatter from fmt's ostream_formatter (used for printing the build version
// for --version).
template <>
struct fmt::formatter<podio::version::Version> : ostream_formatter {};

/// The result of parsing the command line arguments
struct ParsedArgs {
  /// Name of the file to dump content from
  std::string inputFile;
  /// The Frame category to dump
  std::string category = "events";
  /// The entries to print, by default only the first one (index 0)
  std::vector<size_t> events{0};
  /// Dump the full contents instead of only the overview
  bool detailed = false;
};

// One line usage synopsis. Printed to stderr by printUsageAndExit and as the
// first line of the output for -h / --help.
constexpr auto usageMsg = R"(usage: podio-dump [-h] [-c CATEGORY] [-e ENTRIES] [-d] [--version] inputfile)";

// Detailed description of all arguments. Printed after usageMsg for -h / --help.
constexpr auto helpMsg = R"(
Dump contents of a podio file to stdout

positional arguments:
inputfile Name of the file to dump content from

options:
-h, --help show this help message and exit
-c CATEGORY, --category CATEGORY
Which Frame category to dump
-e ENTRIES, --entries ENTRIES
Which entries to print. A single number, comma separated list of numbers or "first:last" for an inclusive range of entries. Defaults to the first entry.
-d, --detailed Dump the full contents not just the collection info
--version show program's version number and exit
)";

/**
 * Print the usage synopsis to stderr and exit the program with exit code 1.
 *
 * Marked [[noreturn]] because the function always terminates the program, which
 * lets the compiler (and readers) know that callers do not continue after it.
 */
[[noreturn]] void printUsageAndExit() {
  fmt::print(stderr, "{}\n", usageMsg);
  std::exit(1);
}

/**
 * Get the value that belongs to a flag, i.e. the element that directly follows
 * the flag in argv.
 *
 * @param argv The arguments
 * @param it   Iterator into argv that points to the flag
 * @returns A copy of the element following the flag. If there is no such
 *          element the usage is printed and the program exits.
 */
auto getArgumentValueOrExit(const std::vector<std::string>& argv, std::vector<std::string>::const_iterator it) {
  // We need at least the flag itself plus one more element for its value
  const auto nRemaining = std::distance(it, argv.end());
  if (nRemaining < 2) {
    printUsageAndExit();
  }
  return *std::next(it);
}

/**
 * Parse the value of the entries argument into a list of entry indices.
 *
 * Supported formats are
 * - a single number, e.g. "3"
 * - a comma separated list of numbers, e.g. "1,3,5"
 * - an inclusive range "first:last", e.g. "2:5"
 *
 * @param evtRange The string to parse
 * @returns The entry indices in the order in which they have been specified.
 *          If the string cannot be parsed (including an empty list and a range
 *          with last < first) an error message is printed to stderr and the
 *          program exits with exit code 1.
 */
std::vector<size_t> parseEventRange(const std::string& evtRange) {
  const auto parseError = [&evtRange]() {
    fmt::print(stderr, "'{}' cannot be parsed into a list of entries\n", evtRange);
    std::exit(1);
  };

  const auto splitRange = splitString(evtRange, ',');
  if (splitRange.empty()) {
    // Nothing but delimiters (or an empty string), there is nothing to print
    parseError();
  }

  if (splitRange.size() > 1) {
    // A comma separated list of numbers
    std::vector<size_t> events;
    events.reserve(splitRange.size());
    std::transform(splitRange.begin(), splitRange.end(), std::back_inserter(events),
                   [](const auto& elem) { return parseSizeOrExit(elem); });
    return events;
  }

  const auto colonSplitRange = splitString(evtRange, ':');
  if (colonSplitRange.size() == 1) {
    return {parseSizeOrExit(splitRange[0])};
  }
  if (colonSplitRange.size() == 2) {
    // we have two numbers signifying an inclusive range
    const auto start = parseSizeOrExit(colonSplitRange[0]);
    const auto end = parseSizeOrExit(colonSplitRange[1]);
    // A reversed range would make the unsigned size computation below wrap
    // around and request an enormous vector, so treat it as a parse error
    if (end >= start) {
      std::vector<size_t> events(end - start + 1);
      std::iota(events.begin(), events.end(), start);
      return events;
    }
  }

  parseError();
  return {};
}

/**
 * Parse the command line arguments.
 *
 * Handles -h / --help and --version directly by printing the corresponding
 * information and exiting with exit code 0. All recognized options are removed
 * from argv and the single remaining element is taken as the input file. If
 * anything other than exactly one element remains, or if an option that needs a
 * value does not have one, the usage is printed and the program exits.
 *
 * @param argv The command line arguments without the executable name
 * @returns The parsed arguments
 */
ParsedArgs parseArgs(std::vector<std::string> argv) {
  // find help or version
  if (const auto it = findFlags(argv, "-h", "--help", "--version"); it != argv.end()) {
    if (*it == "--version") {
      fmt::print("podio {}\n", podio::version::build_version);
    } else {
      fmt::print("{}\n{}", usageMsg, helpMsg);
    }
    std::exit(0);
  }

  ParsedArgs args;
  // detailed flag
  if (const auto it = findFlags(argv, "-d", "--detailed"); it != argv.end()) {
    args.detailed = true;
    argv.erase(it);
  }
  // category
  if (const auto it = findFlags(argv, "-c", "--category"); it != argv.end()) {
    args.category = getArgumentValueOrExit(argv, it);
    argv.erase(it, it + 2);
  }
  // entries to print. The documented spelling is --entries; --events is kept as
  // an accepted alias. The value is retrieved via getArgumentValueOrExit so
  // that a trailing flag without a value does not read past the end of argv.
  if (const auto it = findFlags(argv, "-e", "--entries", "--events"); it != argv.end()) {
    args.events = parseEventRange(getArgumentValueOrExit(argv, it));
    argv.erase(it, it + 2);
  }

  // After removing all options only the input file may remain
  if (argv.size() != 1) {
    printUsageAndExit();
  }
  args.inputFile = argv[0];

  return args;
}

/**
 * Get a human readable name for the type T.
 *
 * @tparam T The type for which the name is desired
 * @returns "int", "float", "double" or "std::string" for the corresponding
 *          types and "unknown" for all other types
 */
template <typename T>
std::string getTypeString() {
  // Resolved entirely at compile time, exactly one of the literals is selected
  constexpr const char* typeName = std::is_same_v<T, int> ? "int"
      : std::is_same_v<T, float>                          ? "float"
      : std::is_same_v<T, double>                         ? "double"
      : std::is_same_v<T, std::string>                    ? "std::string"
                                                          : "unknown";
  return typeName;
}

/**
 * Append one row per parameter of type T that is stored in the Frame.
 *
 * Each row consists of the parameter name, the name of the type and the number
 * of elements stored under that name. The rows for one type are appended in
 * case insensitive alphabetical order of the parameter names.
 *
 * @tparam T   The parameter type to look for
 * @param frame The Frame from which the parameters are read
 * @param rows  The rows to which the new rows are appended
 */
template <typename T>
void getParameterOverview(const podio::Frame& frame, std::vector<std::tuple<std::string, std::string, size_t>>& rows) {
  const auto sortedKeys = podio::utils::sortAlphabeticaly(frame.getParameterKeys<T>());
  const auto typeName = getTypeString<T>();
  for (const auto& key : sortedKeys) {
    // The keys come from the Frame itself, so the lookup is dereferenced
    // without further checks
    const auto nElements = frame.getParameter<std::vector<T>>(key)->size();
    rows.emplace_back(key, typeName, nElements);
  }
}

/**
 * Print an overview of the contents of a Frame.
 *
 * Prints one table with the name, value type, size and ID (as zero padded hex
 * number) of every available collection, followed by a table with the name,
 * type and number of elements of every int, float, double and std::string
 * parameter.
 *
 * @param frame The Frame to print
 */
void printFrameOverview(const podio::Frame& frame) {
  fmt::print("Collections:\n");
  const auto availableColls = frame.getAvailableCollections();

  std::vector<std::tuple<std::string, std::string_view, size_t, std::string>> collRows;
  collRows.reserve(availableColls.size());
  for (const auto& collName : podio::utils::sortAlphabeticaly(availableColls)) {
    const auto collection = frame.get(collName);
    const auto idString = fmt::format("{:0>8x}", collection->getID());
    collRows.emplace_back(collName, collection->getValueTypeName(), collection->size(), idString);
  }
  printTable(collRows, {"Name", "ValueType", "Size", "ID"});

  fmt::print("\nParameters:\n");
  // Rows are grouped by type in the order int, float, double, std::string
  std::vector<std::tuple<std::string, std::string, size_t>> parameterRows{};
  getParameterOverview<int>(frame, parameterRows);
  getParameterOverview<float>(frame, parameterRows);
  getParameterOverview<double>(frame, parameterRows);
  getParameterOverview<std::string>(frame, parameterRows);
  printTable(parameterRows, {"Name", "Type", "Elements"});
}

// Print the formatted arguments to stdout via fmt::print and flush stdout
// right away.
template <typename... Args>
void print_flush(fmt::format_string<Args...> fmtstr, Args&&... args) {
fmt::print(fmtstr, std::forward<Args>(args)...);
// NOTE(review): presumably flushed so that this output keeps its position
// relative to output written through other streams - confirm
std::fflush(stdout);
}

/**
 * Print the full contents of a Frame.
 *
 * For every available collection (in case insensitive alphabetical order) the
 * name is printed followed by the output of the print function of the
 * collection. Afterwards the print function of the Frame parameters is called.
 *
 * @param frame The Frame to print
 */
void printFrameDetailed(const podio::Frame& frame) {
  fmt::print("Collections:\n");
  const auto collNames = frame.getAvailableCollections();
  for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) {
    const auto coll = frame.get(name);
    print_flush("{}\n", name);
    coll->print();
    print_flush("\n");
  }

  // The colon belongs directly after the heading, same as for "Collections:"
  print_flush("\nParameters:\n");
  frame.getParameters().print();
  print_flush("\n");
}

/**
 * Print general information about a file.
 *
 * Prints the file name, the datamodel definitions that are reported by the
 * reader and a table of all Frame categories with their number of entries.
 *
 * @param reader   The reader that has been opened for the file
 * @param filename The name of the file (only used for printing)
 */
void printGeneralInfo(const podio::Reader& reader, const std::string& filename) {
  fmt::print("input file: {}\n", filename);
  fmt::print("datamodel model definitions stored in this file: {}\n\n", reader.getAvailableDatamodels());

  std::vector<std::tuple<std::string, size_t>> categoryRows{};
  for (const auto& category : reader.getAvailableCategories()) {
    const std::string categoryName(category);
    categoryRows.emplace_back(categoryName, reader.getEntries(categoryName));
  }

  // NOTE(review): the literal below already contains a "Name\tEntries" line
  // and the same headers are handed to printTable - check against printTable
  // whether the header ends up being printed twice
  fmt::print("Frame categories in this file:\nName\tEntries\n");
  printTable(categoryRows, {"Name", "Entries"});
}

/**
 * Print one Frame, preceded by a banner line with the category and the entry
 * number.
 *
 * @param frame    The Frame to print
 * @param category The category of the Frame (only used for the banner)
 * @param iEntry   The entry number of the Frame (only used for the banner)
 * @param detailed Print the full contents if true, otherwise only an overview
 */
void printFrame(const podio::Frame& frame, const std::string& category, size_t iEntry, bool detailed) {
  // Banner: " category: entry " centered in a line of 82 '#' characters
  const auto banner = fmt::format(" {}: {} ", category, iEntry);
  fmt::print("{:#^82}\n", banner);

  if (!detailed) {
    printFrameOverview(frame);
    return;
  }
  printFrameDetailed(frame);
}

/**
 * Entry point: parse the arguments, print the general file information and
 * then every requested entry of the requested category.
 *
 * @returns 0 on success, 1 if a std::runtime_error is thrown while opening or
 *          reading the file (the message is printed to stderr)
 */
int main(int argc, char* argv[]) {
  // We strip the executable name off directly for parsing
  const auto args = parseArgs({argv + 1, argv + argc});

  // Opening the file is inside the try block as well, so that problems with the
  // input file result in an error message instead of an uncaught exception
  try {
    auto reader = podio::makeReader(args.inputFile);

    printGeneralInfo(reader, args.inputFile);

    for (const auto event : args.events) {
      const auto& frame = reader.readFrame(args.category, event);
      printFrame(frame, args.category, event, args.detailed);
    }
  } catch (const std::runtime_error& err) {
    fmt::print(stderr, "{}\n", err.what());
    return 1;
  }

  return 0;
}
Loading
Loading