diff --git a/CMakeLists.txt b/CMakeLists.txt index 85a744c7b..700a271db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -200,6 +200,9 @@ if(BUILD_TESTING) include(cmake/podioTest.cmake) add_subdirectory(tests) endif() + +find_package(fmt REQUIRED) + add_subdirectory(tools) add_subdirectory(python) diff --git a/include/podio/utilities/MiscHelpers.h b/include/podio/utilities/MiscHelpers.h new file mode 100644 index 000000000..9d4eac4cf --- /dev/null +++ b/include/podio/utilities/MiscHelpers.h @@ -0,0 +1,30 @@ +#ifndef PODIO_UTILITIES_MISCHELPERS_H +#define PODIO_UTILITIES_MISCHELPERS_H + +#include +#include +#include + +namespace podio::utils { + +/** + * Sort the input vector of strings alphabetically, case insensitive. + */ +inline std::vector sortAlphabeticaly(std::vector strings) { + // Obviously there is no tolower(std::string) in c++, so this is slightly more + // involved and we make use of the fact that lexicographical_compare works on + // ranges and the fact that we can feed it a dedicated comparison function, + // where we convert the strings to lower case char-by-char. The alternative is + // to make string copies inside the first lambda, transform them to lowercase + // and then use operator< of std::string, which would be effectively + // hand-writing what is happening below. + std::sort(strings.begin(), strings.end(), [](const auto& lhs, const auto& rhs) { + return std::lexicographical_compare( + lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), + [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); + }); + return strings; +} +} // namespace podio::utils + +#endif // PODIO_UTILITIES_MISCHELPERS_H diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 73c5db22d..78e79b84c 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -63,7 +63,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat const bool new_category = (catInfo.writer == nullptr); if (new_category) { // This is the minimal information that we need for now - catInfo.names = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.names = podio::utils::sortAlphabeticaly(collsToWrite); } std::vector collections; diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 21e2998db..c4a3bc30f 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -33,7 +33,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego // been initialized if (catInfo.tree == nullptr) { catInfo.idTable = frame.getCollectionIDTableForWrite(); - catInfo.collsToWrite = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.collsToWrite = podio::utils::sortAlphabeticaly(collsToWrite); catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str()); catInfo.tree->SetDirectory(m_file.get()); } diff --git a/src/rootUtils.h b/src/rootUtils.h index 007e4e202..60c35f372 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -2,6 +2,7 @@ #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #include "podio/CollectionIDTable.h" +#include "podio/utilities/MiscHelpers.h" #include "podio/utilities/RootHelpers.h" #include "TBranch.h" @@ -279,25 +280,6 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable return collInfo; } -/** - * Sort the input vector of strings alphabetically, case insensitive. - */ -inline std::vector sortAlphabeticaly(std::vector strings) { - // Obviously there is no tolower(std::string) in c++, so this is slightly more - // involved and we make use of the fact that lexicographical_compare works on - // ranges and the fact that we can feed it a dedicated comparison function, - // where we convert the strings to lower case char-by-char. The alternative is - // to make string copies inside the first lambda, transform them to lowercase - // and then use operator< of std::string, which would be effectively - // hand-writing what is happening below. - std::sort(strings.begin(), strings.end(), [](const auto& lhs, const auto& rhs) { - return std::lexicographical_compare( - lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), - [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); - }); - return strings; -} - /** * Check whether existingColls and candidateColls both contain the same * collection names. Returns false if the two vectors differ in content. Inputs diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index cbacb59a2..4ea23f8d2 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,3 +1,7 @@ +add_executable(podio-dump-cpp src/podio-dump.cpp) +target_link_libraries(podio-dump-cpp PRIVATE podio::podio podio::podioIO fmt::fmt) + + install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR}) if(ENABLE_RNTUPLE) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h new file mode 100644 index 000000000..7e133012c --- /dev/null +++ b/tools/src/argparseUtils.h @@ -0,0 +1,54 @@ +#ifndef PODIO_TOOLS_ARGPARSEUTILS_H +#define PODIO_TOOLS_ARGPARSEUTILS_H + +#include +#include +#include +#include +#include + +template +auto findFlags(const std::vector& argv, Flags... flags) { + return std::find_if(argv.begin(), argv.end(), [&](const auto& elem) { return ((elem == flags) || ...); }); +} + +inline std::vector splitString(const std::string& str, const char delimiter) { + std::vector tokens; + std::string token; + for (char ch : str) { + if (ch == delimiter) { + if (!token.empty()) { + tokens.push_back(token); + token.clear(); + } + } else { + token += ch; + } + } + if (!token.empty()) { + tokens.push_back(token); + } + return tokens; +} + +inline size_t parseSizeOrExit(const std::string& str) { + const auto parseError = [&str]() { + std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl; + std::exit(1); + }; + + try { + std::size_t pos{}; + auto number = std::stoll(str, &pos); + if (pos != str.size()) { + throw std::invalid_argument(""); + } + return number; + } catch (const std::invalid_argument& err) { + parseError(); + } + + return -1; +} + +#endif // PODIO_TOOLS_ARGPARSEUTILS_H diff --git a/tools/src/podio-dump.cpp b/tools/src/podio-dump.cpp new file mode 100644 index 000000000..9220eb717 --- /dev/null +++ b/tools/src/podio-dump.cpp @@ -0,0 +1,239 @@ +#include "argparseUtils.h" +#include "tabulate.h" + +#include "podio/Frame.h" +#include "podio/Reader.h" +#include "podio/podioVersion.h" +#include "podio/utilities/MiscHelpers.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +template <> +struct fmt::formatter : ostream_formatter {}; + +struct ParsedArgs { + std::string inputFile{}; + std::string category{"events"}; + std::vector events = std::vector(1, 0); + bool detailed{false}; +}; + +constexpr auto usageMsg = R"(usage: podio-dump [-h] [-c CATEGORY] [-e ENTRIES] [-d] [--version] inputfile)"; + +constexpr auto helpMsg = R"( +Dump contents of a podio file to stdout + +positional arguments: + inputfile Name of the file to dump content from + +options: + -h, --help show this help message and exit + -c CATEGORY, --category CATEGORY + Which Frame category to dump + -e ENTRIES, --entries ENTRIES + Which entries to print. A single number, comma separated list of numbers or "first:last" for an inclusive range of entries. Defaults to the first entry. + -d, --detailed Dump the full contents not just the collection info + --version show program's version number and exit +)"; + +void printUsageAndExit() { + fmt::print(stderr, "{}\n", usageMsg); + std::exit(1); +} + +auto getArgumentValueOrExit(const std::vector& argv, std::vector::const_iterator it) { + const int argc = argv.size(); + const auto index = std::distance(argv.begin(), it); + if (index > argc - 2) { + printUsageAndExit(); + } + return argv[index + 1]; +} + +std::vector parseEventRange(const std::string& evtRange) { + const auto splitRange = splitString(evtRange, ','); + const auto parseError = [&evtRange]() { + fmt::print(stderr, "'{}' canot be parsed into a list of entries\n", evtRange); + std::exit(1); + }; + + if (splitRange.size() == 1) { + const auto colonSplitRange = splitString(evtRange, ':'); + if (colonSplitRange.size() == 1) { + return {parseSizeOrExit(splitRange[0])}; + } else if (colonSplitRange.size() == 2) { + // we have two numbers signifying an inclusive range + const auto start = parseSizeOrExit(colonSplitRange[0]); + const auto end = parseSizeOrExit(colonSplitRange[1]); + std::vector events(end - start + 1); + std::iota(events.begin(), events.end(), start); + return events; + } else { + parseError(); + } + } else { + std::vector events; + events.reserve(splitRange.size()); + std::transform(splitRange.begin(), splitRange.end(), std::back_inserter(events), + [](const auto& elem) { return parseSizeOrExit(elem); }); + + return events; + } + + parseError(); + return {}; +} + +ParsedArgs parseArgs(std::vector argv) { + // find help or version + if (const auto it = findFlags(argv, "-h", "--help", "--version"); it != argv.end()) { + if (*it == "--version") { + fmt::print("podio {}\n", podio::version::build_version); + } else { + fmt::print("{}\n{}", usageMsg, helpMsg); + } + std::exit(0); + } + + ParsedArgs args; + // detailed flag + if (const auto it = findFlags(argv, "-d", "--detailed"); it != argv.end()) { + args.detailed = true; + argv.erase(it); + } + // category + if (const auto it = findFlags(argv, "-c", "--category"); it != argv.end()) { + args.category = getArgumentValueOrExit(argv, it); + argv.erase(it, it + 2); + } + // event range + if (const auto it = findFlags(argv, "-e", "--events"); it != argv.end()) { + args.events = parseEventRange(*(it + 1)); + argv.erase(it, it + 2); + } + + if (argv.size() != 1) { + printUsageAndExit(); + } + args.inputFile = argv[0]; + + return args; +} + +template +std::string getTypeString() { + if constexpr (std::is_same_v) { + return "int"; + } else if constexpr (std::is_same_v) { + return "float"; + } else if constexpr (std::is_same_v) { + return "double"; + } else if constexpr (std::is_same_v) { + return "std::string"; + } + + return "unknown"; +} + +template +void getParameterOverview(const podio::Frame& frame, std::vector>& rows) { + const auto typeString = getTypeString(); + for (const auto& parKey : podio::utils::sortAlphabeticaly(frame.getParameterKeys())) { + rows.emplace_back(parKey, typeString, frame.getParameter>(parKey)->size()); + } +} + +void printFrameOverview(const podio::Frame& frame) { + fmt::print("Collections:\n"); + const auto collNames = frame.getAvailableCollections(); + + std::vector> rows; + rows.reserve(collNames.size()); + + for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + rows.emplace_back(name, coll->getValueTypeName(), coll->size(), fmt::format("{:0>8x}", coll->getID())); + } + printTable(rows, {"Name", "ValueType", "Size", "ID"}); + + fmt::print("\nParameters:\n"); + std::vector> paramRows{}; + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + + printTable(paramRows, {"Name", "Type", "Elements"}); +} + +template +void print_flush(fmt::format_string fmtstr, Args&&... args) { + fmt::print(fmtstr, std::forward(args)...); + std::fflush(stdout); +} + +void printFrameDetailed(const podio::Frame& frame) { + fmt::print("Collections:\n"); + const auto collNames = frame.getAvailableCollections(); + for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + print_flush("{}\n", name); + coll->print(); + print_flush("\n"); + } + + print_flush("\nParameters\n:"); + frame.getParameters().print(); + print_flush("\n"); +} + +void printGeneralInfo(const podio::Reader& reader, const std::string& filename) { + fmt::print("input file: {}\n", filename); + fmt::print("datamodel model definitions stored in this file: {}\n\n", reader.getAvailableDatamodels()); + + std::vector> rows{}; + for (const auto& cat : reader.getAvailableCategories()) { + rows.emplace_back(cat, reader.getEntries(std::string(cat))); + } + fmt::print("Frame categories in this file:\nName\tEntries\n"); + printTable(rows, {"Name", "Entries"}); +} + +void printFrame(const podio::Frame& frame, const std::string& category, size_t iEntry, bool detailed) { + fmt::print("{:#^82}\n", fmt::format(" {}: {} ", category, iEntry)); + if (detailed) { + printFrameDetailed(frame); + } else { + printFrameOverview(frame); + } +} + +int main(int argc, char* argv[]) { + // We strip the executable name off directly for parsing + const auto args = parseArgs({argv + 1, argv + argc}); + + auto reader = podio::makeReader(args.inputFile); + + printGeneralInfo(reader, args.inputFile); + + for (const auto event : args.events) { + try { + const auto& frame = reader.readFrame(args.category, event); + printFrame(frame, args.category, event, args.detailed); + } catch (std::runtime_error& err) { + fmt::print(stderr, "{}\n", err.what()); + return 1; + } + } + + return 0; +} diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h new file mode 100644 index 000000000..8de76e7dd --- /dev/null +++ b/tools/src/tabulate.h @@ -0,0 +1,58 @@ +#include + +#include +#include +#include +#include +#include +#include + +template +void printTable(const std::vector>& rows, const std::vector& headers) { + // Simply assume that all rows have the same widths + const auto nCols = headers.size(); + constexpr auto nColsFromRows = std::tuple_size_v>; + if (nCols != nColsFromRows) { + throw std::invalid_argument("headers and rows have to have the same number of columns"); + } + + // Transform all elements into strings first to determine column widths + std::vector> stringRows; + stringRows.reserve(rows.size()); + std::transform(rows.begin(), rows.end(), std::back_inserter(stringRows), [&nCols](const auto& elem) { + std::vector strs; + strs.reserve(nCols); + std::apply([&strs](auto&&... args) { (strs.emplace_back(fmt::format("{}", args)), ...); }, elem); + return strs; + }); + + // First figure out how large each column has to be to fit all the content + std::vector colWidths(nCols, 0); + for (size_t i = 0; i < nCols; ++i) { + colWidths[i] = headers[i].size(); + } + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + colWidths[iCol] = std::max(row[iCol].size(), colWidths[iCol]); + } + } + + // print the table header + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", headers[iCol], colWidths[iCol]); + } + fmt::print("\n"); + std::cout << '\n'; + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:->{}} ", "", colWidths[iCol]); + } + fmt::print("\n"); + + // and the contents + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", row[iCol], colWidths[iCol]); + } + fmt::print("\n"); + } +}