From 772a75b6c11dedbbe663f9acde4d4b9754a6b3d7 Mon Sep 17 00:00:00 2001 From: Foster Brereton Date: Mon, 17 Jun 2024 08:57:46 -0700 Subject: [PATCH] Improved typedef handling (#85) * Improvement to handle anonymous typedefs * adding fix for issue 84 * adding test case for anonymous struct conflict * adding a special case for `objc_object` --- include/orc/hash.hpp | 2 +- include/orc/orc.hpp | 2 + src/dwarf.cpp | 98 +++++++++++++++---- src/main.cpp | 24 +++-- src/orc.cpp | 10 ++ test/battery/typedef_anonymous_struct/a.cpp | 4 + test/battery/typedef_anonymous_struct/b.cpp | 4 + .../typedef_anonymous_struct/odrv_test.toml | 9 ++ test/src/main.cpp | 14 ++- 9 files changed, 132 insertions(+), 35 deletions(-) create mode 100644 test/battery/typedef_anonymous_struct/a.cpp create mode 100644 test/battery/typedef_anonymous_struct/b.cpp create mode 100644 test/battery/typedef_anonymous_struct/odrv_test.toml diff --git a/include/orc/hash.hpp b/include/orc/hash.hpp index f018b17..caf6ae7 100644 --- a/include/orc/hash.hpp +++ b/include/orc/hash.hpp @@ -37,7 +37,7 @@ inline std::size_t hash_combine(std::size_t seed, const T& x) { // This is the terminating hash_combine call when there's only one item left to hash into the // seed. It also serves as the combiner the other routine variant uses to generate its new // seed. - return (seed ^ x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed ^ (x + 0x9e3779b9 + (seed << 6) + (seed >> 2)); } template diff --git a/include/orc/orc.hpp b/include/orc/orc.hpp index 1f9729d..65fcb85 100644 --- a/include/orc/orc.hpp +++ b/include/orc/orc.hpp @@ -63,6 +63,8 @@ void register_dies(dies die_vector); std::string to_json(const std::vector&); +std::string version_json(); + } // namespace orc void orc_reset(); diff --git a/src/dwarf.cpp b/src/dwarf.cpp index e6b56d0..4273d82 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -341,16 +341,15 @@ void line_header::read(freader& s, bool needs_byteswap) { } /**************************************************************************************************/ - -std::size_t fatal_attribute_hash(const attribute_sequence& attributes) { -#if ORC_FEATURE(PROFILE_DIE_DETAILS) - ZoneScoped; -#endif // ORC_FEATURE(PROFILE_DIE_DETAILS) - - // We only hash the attributes that could contribute to an ODRV. We also sort that set of - // attributes by name to make sure the hashing is consistent regardless of attribute order. - constexpr const std::size_t max_names_k{32}; - std::array names{dw::at::none}; +// It is fixed to keep allocations from happening. +constexpr std::size_t max_names_k{32}; +using fixed_attribute_array = std::array; + +// for an incoming set of arbitrary attributes, return the subset of those that are fatal. +// We also sort the set of attributes by name to make sure subsequent traversal of this set +// is consistent. +fixed_attribute_array fatal_attributes_within(const attribute_sequence& attributes) { + fixed_attribute_array names{dw::at::none}; std::size_t count{0}; for (const auto& attr : attributes) { @@ -362,17 +361,32 @@ std::size_t fatal_attribute_hash(const attribute_sequence& attributes) { } std::sort(&names[0], &names[count]); + return names; +} + +/**************************************************************************************************/ + +std::size_t fatal_attribute_hash(const attribute_sequence& attributes) { +#if ORC_FEATURE(PROFILE_DIE_DETAILS) + ZoneScoped; +#endif // ORC_FEATURE(PROFILE_DIE_DETAILS) + + // We only hash the attributes that could contribute to an ODRV. That's what makes them "fatal" + std::size_t h{0}; - for (std::size_t i{0}; i < count; ++i) { + for (auto name : fatal_attributes_within(attributes)) { + // As soon as we hit our first no-name-attribute, we're done. + if (name == dw::at::none) break; + // If this assert fires, it means an attribute's value was passed over during evaluation, // but it was necessary for ODRV evaluation after all. The fix is to improve the attribute // form evaluation engine such that this attribute's value is no longer passed over. - const auto name = names[i]; const auto& attribute = attributes.get(name); assert(!attributes.has(name, attribute_value::type::passover)); - const auto attribute_hash = attribute._value.hash(); - h = orc::hash_combine(h, attribute_hash); + + const auto hash = attribute._value.hash(); + h = orc::hash_combine(h, hash); } return h; @@ -474,6 +488,7 @@ struct dwarf::implementation { std::vector _decl_files; std::unordered_map _type_cache; std::unordered_map _debug_str_cache; + pool_string _last_typedef_name; // for unnamed structs - see https://github.com/adobe/orc/issues/84 cu_header _cu_header; std::size_t _cu_header_offset{0}; // offset of the compilation unit header. Relative to __debug_info. std::size_t _cu_die_offset{0}; // offset of the `compile_unit` die. Relative to start of `debug_info` @@ -1380,6 +1395,18 @@ pool_string dwarf::implementation::resolve_type(attribute type) { result = empool("const " + recurse(attributes).allocate_string()); } else if (die._tag == dw::tag::pointer_type) { result = empool(recurse(attributes).allocate_string() + "*"); + } else if (die._tag == dw::tag::typedef_) { + if (const auto maybe_type = recurse(attributes)) { + result = maybe_type; + } else if (attributes.has_string(dw::at::name)) { + result = attributes.string(dw::at::name); + } else { + // `result` will be empty if we hit this in release + // builds. It's bad, but not UB, so the program can + // continue from here (though the results will be + // untrustworthy). + assert(!"Got a typedef with no name?"); + } } else if (attributes.has_string(dw::at::type)) { result = type.string(); } else if (attributes.has_reference(dw::at::type)) { @@ -1416,6 +1443,7 @@ die_pair dwarf::implementation::abbreviation_to_die(std::size_t die_address, pro die._tag = a._tag; die._has_children = a._has_children; + // Can we get rid of this memory allocation? This happens a lot... attributes.reserve(a._attributes.size()); std::transform(a._attributes.begin(), a._attributes.end(), std::back_inserter(attributes), @@ -1425,8 +1453,9 @@ die_pair dwarf::implementation::abbreviation_to_die(std::size_t die_address, pro }); if (mode == process_mode::complete) { + // These statements must be kept in sync with the ones dealing with issue 84 below. + // See https://github.com/adobe/orc/issues/84 path_identifier_set(die_identifier(die, attributes)); - die._path = empool(std::string_view(qualified_symbol_name(die, attributes))); } @@ -1507,6 +1536,16 @@ bool dwarf::implementation::skip_die(die& d, const attribute_sequence& attribute return true; } + // There are some symbols that are owned by the compiler and/or OS vendor (read: Apple) + // that have been observed to conflict. We skip them for our purposes, as we have no + // control over them. + if (d._path.view().find("::[u]::objc_object") == 0) { +#if ORC_FEATURE(PROFILE_DIE_DETAILS) + ZoneTextL("skipping: vendor- or compiler-defined symbol"); +#endif // ORC_FEATURE(PROFILE_DIE_DETAILS) + return true; + } + // lambdas are ephemeral and can't cause (hopefully) an ODRV if (d._path.view().find("lambda") != std::string::npos) { #if ORC_FEATURE(PROFILE_DIE_DETAILS) @@ -1652,12 +1691,37 @@ void dwarf::implementation::process_all_dies() { } } + post_process_die_attributes(attributes); + + // See https://github.com/adobe/orc/issues/84 + // This code accounts for unnamed structs that are part of + // a typedef expression. In such case the typedef actually adds a name + // for the purpose of linking, but the structure's + // die does not contain the name. Furthermore, the typedef die has + // been found to precede the die for the structure. So we grab it if + // we find a typedef, and use it if an ensuing structure_type has no name. + if (die._tag == dw::tag::typedef_ && attributes.has(dw::at::name)) { + _last_typedef_name = attributes.get(dw::at::name).string(); + } else if (die._tag == dw::tag::structure_type && + !attributes.has(dw::at::name) && + _last_typedef_name) { + attribute name; + name._name = dw::at::name; + name._form = dw::form::strp; + name._value.string(_last_typedef_name); + attributes.push_back(name); + + // These two statements must be in sync with the ones in `abbreviation_to_die` + path_identifier_set(die_identifier(die, attributes)); + die._path = empool(std::string_view(qualified_symbol_name(die, attributes))); + + _last_typedef_name = pool_string(); // reset it to avoid misuse + } + if (die._has_children) { path_identifier_push(); } - post_process_die_attributes(attributes); - die._skippable = skip_die(die, attributes); die._ofd_index = _ofd_index; die._hash = die_hash(die, attributes); diff --git a/src/main.cpp b/src/main.cpp index 528d596..60031e1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -451,13 +451,19 @@ auto epilogue(bool exception) { const auto& g = globals::instance(); if (g._object_file_count == 0) { - cout_safe([&](auto& s) { - const auto local_build = ORC_VERSION_STR() == std::string("local"); - const std::string tag_url = local_build ? "" : std::string(" (https://github.com/adobe/orc/releases/tag/") + ORC_VERSION_STR() + ")"; - s << "ORC (https://github.com/adobe/orc)\n"; - s << " version: " << ORC_VERSION_STR() << tag_url << '\n'; - s << " sha: " << ORC_SHA_STR() << '\n'; - }); + if (settings::instance()._output_file_mode == settings::output_file_mode::json) { + cout_safe([](auto& s){ + s << orc::version_json() << '\n'; + }); + } else { + cout_safe([&](auto& s) { + const auto local_build = ORC_VERSION_STR() == std::string("local"); + const std::string tag_url = local_build ? "" : std::string(" (https://github.com/adobe/orc/releases/tag/") + ORC_VERSION_STR() + ")"; + s << "ORC (https://github.com/adobe/orc)\n"; + s << " version: " << ORC_VERSION_STR() << tag_url << '\n'; + s << " sha: " << ORC_SHA_STR() << '\n'; + }); + } } else if (log_level_at_least(settings::log_level::warning)) { // Make sure these values are in sync with the `synopsis` json blob in `to_json`. cout_safe([&](auto& s) { @@ -510,9 +516,9 @@ void maybe_forward_to_linker(int argc, char** argv, const cmdline_results& cmdli } else if (cmdline._libtool_mode) { executable_path /= "libtool"; } else { - if (log_level_at_least(settings::log_level::warning)) { + if (log_level_at_least(settings::log_level::verbose)) { cout_safe([&](auto& s) { - s << "warning: libtool/ld mode could not be derived; forwarding to linker disabled\n"; + s << "verbose: libtool/ld mode could not be derived; forwarding to linker disabled\n"; }); } diff --git a/src/orc.cpp b/src/orc.cpp index cbf4a9a..9ccf765 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -46,6 +46,7 @@ #include "orc/str.hpp" #include "orc/string_pool.hpp" #include "orc/tracy.hpp" +#include "orc/version.hpp" /**************************************************************************************************/ @@ -702,6 +703,15 @@ std::string to_json(const std::vector& reports) { return result.dump(spaces_k); } +std::string version_json() { + nlohmann::json result; + + result["version"] = ORC_VERSION_STR(); + result["sha256"] = ORC_SHA_STR(); + + return result.dump(); +} + /**************************************************************************************************/ } // namespace orc diff --git a/test/battery/typedef_anonymous_struct/a.cpp b/test/battery/typedef_anonymous_struct/a.cpp new file mode 100644 index 0000000..def3ad5 --- /dev/null +++ b/test/battery/typedef_anonymous_struct/a.cpp @@ -0,0 +1,4 @@ +typedef struct { double _d; } S; + +// Required so the compiler generates a symbol. +int getd(S s) { return s._d; } diff --git a/test/battery/typedef_anonymous_struct/b.cpp b/test/battery/typedef_anonymous_struct/b.cpp new file mode 100644 index 0000000..2296da3 --- /dev/null +++ b/test/battery/typedef_anonymous_struct/b.cpp @@ -0,0 +1,4 @@ +typedef struct {int _i;} S; + +// Required so the compiler generates a symbol. +int geti(S s) { return s._i; } diff --git a/test/battery/typedef_anonymous_struct/odrv_test.toml b/test/battery/typedef_anonymous_struct/odrv_test.toml new file mode 100644 index 0000000..f7036de --- /dev/null +++ b/test/battery/typedef_anonymous_struct/odrv_test.toml @@ -0,0 +1,9 @@ +[[source]] + path = "a.cpp" + +[[source]] + path = "b.cpp" + +[[odrv]] + category = "structure:byte_size" + symbol = "S" diff --git a/test/src/main.cpp b/test/src/main.cpp index e095310..06a3b29 100644 --- a/test/src/main.cpp +++ b/test/src/main.cpp @@ -504,19 +504,17 @@ void run_battery_test(const std::filesystem::path& home) { /**************************************************************************************************/ -void traverse_directory_tree(std::filesystem::path& directory) { +void traverse_directory_tree(const std::filesystem::path& directory) { assert(is_directory(directory)); + if (exists(directory / tomlname_k)) { + run_battery_test(directory); + } + for (const auto& entry : std::filesystem::directory_iterator(directory)) { try { if (is_directory(entry)) { - std::filesystem::path path = entry.path(); - - if (exists(path / tomlname_k)) { - run_battery_test(path); - } - - traverse_directory_tree(path); + traverse_directory_tree(entry.path()); } } catch (...) { console_error() << "\nIn battery " << entry.path() << ":";