From e661382056ad695c8ff38ce7eff1993ff6b794d2 Mon Sep 17 00:00:00 2001 From: Sam De Roeck Date: Sat, 1 Feb 2025 00:14:22 -0800 Subject: [PATCH] Enforce use_hash flag for types of structured annotations in schema.thrift Summary: The schema.thrift representation constructed by thrift2ast has the [use_hash](https://fburl.com/code/e8a071lt) flag to convert any URIs or name identifiers to unique hashes. This prevents users from having to create & store an external mapping of identifiers to unique IDs in order to e.g. check for type equality. While the `Annotation` struct itself already hashed its underlying type, all annotation for a definition are also stored in a [map](https://www.internalfb.com/code/fbsource/[4ef64d09cf78d72d3bf29ae2b8c12251272133e2]/fbcode/thrift/lib/thrift/schema.thrift?lines=169), where the string is the URI/name of the type (again). This diff ensures that the `use_hash` flag is also correctly applied here. Reviewed By: iahs Differential Revision: D68911786 fbshipit-source-id: 2041e469956e9e8985fc192c6d41e88a35aceb08 --- .../src/thrift/compiler/sema/schematizer.cc | 38 ++-- .../src/thrift/compiler/sema/schematizer.h | 7 + .../thrift/compiler/test/schematizer_test.cc | 174 ++++++++++++++++-- 3 files changed, 189 insertions(+), 30 deletions(-) diff --git a/third-party/thrift/src/thrift/compiler/sema/schematizer.cc b/third-party/thrift/src/thrift/compiler/sema/schematizer.cc index 67f321067c169c..de178ddb19b1c4 100644 --- a/third-party/thrift/src/thrift/compiler/sema/schematizer.cc +++ b/third-party/thrift/src/thrift/compiler/sema/schematizer.cc @@ -52,15 +52,7 @@ std::unique_ptr val(Enm val) { std::unique_ptr val(std::string_view s) { return val(std::string{s}); } -std::string uri_or_name(const t_named& node) { - if (!node.uri().empty()) { - return node.uri(); - } - if (node.program()) { - return node.program()->scope_name(node); - } - return node.name(); -} + } // namespace t_type_ref schematizer::std_type(std::string_view uri) { @@ -68,15 +60,24 @@ t_type_ref schematizer::std_type(std::string_view uri) { static_cast(scope_.find_by_uri(uri))); } -std::unique_ptr schematizer::type_uri(const t_type& type) { - auto ret = t_const_value::make_map(); +// Returns the `TypeUri` type & the corresponding Uri value for the given node +schematizer::resolved_uri schematizer::calculate_uri(const t_named& node) { if (opts_.use_hash) { - ret->add_map(val("definitionKey"), val(identify_definition(type))); - } else if (!type.uri().empty()) { - ret->add_map(val("uri"), val(type.uri())); - } else { - ret->add_map(val("scopedName"), val(type.get_scoped_name())); + return {"definitionKey", identify_definition(node)}; } + if (!node.uri().empty()) { + return {"uri", node.uri()}; + } + if (node.program()) { + return {"scopedName", node.program()->scope_name(node)}; + } + return {"scopedName", node.name()}; +} + +std::unique_ptr schematizer::type_uri(const t_type& type) { + auto ret = t_const_value::make_map(); + auto uri = calculate_uri(type); + ret->add_map(val(uri.uri_type), val(std::move(uri.value))); ret->set_ttype(std_type("facebook.com/thrift/type/TypeUri")); return ret; } @@ -132,7 +133,10 @@ void schematizer::add_definition( } annot->set_ttype(std_type("facebook.com/thrift/type/Annotation")); - annots->add_map(val(uri_or_name(*item.type())), std::move(annot)); + auto uri = calculate_uri(*item.type()); + // We're ignoring the UriType here, as annotations are stored as + // map. + annots->add_map(val(std::move(uri.value)), std::move(annot)); } // Double write to deprecated externed path (T161963504). diff --git a/third-party/thrift/src/thrift/compiler/sema/schematizer.h b/third-party/thrift/src/thrift/compiler/sema/schematizer.h index 063afa36b17219..89df88040b1440 100644 --- a/third-party/thrift/src/thrift/compiler/sema/schematizer.h +++ b/third-party/thrift/src/thrift/compiler/sema/schematizer.h @@ -127,6 +127,13 @@ class schematizer { intern_func& intern_value); std::string_view program_checksum(const t_program& program); + + struct resolved_uri { + std::string_view uri_type; + std::string value; + }; + + resolved_uri calculate_uri(const t_named& node); }; // Tag for obtaining a compact-encoded schema for the root program via a diff --git a/third-party/thrift/src/thrift/compiler/test/schematizer_test.cc b/third-party/thrift/src/thrift/compiler/test/schematizer_test.cc index aa17d9410df590..93f8ae5d09ad14 100644 --- a/third-party/thrift/src/thrift/compiler/test/schematizer_test.cc +++ b/third-party/thrift/src/thrift/compiler/test/schematizer_test.cc @@ -25,19 +25,9 @@ #include -using apache::thrift::compiler::t_const_value; -using apache::thrift::compiler::t_enum; -using apache::thrift::compiler::t_enum_value; -using apache::thrift::compiler::t_field; -using apache::thrift::compiler::t_list; -using apache::thrift::compiler::t_map; -using apache::thrift::compiler::t_primitive_type; -using apache::thrift::compiler::t_program; -using apache::thrift::compiler::t_set; -using apache::thrift::compiler::t_struct; -using apache::thrift::compiler::t_type_ref; -using apache::thrift::compiler::t_typedef; +using namespace apache::thrift::compiler; using apache::thrift::compiler::detail::protocol_value_builder; +using apache::thrift::compiler::detail::schematizer; template std::unique_ptr val(Args&&... args) { @@ -133,12 +123,17 @@ std::unique_ptr make_foo_bar(const t_program* program) { return struct_ty; } -TEST(SchematizerTest, wrap_with_protocol_with_struct_ty) { +std::unique_ptr make_foo_bar_value() { auto strct = t_const_value::make_map(); strct->add_map(val("foo"), val(1)); strct->add_map(val("bar"), val(2)); strct->add_map(val("baz"), val(3.14)); strct->add_map(val("qux"), val(4.2)); + return strct; +} + +TEST(SchematizerTest, wrap_with_protocol_with_struct_ty) { + auto strct = make_foo_bar_value(); // With the type mapping, integers will be represented by their types in // FooBar. @@ -352,3 +347,156 @@ TEST(SchematizerTest, wrap_with_protocol_map) { expectWrappedValue( *inner_map_val2, t_const_value::CV_INTEGER, "i32Value", 888); } + +constexpr auto UTILITY_CLASSES = { + std::pair( + "StructuredAnnotation", + "facebook.com/thrift/type/StructuredAnnotation"), + std::pair("Annotation", "facebook.com/thrift/type/Annotation"), + std::pair("ProtocolValue", "facebook.com/thrift/protocol/Value"), + std::pair{"TypeUri", "facebook.com/thrift/type/TypeUri"}, +}; + +template +void with_schematizer(schematizer::options opts, F&& f) { + source_manager sm; + sm.add_virtual_file("my_prog", "abcd"); + + t_scope scope{}; + auto program = std::make_unique("my_prog", "my_prog"); + + // Add some well-known types to the scope. + auto protocol_value_ty = std::make_unique( + program.get(), "ProtocolValue"); // Can be empty + protocol_value_ty->set_uri("facebook.com/thrift/protocol/Value"); + std::vector> utility_types; + for (const auto& [name, uri] : UTILITY_CLASSES) { + utility_types.push_back(std::make_unique(program.get(), name)); + utility_types.back()->set_uri(uri); + scope.add_def(*utility_types.back().get()); + } + + schematizer schematizer{scope, sm, std::move(opts)}; + f(*program.get(), schematizer); +} + +template +void with_structured_annotation_uris( + const t_const_value& struct_schema, + const std::string_view struct_name, + F&& on_annotation, + G&& on_structured_annotation) { + const auto [attrs_key, attrs] = struct_schema.get_map().at(0); + EXPECT_EQ(attrs_key->get_string(), "attrs"); + const auto& definition_attrs = attrs->get_map(); + + const auto [name_key, name] = definition_attrs.at(0); + EXPECT_EQ(name_key->get_string(), "name"); + EXPECT_EQ(name->get_string(), struct_name); + + const auto [structured_annotations_key, structured_annotation_ids_set] = + definition_attrs.at(1); + EXPECT_EQ(structured_annotations_key->get_string(), "structuredAnnotations"); + const auto& structured_annotations = + structured_annotation_ids_set->get_list(); + + for (size_t i = 0; i < structured_annotations.size(); ++i) { + const auto annotation_id = structured_annotations.at(i)->get_integer(); + on_structured_annotation(i, annotation_id); + } + + const auto [annotations_key, annotations_map] = definition_attrs.at(2); + EXPECT_EQ(annotations_key->get_string(), "annotations"); + const auto& annotations = annotations_map->get_map(); + + for (size_t i = 0; i < annotations.size(); ++i) { + const auto [annotation_name, _] = annotations.at(i); + on_annotation(i, annotation_name->get_string()); + } +} + +template +void with_foo_bar_plus_structured_annotations(t_program& program, F f) { + auto foo_bar_with_annotations_ty = + std::make_unique(&program, "FooBarWithStructuredAnnotations"); + auto foo_bar_ty = make_foo_bar(&program); + foo_bar_with_annotations_ty->add_structured_annotation( + std::make_unique( + &program, + t_type_ref{*foo_bar_ty}, + "FooBarWithStructuredAnnotations", + make_foo_bar_value())); + + f(*foo_bar_with_annotations_ty, *foo_bar_ty); +} + +TEST(SchematizerTest, use_hash_for_structured_annotations) { + for (const bool use_hash : {true, false}) { + schematizer::options opts; + opts.use_hash = use_hash; + size_t id = 0; + std::vector> interned_values; + opts.intern_value = [&](std::unique_ptr val, t_program*) { + interned_values.push_back(std::move(val)); + return static_cast(id++); + }; + + with_schematizer(opts, [&](t_program& program, schematizer& schematizer) { + with_foo_bar_plus_structured_annotations( + program, + [&](const auto& foo_bar_with_annotations_ty, const auto& foo_bar_ty) { + // Generate a schema.thrift representation of + // `FooBarWithStructuredAnnotations` + const std::unique_ptr struct_schema = + schematizer.gen_schema(foo_bar_with_annotations_ty); + + // Verify that the URIs of the structured annotations are + // hashed + with_structured_annotation_uris( + *struct_schema, + "FooBarWithStructuredAnnotations", + [&](size_t annotation_idx, const std::string& annotation_name) { + ASSERT_EQ(annotation_idx, 0); // There's only one annotation + + if (use_hash) { + EXPECT_TRUE( + annotation_name.find("FooBar") == std::string::npos); + EXPECT_EQ( + annotation_name, + schematizer.identify_definition(foo_bar_ty)); + } else { + EXPECT_EQ( + annotation_name, + "my_prog.FooBar"); // This should not be hashed + } + }, + [&](size_t annotation_idx, size_t id) { + ASSERT_EQ( + annotation_idx, + 0); // There's only one structured annotation + + const auto& interned_annotation = + interned_values.at(id)->get_map(); + const auto [type_key, type_uri] = interned_annotation.at(1); + ASSERT_EQ(type_key->get_string(), "type"); + + // TypeUri should be a union, so check for the appropriate + // field + const auto [type_uri_key, type_uri_value] = + type_uri->get_map().at(0); + if (use_hash) { + EXPECT_EQ(type_uri_key->get_string(), "definitionKey"); + EXPECT_EQ( + type_uri_value->get_string(), + schematizer.identify_definition(foo_bar_ty)); + } else { + EXPECT_EQ(type_uri_key->get_string(), "scopedName"); + EXPECT_EQ( + type_uri_value->get_string(), + "my_prog.FooBar"); // This should not be hashed + } + }); + }); + }); + } +}