From 04bff1078f24f78647722c9c13ba21d4028ec0aa Mon Sep 17 00:00:00 2001 From: Elia Migliore Date: Fri, 6 Oct 2023 16:18:27 +0200 Subject: [PATCH] The scope of this pr is to replace the old parsing strategy and move towards a tree based type verification strategy. From this commit it's possible to refer to a type using a partial path and not only a fully specified path. If we declare for e.g. the following schema: ```protobuf syntax = "proto3"; package my.awesome.customer.plan.entity.v1beta1; message CustomerPlan { string plan_name = 1; } ``` When importing it it's sufficient to use a part of the full specified path (that in this case would be `my.awesome.customer.plan.entity.v1beta1.CustomerPlan`). As e.g. it would be enough to use `entity.v1beta1.CustomerPlan` or `v1beta1.CustomerPlan`, so the following declaration would be accepted: ```protobuf syntax = "proto3"; package my.awesome.customer.plan.entity.v1beta1; import "my/awesome/customer/plan/entity/v1beta1/entity.proto"; message CustomerPlanEvent { message Created { entity.v1beta1.CustomerPlan plan = 1; } } ``` --- karapace/protobuf/dependency.py | 83 ---- karapace/protobuf/known_dependency.py | 39 +- karapace/protobuf/proto_file_element.py | 21 +- karapace/protobuf/schema.py | 374 ++++++++++++++++-- karapace/protobuf/syntax.py | 2 +- .../unit/protobuf/test_proto_file_element.py | 30 +- tests/unit/protobuf/test_proto_parser.py | 6 +- tests/unit/protobuf/test_protobuf_schema.py | 241 ++++++++++- tests/unit/test_dependency_verifier.py | 61 --- 9 files changed, 630 insertions(+), 227 deletions(-) delete mode 100644 karapace/protobuf/dependency.py delete mode 100644 tests/unit/test_dependency_verifier.py diff --git a/karapace/protobuf/dependency.py b/karapace/protobuf/dependency.py deleted file mode 100644 index c2694ba03..000000000 --- a/karapace/protobuf/dependency.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -karapace - dependency - -Copyright (c) 2023 Aiven Ltd -See LICENSE for details -""" - -from karapace.dependency import DependencyVerifierResult -from karapace.protobuf.known_dependency import DependenciesHardcoded, KnownDependency -from karapace.protobuf.one_of_element import OneOfElement -from typing import List, Optional, Set - - -class ProtobufDependencyVerifier: - def __init__(self) -> None: - self.declared_types: List[str] = [] - self.used_types: List[str] = [] - self.import_path: List[str] = [] - - def add_declared_type(self, full_name: str) -> None: - self.declared_types.append(full_name) - - def add_used_type(self, parent: str, element_type: str) -> None: - if element_type.find("map<") == 0: - end = element_type.find(">") - virgule = element_type.find(",") - key = element_type[4:virgule] - value = element_type[virgule + 1 : end] - value = value.strip() - self.used_types.append(parent + ";" + key) - self.used_types.append(parent + ";" + value) - else: - self.used_types.append(parent + ";" + element_type) - - def add_import(self, import_name: str) -> None: - self.import_path.append(import_name) - - def is_type_declared( - self, - used_type: str, - declared_index: Set[str], - father_child_type: Optional[str], - used_type_with_scope: Optional[str], - ) -> bool: - return ( - used_type in declared_index - or (used_type_with_scope is not None and used_type_with_scope in declared_index) - or (father_child_type is not None and father_child_type in declared_index) - or "." + used_type in declared_index - ) - - def verify(self) -> DependencyVerifierResult: - declared_index = set(self.declared_types) - for used_type in self.used_types: - delimiter = used_type.rfind(";") - father_child_type = None - used_type_with_scope = None - if delimiter != -1: - used_type_with_scope = used_type[:delimiter] + "." + used_type[delimiter + 1 :] - father_delimiter = used_type[:delimiter].find(".") - if father_delimiter != -1: - father_child_type = used_type[:father_delimiter] + "." + used_type[delimiter + 1 :] - used_type = used_type[delimiter + 1 :] - - if used_type in DependenciesHardcoded.index: - continue - - known_pkg = KnownDependency.index_simple.get(used_type) or KnownDependency.index.get(used_type) - if known_pkg is not None and known_pkg in self.import_path: - continue - - if self.is_type_declared(used_type, declared_index, father_child_type, used_type_with_scope): - continue - - return DependencyVerifierResult(False, f'type "{used_type}" is not defined') - - return DependencyVerifierResult(True) - - -def process_one_of(verifier: ProtobufDependencyVerifier, package_name: str, parent_name: str, one_of: OneOfElement) -> None: - parent = package_name + "." + parent_name - for field in one_of.fields: - verifier.add_used_type(parent, field.element_type) diff --git a/karapace/protobuf/known_dependency.py b/karapace/protobuf/known_dependency.py index 32d95e7e9..31c4a8b33 100644 --- a/karapace/protobuf/known_dependency.py +++ b/karapace/protobuf/known_dependency.py @@ -107,26 +107,21 @@ def static_init(cls) -> None: cls.index_simple[item] = key -@static_init class DependenciesHardcoded: - index: Set = set() - - @classmethod - def static_init(cls) -> None: - cls.index = { - "bool", - "bytes", - "double", - "float", - "fixed32", - "fixed64", - "int32", - "int64", - "sfixed32", - "sfixed64", - "sint32", - "sint64", - "string", - "uint32", - "uint64", - } + index: Set[str] = { + "bool", + "bytes", + "double", + "float", + "fixed32", + "fixed64", + "int32", + "int64", + "sfixed32", + "sfixed64", + "sint32", + "sint64", + "string", + "uint32", + "uint64", + } diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 1019655ce..b60ff33ab 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -7,10 +7,13 @@ from karapace.dependency import Dependency from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes +from karapace.protobuf.extend_element import ExtendElement from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, Optional +from typing import Dict, List, NewType, Optional def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[Dict[str, Dependency]], is_self: bool): @@ -29,18 +32,22 @@ def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optio _collect_dependencies_types(compare_types, sub_deps, is_self) +TypeName = NewType("TypeName", str) +PackageName = NewType("PackageName", str) + + class ProtoFileElement: def __init__( self, location: Location, - package_name: Optional[str] = None, + package_name: Optional[PackageName] = None, syntax: Optional[Syntax] = None, - imports: Optional[list] = None, - public_imports: Optional[list] = None, + imports: Optional[List[TypeName]] = None, + public_imports: Optional[List[TypeName]] = None, types: Optional[List[TypeElement]] = None, - services: Optional[list] = None, - extend_declarations: Optional[list] = None, - options: Optional[list] = None, + services: Optional[List[ServiceElement]] = None, + extend_declarations: Optional[List[ExtendElement]] = None, + options: Optional[List[OptionElement]] = None, ) -> None: if types is None: types = list() diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 3829f9056..676591870 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -2,23 +2,28 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from karapace.dataclasses import default_dataclass + # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Schema.kt # Ported partially for required functionality. from karapace.dependency import Dependency, DependencyVerifierResult from karapace.protobuf.compare_result import CompareResult -from karapace.protobuf.dependency import process_one_of, ProtobufDependencyVerifier from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import IllegalArgumentException +from karapace.protobuf.known_dependency import DependenciesHardcoded, KnownDependency from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.protobuf.proto_parser import ProtoParser from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented from karapace.schema_references import Reference -from typing import Mapping, Optional, Sequence +from typing import Iterable, List, Mapping, Optional, Sequence, Set, Tuple + +import itertools def add_slashes(text: str) -> str: @@ -106,6 +111,138 @@ def option_element_string(option: OptionElement) -> str: return f"option {result};\n" +@default_dataclass +class UsedType: + parent_object_qualified_name: str + used_attribute_type: str + + +@default_dataclass +class SourceFileReference: + reference: str + import_order: int + + +@default_dataclass +class TypeTree: + token: str + children: List["TypeTree"] + source_reference: Optional[SourceFileReference] + + def source_reference_tree_recursive(self) -> Iterable[Optional[SourceFileReference]]: + sources = [] if self.source_reference is None else [self.source_reference] + for child in self.children: + sources = itertools.chain(sources, child.source_reference_tree()) + return sources + + def source_reference_tree(self) -> Iterable[SourceFileReference]: + return filter(lambda x: x is not None, self.source_reference_tree_recursive()) + + def inserted_elements(self) -> int: + """ + Returns the newest element generation accessible from that node. + Where with the term generation we mean the order for which a message + has been imported. + If called on the root of the tree it corresponds with the number of + fully specified path objects present in the tree. + """ + return max(reference.import_order for reference in self.source_reference_tree()) + + def oldest_matching_import(self) -> int: + """ + Returns the oldest element generation accessible from that node. + Where with the term generation we mean the order for which a message + has been imported. + """ + return min(reference.import_order for reference in self.source_reference_tree()) + + def expand_missing_absolute_path_recursive(self, oldest_import: int) -> Sequence[str]: + """ + Method that, once called on a node, traverse all the leaf and + return the oldest imported element with the common postfix. + This is also the current behaviour + of protobuf while dealing with a not fully specified path, it seeks for + the firstly imported message with a matching path. + """ + if self.source_reference is not None: + if self.source_reference.import_order == oldest_import: + return [self.token] + return [] + + for child in self.children: + maybe_oldest_child = child.expand_missing_absolute_path_recursive(oldest_import) + if maybe_oldest_child is not None: + return list(maybe_oldest_child) + [self.token] + + return [] + + def expand_missing_absolute_path(self) -> Sequence[str]: + oldest_import = self.oldest_matching_import() + expanded_missing_path = self.expand_missing_absolute_path_recursive(oldest_import) + assert ( + expanded_missing_path is not None + ), "each node should have, by construction, at least a leaf that is a fully specified path" + return expanded_missing_path[:-1] # skipping myself since I was matched + + @property + def is_fully_qualified_type(self) -> bool: + return len(self.children) == 0 + + def represent(self, level=0) -> str: + spacing = " " * 3 * level + if self.is_fully_qualified_type: + return f"{spacing}>{self.token}" + child_repr = "\n".join(child.represent(level=level + 1) for child in self.children) + return f"{spacing}{self.token} ->\n{child_repr}" + + def __repr__(self) -> str: + return self.represent() + + +def _add_new_type_recursive( + parent_tree: TypeTree, + remaining_tokens: List[str], + file: str, + inserted_elements: int, +) -> None: + """ + Add the new types to the TypeTree recursively, + nb: this isn't a pure function, the remaining_tokens object it's mutated, call it with carefully. + """ + if remaining_tokens: + token = remaining_tokens.pop() + for child in parent_tree.children: + if child.token == token: + return _add_new_type_recursive( + child, remaining_tokens, file, inserted_elements + ) # add a reference from which object/file was coming from + + new_leaf = TypeTree( + token=token, + children=[], + source_reference=( + None if remaining_tokens else SourceFileReference(reference=file, import_order=inserted_elements) + ), + ) + parent_tree.children.append(new_leaf) + return _add_new_type_recursive(new_leaf, remaining_tokens, file, inserted_elements) + return None + + +def add_new_type( + root_tree: TypeTree, + full_path_type: str, + file: str, + inserted_elements: int, +) -> None: + _add_new_type_recursive( + root_tree, + full_path_type.split("."), + file, + inserted_elements, + ) # one more it's added after that instruction + + class ProtobufSchema: DEFAULT_LOCATION = Location("", "") @@ -123,20 +260,191 @@ def __init__( self.references = references self.dependencies = dependencies + def type_in_tree(self, tree: TypeTree, remaining_tokens: List[str]) -> Optional[TypeTree]: + if remaining_tokens: + to_seek = remaining_tokens.pop() + + for child in tree.children: + if child.token == to_seek: + return self.type_in_tree(child, remaining_tokens) + return None + return tree + + def type_exist_in_tree(self, tree: TypeTree, remaining_tokens: List[str]) -> bool: + return self.type_in_tree(tree, remaining_tokens) is not None + + def recursive_imports(self) -> Set[str]: + imports = set(self.proto_file_element.imports) + + if self.dependencies: + for key in self.dependencies: + imports = imports | self.dependencies[key].get_schema().schema.recursive_imports() + + return imports + + def are_type_usage_valid(self, root_type_tree: TypeTree, used_types: List[UsedType]) -> Tuple[bool, Optional[str]]: + # Please note that this check only ensures the requested type exists. However, for performance reasons, it works in + # the opposite way of how specificity works in Protobuf. In Protobuf, the type is matched not only to check if it + # exists, but also based on the order of search: local definition comes before imported types. In this code, we + # first check imported types and then the actual used type. If we combine the second and third checks and add a + # monotonic number that inserts a reference of where and when the type was declared and imported, we can uniquely + # and precisely identify the involved type with a lookup. Unfortunately, with the current structure, this is not + # possible. This would require an additional refactor of the second condition going to reconstruct also here the + # type tree and applying it into the existing one (that with the monotonic counter would be enough to make sure the + # algorithm can match and keep track of the references based on the import order) + for used_type in used_types: + parent_type_fully_qualified_name = used_type.parent_object_qualified_name[1:] + attribute_type = used_type.used_attribute_type + + is_parent_object_declared = self.type_exist_in_tree( + root_type_tree, + list( + parent_type_fully_qualified_name.split("."), + ), + ) + is_attribute_type_primitive = attribute_type in DependenciesHardcoded.index + + # NB: this is a direct translation the previous parsing code, in theory we can construct the + # same tree starting from the imports and just append the new declarations the tree, we just have + # to add the values associated to the map after seeking for all the values associated with the imports. + # e.g. if the user import `google/protobuf/struct.proto` + # we lookup in the KnownDependencies and we can construct the same postfix token trie by adding: + # "google/protobuf/struct.proto": [ + # "google.protobuf.Struct", + # "google.protobuf.Value", + # "google.protobuf.NullValue", + # "google.protobuf.ListValue", + # ] + # left as todo. + + if is_attribute_type_primitive: + is_attribute_type_included_in_server = True + elif attribute_type in KnownDependency.index_simple or attribute_type in KnownDependency.index: + required_import = KnownDependency.index_simple.get(attribute_type, None) + if required_import is None: + required_import = KnownDependency.index[attribute_type] + + is_attribute_type_included_in_server = required_import in self.recursive_imports() + else: + is_attribute_type_included_in_server = False + + is_attribute_primitive_or_declared_before = is_attribute_type_included_in_server or self.type_exist_in_tree( + root_type_tree, + list(attribute_type.lstrip(".").split(".")), + ) + + if not is_parent_object_declared or not is_attribute_primitive_or_declared_before: + return False, attribute_type if is_parent_object_declared else used_type.parent_object_qualified_name + return True, None + def verify_schema_dependencies(self) -> DependencyVerifierResult: - verifier = ProtobufDependencyVerifier() - self.collect_dependencies(verifier) - return verifier.verify() + declared_types_root_tree = self.types_tree() + used_types_list = self.used_types() + are_declarations_valid, maybe_wrong_declaration = self.are_type_usage_valid( + declared_types_root_tree, used_types_list + ) + if are_declarations_valid: + return DependencyVerifierResult(True) + return DependencyVerifierResult(False, f'type "{maybe_wrong_declaration}" is not defined') - def collect_dependencies(self, verifier: ProtobufDependencyVerifier) -> None: + def nested_type_tree( + self, + root_tree: TypeTree, + parent_name: str, + nested_type: TypeElement, + filename: str, + inserted_types: int, + ) -> int: + nested_component_full_path_name = parent_name + "." + nested_type.name + add_new_type(root_tree, nested_component_full_path_name, filename, inserted_types) + inserted_types += 1 + for child in nested_type.nested_types: + self.nested_type_tree(root_tree, nested_component_full_path_name, child, filename, inserted_types) + inserted_types += 1 + + return inserted_types + + def types_tree_recursive( + self, + root_tree: TypeTree, + inserted_types: int, + filename: str, + ) -> int: + # verify that the import it's the same as the order of importing + if self.dependencies: + for dependency in self.dependencies: + inserted_types = ( + self.dependencies[dependency] + .get_schema() + .schema.types_tree_recursive(root_tree, inserted_types, dependency) + ) + + # we can add an incremental number and a reference to the file + # to get back which is the file who a certain declaration it's referring to + # basically during a lookup you traverse all the leaf after the last point + # in common with the declaration and you take the node with the minimum numer, + # the reference in that node it's the file used in the body. Not done yet since + # now isn't required. + + package_name = self.proto_file_element.package_name or "" + for element_type in self.proto_file_element.types: + type_name = element_type.name + full_name = package_name + "." + type_name + add_new_type(root_tree, full_name, filename, inserted_types) + inserted_types += 1 + + for nested_type in element_type.nested_types: + inserted_types = self.nested_type_tree(root_tree, full_name, nested_type, filename, inserted_types) + + return inserted_types + + def types_tree(self) -> TypeTree: + root_tree = TypeTree( + token=".", + children=[], + source_reference=None, + ) + self.types_tree_recursive(root_tree, 0, "main_schema_file") + return root_tree + + @staticmethod + def used_type(parent: str, element_type: str) -> List[UsedType]: + if element_type.find("map<") == 0: + end = element_type.find(">") + virgule = element_type.find(",") + key_element_type = element_type[4:virgule] + value_element_type = element_type[virgule + 1 : end] + value_element_type = value_element_type.strip() + return [ + UsedType(parent_object_qualified_name=parent, used_attribute_type=key_element_type), + UsedType(parent_object_qualified_name=parent, used_attribute_type=value_element_type), + ] + return [UsedType(parent_object_qualified_name=parent, used_attribute_type=element_type)] + + @staticmethod + def dependencies_one_of( + package_name: str, + parent_name: str, + one_of: OneOfElement, + ) -> List[UsedType]: + parent = package_name + "." + parent_name + dependencies = [] + for field in one_of.fields: + dependencies.append( + UsedType( + parent_object_qualified_name=parent, + used_attribute_type=field.element_type, + ) + ) + return dependencies + + def used_types(self) -> List[UsedType]: + dependencies_used_types = [] if self.dependencies: for key in self.dependencies: - self.dependencies[key].get_schema().schema.collect_dependencies(verifier) + dependencies_used_types += self.dependencies[key].get_schema().schema.used_types() - for i in self.proto_file_element.imports: - verifier.add_import(i) - for i in self.proto_file_element.public_imports: - verifier.add_import(i) + used_types = [] package_name = self.proto_file_element.package_name if package_name is None: @@ -146,45 +454,36 @@ def collect_dependencies(self, verifier: ProtobufDependencyVerifier) -> None: for element_type in self.proto_file_element.types: type_name = element_type.name full_name = package_name + "." + type_name - verifier.add_declared_type(full_name) - verifier.add_declared_type(type_name) if isinstance(element_type, MessageElement): for one_of in element_type.one_ofs: - process_one_of(verifier, package_name, type_name, one_of) + used_types += self.dependencies_one_of(package_name, type_name, one_of) for field in element_type.fields: - verifier.add_used_type(full_name, field.element_type) + used_types += self.used_type(full_name, field.element_type) for nested_type in element_type.nested_types: - self._process_nested_type(verifier, package_name, type_name, nested_type) + used_types += self.nested_used_type(package_name, type_name, nested_type) - def _process_nested_type( - self, verifier: ProtobufDependencyVerifier, package_name: str, parent_name, element_type: TypeElement - ): - verifier.add_declared_type(package_name + "." + parent_name + "." + element_type.name) - verifier.add_declared_type(parent_name + "." + element_type.name) - ancestor_only = parent_name.find(".") - if ancestor_only != -1: - verifier.add_declared_type(parent_name[:ancestor_only] + "." + element_type.name) + return used_types + + def nested_used_type( + self, + package_name: str, + parent_name: str, + element_type: TypeElement, + ) -> List[str]: + used_types = [] if isinstance(element_type, MessageElement): for one_of in element_type.one_ofs: # The parent name for nested one of fields is the parent and element type name. # The field type name is handled in process_one_of function. one_of_parent_name = parent_name + "." + element_type.name - process_one_of(verifier, package_name, one_of_parent_name, one_of) + used_types += self.dependencies_one_of(package_name, one_of_parent_name, one_of) for field in element_type.fields: - # since we declare the subtype in the same level of the scope, it's legit - # use the same scoping when declare the dependent type. - if field.element_type in [defined_in_same_scope.name for defined_in_same_scope in element_type.nested_types]: - verifier.add_used_type(parent_name + "." + element_type.name, field.element_type) - else: - ancestor_only = parent_name.find(".") - if ancestor_only != -1: - verifier.add_used_type(parent_name[:ancestor_only], field.element_type) - else: - verifier.add_used_type(parent_name, field.element_type) - + used_types += self.used_type(package_name + "." + parent_name, field.element_type) for nested_type in element_type.nested_types: - self._process_nested_type(verifier, package_name, parent_name + "." + element_type.name, nested_type) + used_types += self.nested_used_type(package_name, parent_name + "." + element_type.name, nested_type) + + return used_types def __str__(self) -> str: if not self.cache_string: @@ -238,6 +537,7 @@ def to_schema(self) -> str: strings.append("\n") for service in shm.services: strings.append(str(service.to_schema())) + return "".join(strings) def compare(self, other: "ProtobufSchema", result: CompareResult) -> CompareResult: diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py index bbf03de13..8da68cd40 100644 --- a/karapace/protobuf/syntax.py +++ b/karapace/protobuf/syntax.py @@ -18,7 +18,7 @@ def _missing_(cls, value: object) -> None: raise IllegalArgumentException(f"unexpected syntax: {value}") def __str__(self) -> str: - return self.value + return str(self.value) def __repr__(self) -> str: return self.value diff --git a/tests/unit/protobuf/test_proto_file_element.py b/tests/unit/protobuf/test_proto_file_element.py index fc82db8b7..fb4a63e9b 100644 --- a/tests/unit/protobuf/test_proto_file_element.py +++ b/tests/unit/protobuf/test_proto_file_element.py @@ -11,7 +11,7 @@ from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.option_element import OptionElement, PACKED_OPTION_ELEMENT -from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.proto_file_element import PackageName, ProtoFileElement, TypeName from karapace.protobuf.proto_parser import ProtoParser from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax @@ -58,7 +58,7 @@ def test_simple_to_schema(): def test_simple_with_imports_to_schema(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement(location=location, imports=["example.other"], types=[element]) + file = ProtoFileElement(location=location, imports=[TypeName("example.other")], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -73,13 +73,15 @@ def test_simple_with_imports_to_schema(): def test_add_multiple_dependencies(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement(location=location, imports=["example.other", "example.another"], types=[element]) + file = ProtoFileElement( + location=location, imports=[TypeName("example.other"), TypeName("example.another")], types=[element] + ) assert len(file.imports) == 2 def test_simple_with_public_imports_to_schema(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement(location=location, public_imports=["example.other"], types=[element]) + file = ProtoFileElement(location=location, public_imports=[TypeName("example.other")], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -94,14 +96,18 @@ def test_simple_with_public_imports_to_schema(): def test_add_multiple_public_dependencies(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement(location=location, public_imports=["example.other", "example.another"], types=[element]) + file = ProtoFileElement( + location=location, public_imports=[TypeName("example.other"), TypeName("example.another")], types=[element] + ) assert len(file.public_imports) == 2 def test_simple_with_both_imports_to_schema(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement(location=location, imports=["example.thing"], public_imports=["example.other"], types=[element]) + file = ProtoFileElement( + location=location, imports=[TypeName("example.thing")], public_imports=[TypeName("example.other")], types=[element] + ) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -198,9 +204,9 @@ def test_multiple_everything_to_schema(): service2 = ServiceElement(location=location.at(22, 1), name="Service2") file = ProtoFileElement( location=location, - package_name="example.simple", - imports=["example.thing"], - public_imports=["example.other"], + package_name=PackageName("example.simple"), + imports=[TypeName("example.thing")], + public_imports=[TypeName("example.other")], types=[element1, element2], services=[service1, service2], extend_declarations=[extend1, extend2], @@ -266,9 +272,9 @@ def test_default_is_set_in_proto2(): file = ProtoFileElement( syntax=Syntax.PROTO_2, location=location, - package_name="example.simple", - imports=["example.thing"], - public_imports=["example.other"], + package_name=PackageName("example.simple"), + imports=[TypeName("example.thing")], + public_imports=[TypeName("example.other")], types=[message], ) expected = """ diff --git a/tests/unit/protobuf/test_proto_parser.py b/tests/unit/protobuf/test_proto_parser.py index 51a16063d..316276796 100644 --- a/tests/unit/protobuf/test_proto_parser.py +++ b/tests/unit/protobuf/test_proto_parser.py @@ -18,7 +18,7 @@ from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement -from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.proto_file_element import ProtoFileElement, TypeName from karapace.protobuf.proto_parser import ProtoParser from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.rpc_element import RpcElement @@ -1188,13 +1188,13 @@ def test_option_parentheses(): def test_imports(): proto = 'import "src/test/resources/unittest_import.proto";\n' - expected = ProtoFileElement(location=location, imports=["src/test/resources/unittest_import.proto"]) + expected = ProtoFileElement(location=location, imports=[TypeName("src/test/resources/unittest_import.proto")]) assert ProtoParser.parse(location, proto) == expected def test_public_imports(): proto = 'import public "src/test/resources/unittest_import.proto";\n' - expected = ProtoFileElement(location=location, public_imports=["src/test/resources/unittest_import.proto"]) + expected = ProtoFileElement(location=location, public_imports=[TypeName("src/test/resources/unittest_import.proto")]) assert ProtoParser.parse(location, proto) == expected diff --git a/tests/unit/protobuf/test_protobuf_schema.py b/tests/unit/protobuf/test_protobuf_schema.py index 6021b45c3..9b89a1425 100644 --- a/tests/unit/protobuf/test_protobuf_schema.py +++ b/tests/unit/protobuf/test_protobuf_schema.py @@ -2,11 +2,13 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +from karapace.dependency import Dependency from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location -from karapace.protobuf.schema import ProtobufSchema +from karapace.protobuf.schema import ProtobufSchema, SourceFileReference, TypeTree from karapace.schema_models import SchemaType, ValidatedTypedSchema +from karapace.typing import Subject from tests.schemas.protobuf import ( schema_protobuf_compare_one, schema_protobuf_order_after, @@ -376,3 +378,240 @@ def test_protobuf_self_referencing_schema(): """ assert isinstance(ValidatedTypedSchema.parse(SchemaType.PROTOBUF, proto4).schema, ProtobufSchema) + + +def partial_path_protobuf_schema() -> ProtobufSchema: + plan = """\ + syntax = "proto3"; + + package my.awesome.customer.plan.entity.v1beta1; + + message CustomerPlan { + string plan_name = 1; + } + """ + + customer_plan_event = """\ + syntax = "proto3"; + + package my.awesome.customer.plan.entity.v1beta1; + import "my/awesome/customer/plan/entity/v1beta1/entity.proto"; + + message CustomerPlanEvent { + message Created { + //the full path declaration is `my.awesome.customer.plan.entity.v1beta1.CustomerPlan plan = 1;` + entity.v1beta1.CustomerPlan plan = 1; + } + } + """ + + no_ref_schema = ValidatedTypedSchema.parse(SchemaType.PROTOBUF, plan) + dep = Dependency("this_is_ignored", Subject("this_also"), 1, no_ref_schema) + ref_schema = ValidatedTypedSchema.parse(SchemaType.PROTOBUF, customer_plan_event, None, {"foobar": dep}) + schema = ref_schema.schema + assert isinstance(schema, ProtobufSchema) + return schema + + +def test_type_tree_rendering() -> None: + schema = partial_path_protobuf_schema() + assert ( + str(schema.types_tree()) + == """. -> + CustomerPlan -> + v1beta1 -> + entity -> + plan -> + customer -> + awesome -> + >my + CustomerPlanEvent -> + v1beta1 -> + entity -> + plan -> + customer -> + awesome -> + >my + Created -> + CustomerPlanEvent -> + v1beta1 -> + entity -> + plan -> + customer -> + awesome -> + >my""" + ) + + +def test_type_tree_parsed_structure() -> None: + schema = partial_path_protobuf_schema() + assert schema.types_tree() == TypeTree( + token=".", + children=[ + TypeTree( + token="CustomerPlan", + children=[ + TypeTree( + token="v1beta1", + children=[ + TypeTree( + token="entity", + children=[ + TypeTree( + token="plan", + children=[ + TypeTree( + token="customer", + children=[ + TypeTree( + token="awesome", + children=[ + TypeTree( + token="my", + children=[], + source_reference=SourceFileReference( + reference="foobar", import_order=0 + ), + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ), + TypeTree( + token="CustomerPlanEvent", + children=[ + TypeTree( + token="v1beta1", + children=[ + TypeTree( + token="entity", + children=[ + TypeTree( + token="plan", + children=[ + TypeTree( + token="customer", + children=[ + TypeTree( + token="awesome", + children=[ + TypeTree( + token="my", + children=[], + source_reference=SourceFileReference( + reference="main_schema_file", import_order=1 + ), + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ), + TypeTree( + token="Created", + children=[ + TypeTree( + token="CustomerPlanEvent", + children=[ + TypeTree( + token="v1beta1", + children=[ + TypeTree( + token="entity", + children=[ + TypeTree( + token="plan", + children=[ + TypeTree( + token="customer", + children=[ + TypeTree( + token="awesome", + children=[ + TypeTree( + token="my", + children=[], + source_reference=SourceFileReference( + reference="main_schema_file", import_order=2 + ), + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ) + ], + source_reference=None, + ), + ], + source_reference=None, + ) + + +def test_type_tree_expand_types() -> None: + schema = partial_path_protobuf_schema() + + # the count start from 0 + assert schema.types_tree().inserted_elements() == 2 + + tokens_to_seek = "entity.v1beta1.CustomerPlan".split(".") + matched = schema.type_in_tree(schema.types_tree(), tokens_to_seek) + assert matched is not None + assert matched.expand_missing_absolute_path() == ["my", "awesome", "customer", "plan"] + + tokens_to_seek = "entity.v1beta1.CustomerPlan".split(".") + matched = schema.type_in_tree(schema.types_tree(), tokens_to_seek) + assert matched is not None + assert matched.expand_missing_absolute_path() == ["my", "awesome", "customer", "plan"] + + matched = schema.type_in_tree(schema.types_tree(), ["Created"]) + assert matched is not None + assert matched.expand_missing_absolute_path() == [ + "my", + "awesome", + "customer", + "plan", + "entity", + "v1beta1", + "CustomerPlanEvent", + ] diff --git a/tests/unit/test_dependency_verifier.py b/tests/unit/test_dependency_verifier.py deleted file mode 100644 index 5bbf3d019..000000000 --- a/tests/unit/test_dependency_verifier.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -karapace - tests_dependency_verifier - -Copyright (c) 2023 Aiven Ltd -See LICENSE for details -""" - -from karapace.protobuf.dependency import ProtobufDependencyVerifier - -import logging - -log = logging.getLogger(__name__) - - -async def test_protobuf_dependency_verifier(): - declared_types = [ - ".a1.Place", - "Place", - ".a1.Customer", - "Customer", - ".a1.TestMessage", - "TestMessage", - ".a1", - ".TestMessage", - ".Enum", - "TestMessage.Enum", - ".a1.TestMessage.Value", - "TestMessage.Value", - ".a1.TestMessage.Value.Label", - "TestMessage", - ".Value.Label", - ] - - used_types = [ - ".a1.Place;string", - ".a1.Place;int32", - ".a1.Customer;string", - ".a1.Customer;int32", - ".a1.Customer;Place", - ".a1.TestMessage;int32", - ".a1.TestMessage;int32", - ".a1.TestMessage;string", - ".a1.TestMessage;.a1.TestMessage.Value", - "TestMessage;Customer", - "TestMessage;int32", - "TestMessage.Value;int32", - "TestMessage.Value;string", - ] - - verifier = ProtobufDependencyVerifier() - for declared in declared_types: - verifier.add_declared_type(declared) - for used in used_types: - x = used.split(";") - verifier.add_used_type(x[0], x[1]) - - result = verifier.verify() - assert result.result, True - - verifier.add_used_type("TestMessage.Delta", "Tag") - assert result.result, False