From 8992c80c72b5d3c1c7c5a58fdb25fa3d735f5a10 Mon Sep 17 00:00:00 2001 From: Hana Joo Date: Tue, 3 Dec 2024 05:20:38 -0800 Subject: [PATCH] Cache the result of `sub_one_annotation` which happens during the construction of "BadType". Also update types on the signatures along the way. When a bad match of type happens, it doesn't necessarily mean that there would be a type error. It means that out of many possible signature matches (or type matches) between different types including generics, there is something that doesn't match and we cannot use that result. This result will be used later when producing diagnostics to indicate which pair of types mismatch, in the case where there is no single type match out of those combinations. The problem is that when the combinations (e.g. multiple signatures, generics, recursive types) that needs to be verified need to be type checked, it tries out so many different combinations of types, and generate tons of the same "BadMatch" which is just redundant. Making it even worse, it's not only the computation but also the memory pressure caused by this because it seems to construct something which is memory intensive, thus GC frequently showing up in profile in these particular cases. There is still some risk that this might increase the peak memory consumption because once put in the cache, the objects will have the same lifetime as the type checker, but the drastic performance improvement in these particular cases seem worth it. A better solution might be actually making the type checker not call into generating these seemingly redundant type checks on these crazy combinations, but at a glance this seemed like a fundamental design issue that lives in the complex nature of the type graph, and without being able to understand / changing it this seems to be the shortest path without hurting code health too much. PiperOrigin-RevId: 702301684 --- pytype/CMakeLists.txt | 1 + pytype/abstract/_interpreter_function.py | 2 +- pytype/abstract/abstract_utils.py | 8 ++-- pytype/annotation_utils.py | 51 ++++++++++++++++++++---- 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/pytype/CMakeLists.txt b/pytype/CMakeLists.txt index 6ec0db45b..bc57b6adb 100644 --- a/pytype/CMakeLists.txt +++ b/pytype/CMakeLists.txt @@ -183,6 +183,7 @@ py_library( pytype.overlays.overlays pytype.pytd.pytd pytype.typegraph.cfg + pytype.types.types ) py_library( diff --git a/pytype/abstract/_interpreter_function.py b/pytype/abstract/_interpreter_function.py index 67f496aee..4327e7c47 100644 --- a/pytype/abstract/_interpreter_function.py +++ b/pytype/abstract/_interpreter_function.py @@ -636,7 +636,7 @@ def call( # lower frames (ones closer to the end of self.ctx.vm.frames) take # precedence over higher ones. for frame in reversed(self.ctx.vm.frames): - annotation_substs = abstract_utils.combine_substs( + annotation_substs = abstract_utils.combine_substs( # pytype: disable=wrong-arg-types frame.substs, annotation_substs ) # Keep type parameters without substitutions, as they may be needed for diff --git a/pytype/abstract/abstract_utils.py b/pytype/abstract/abstract_utils.py index 2aa6b0672..2d8f0d7f7 100644 --- a/pytype/abstract/abstract_utils.py +++ b/pytype/abstract/abstract_utils.py @@ -1,7 +1,7 @@ """Utilities for abstract.py.""" import collections -from collections.abc import Collection, Generator, Iterable, Mapping, Sequence +from collections.abc import Generator, Iterable, Mapping, Sequence import dataclasses import logging from typing import Any, TYPE_CHECKING, TypeGuard @@ -841,9 +841,9 @@ def is_generic_protocol(val: "_base.BaseValue") -> bool: def combine_substs( - substs1: Collection[dict[str, cfg.Variable]] | None, - substs2: Collection[dict[str, cfg.Variable]] | None, -) -> Collection[dict[str, cfg.Variable]]: + substs1: Sequence[Mapping[str, cfg.Variable]] | None, + substs2: Sequence[Mapping[str, cfg.Variable]] | None, +) -> Sequence[Mapping[str, cfg.Variable]]: """Combines the two collections of type parameter substitutions.""" if substs1 and substs2: return tuple({**sub1, **sub2} for sub1 in substs1 for sub2 in substs2) # pylint: disable=g-complex-comprehension diff --git a/pytype/annotation_utils.py b/pytype/annotation_utils.py index 298eca77f..ffb95e6d7 100644 --- a/pytype/annotation_utils.py +++ b/pytype/annotation_utils.py @@ -1,7 +1,7 @@ """Utilities for inline type annotations.""" import collections -from collections.abc import Sequence +from collections.abc import Callable, Mapping, Sequence import dataclasses import itertools from typing import Any @@ -27,6 +27,21 @@ class AnnotatedValue: class AnnotationUtils(utils.ContextWeakrefMixin): """Utility class for inline type annotations.""" + def __init__(self, ctx): + super().__init__(ctx) + # calling sub_one_annotation is costly, due to calling multiple of chained + # constructors (via annot.replace) and generating complex data structure. + # And in some corner cases which includes recursive generic types with + # overloads, it causes massive call to construction of bad match which calls + # sub_one_annotations. We only store caches in case when all types are + # ground i.e. subst is empty. + # A better solution might be not to make those seemingly redundant request + # from the type checker, but for now this is a comprimise to gain + # performance in those weird corner cases. + self.annotation_sub_cache: dict[ + tuple[cfg.CFGNode, abstract.BaseValue], abstract.BaseValue + ] = dict() + def sub_annotations(self, node, annotations, substs, instantiate_unbound): """Apply type parameter substitutions to a dictionary of annotations.""" if substs and all(substs): @@ -42,7 +57,7 @@ def _get_type_parameter_subst( self, node: cfg.CFGNode, annot: abstract.TypeParameter, - substs: Sequence[dict[str, cfg.Variable]], + substs: Sequence[Mapping[str, cfg.Variable]], instantiate_unbound: bool, ) -> abstract.BaseValue: """Helper for sub_one_annotation.""" @@ -68,16 +83,38 @@ def _get_type_parameter_subst( vals = [annot] return self.ctx.convert.merge_classes(vals) - def sub_one_annotation(self, node, annot, substs, instantiate_unbound=True): + def sub_one_annotation( + self, + node: cfg.CFGNode, + annot: abstract.BaseValue, + substs: Sequence[Mapping[str, cfg.Variable]], + instantiate_unbound: bool = True, + ): def get_type_parameter_subst(annotation): return self._get_type_parameter_subst( node, annotation, substs, instantiate_unbound ) + do_caching = not substs or (len(substs) == 1 and not substs[0]) - return self._do_sub_one_annotation(node, annot, get_type_parameter_subst) + if do_caching: + res = self.annotation_sub_cache.get((node, annot), None) + if res: + return res - def _do_sub_one_annotation(self, node, annot, get_type_parameter_subst_fn): + res = self._do_sub_one_annotation(node, annot, get_type_parameter_subst) + if do_caching: + self.annotation_sub_cache[(node, annot)] = res + return res + + def _do_sub_one_annotation( + self, + node: cfg.CFGNode, + annot: abstract.BaseValue, + get_type_parameter_subst_fn: Callable[ + [abstract.BaseValue], abstract.BaseValue + ], + ): """Apply type parameter substitutions to an annotation.""" # We push annotations onto 'stack' and move them to the 'done' stack as they # are processed. For each annotation, we also track an 'inner_type_keys' @@ -92,7 +129,7 @@ def _do_sub_one_annotation(self, node, annot, get_type_parameter_subst_fn): done = [] while stack: cur, inner_type_keys = stack.pop() - if not cur.formal: + if not cur.formal: # pytype: disable=attribute-error done.append(cur) elif isinstance(cur, mixin.NestedAnnotation): if cur.is_late_annotation() and any(t[0] == cur for t in stack): @@ -411,7 +448,7 @@ def _sub_and_instantiate(self, node, name, typ, substs): class_substs = abstract_utils.combine_substs( substs, [{"typing.Self": self.ctx.vm.frame.first_arg}] ) - type_for_value = self.sub_one_annotation( + type_for_value = self.sub_one_annotation( # pytype: disable=wrong-arg-types node, typ, class_substs, instantiate_unbound=False ) else: