Skip to content

Commit

Permalink
Cache the result of sub_one_annotation which happens during the con…
Browse files Browse the repository at this point in the history
…struction of "BadType". Also update types on the signatures along the way.

When a bad match of type happens, it doesn't necessarily mean that there would be a type error. It means that out of many possible signature matches (or type matches) between different types including generics, there is something that doesn't match and we cannot use that result. This result will be used later when producing diagnostics to indicate which pair of types mismatch, in the case where there is no single type match out of those combinations.

The problem is that when the combinations (e.g. multiple signatures, generics, recursive types) that needs to be verified need to be type checked, it tries out so many different combinations of types, and generate tons of the same "BadMatch" which is just redundant. Making it even worse, it's not only the computation but also the memory pressure caused by this because it seems to construct something which is memory intensive, thus GC frequently showing up in profile in these particular cases.

There is still some risk that this might increase the peak memory consumption because once put in the cache, the objects will have the same lifetime as the type checker, but the drastic performance improvement in these particular cases seem worth it.

A better solution might be actually making the type checker not call into generating these seemingly redundant type checks on these crazy combinations, but at a glance this seemed like a fundamental design issue that lives in the complex nature of the type graph, and without being able to understand / changing it this seems to be the shortest path without hurting code health too much.

PiperOrigin-RevId: 702301684
  • Loading branch information
h-joo authored and copybara-github committed Dec 3, 2024
1 parent e05ad68 commit 8992c80
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 12 deletions.
1 change: 1 addition & 0 deletions pytype/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ py_library(
pytype.overlays.overlays
pytype.pytd.pytd
pytype.typegraph.cfg
pytype.types.types
)

py_library(
Expand Down
2 changes: 1 addition & 1 deletion pytype/abstract/_interpreter_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def call(
# lower frames (ones closer to the end of self.ctx.vm.frames) take
# precedence over higher ones.
for frame in reversed(self.ctx.vm.frames):
annotation_substs = abstract_utils.combine_substs(
annotation_substs = abstract_utils.combine_substs( # pytype: disable=wrong-arg-types
frame.substs, annotation_substs
)
# Keep type parameters without substitutions, as they may be needed for
Expand Down
8 changes: 4 additions & 4 deletions pytype/abstract/abstract_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Utilities for abstract.py."""

import collections
from collections.abc import Collection, Generator, Iterable, Mapping, Sequence
from collections.abc import Generator, Iterable, Mapping, Sequence
import dataclasses
import logging
from typing import Any, TYPE_CHECKING, TypeGuard
Expand Down Expand Up @@ -841,9 +841,9 @@ def is_generic_protocol(val: "_base.BaseValue") -> bool:


def combine_substs(
substs1: Collection[dict[str, cfg.Variable]] | None,
substs2: Collection[dict[str, cfg.Variable]] | None,
) -> Collection[dict[str, cfg.Variable]]:
substs1: Sequence[Mapping[str, cfg.Variable]] | None,
substs2: Sequence[Mapping[str, cfg.Variable]] | None,
) -> Sequence[Mapping[str, cfg.Variable]]:
"""Combines the two collections of type parameter substitutions."""
if substs1 and substs2:
return tuple({**sub1, **sub2} for sub1 in substs1 for sub2 in substs2) # pylint: disable=g-complex-comprehension
Expand Down
51 changes: 44 additions & 7 deletions pytype/annotation_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Utilities for inline type annotations."""

import collections
from collections.abc import Sequence
from collections.abc import Callable, Mapping, Sequence
import dataclasses
import itertools
from typing import Any
Expand All @@ -27,6 +27,21 @@ class AnnotatedValue:
class AnnotationUtils(utils.ContextWeakrefMixin):
"""Utility class for inline type annotations."""

def __init__(self, ctx):
super().__init__(ctx)
# calling sub_one_annotation is costly, due to calling multiple of chained
# constructors (via annot.replace) and generating complex data structure.
# And in some corner cases which includes recursive generic types with
# overloads, it causes massive call to construction of bad match which calls
# sub_one_annotations. We only store caches in case when all types are
# ground i.e. subst is empty.
# A better solution might be not to make those seemingly redundant request
# from the type checker, but for now this is a comprimise to gain
# performance in those weird corner cases.
self.annotation_sub_cache: dict[
tuple[cfg.CFGNode, abstract.BaseValue], abstract.BaseValue
] = dict()

def sub_annotations(self, node, annotations, substs, instantiate_unbound):
"""Apply type parameter substitutions to a dictionary of annotations."""
if substs and all(substs):
Expand All @@ -42,7 +57,7 @@ def _get_type_parameter_subst(
self,
node: cfg.CFGNode,
annot: abstract.TypeParameter,
substs: Sequence[dict[str, cfg.Variable]],
substs: Sequence[Mapping[str, cfg.Variable]],
instantiate_unbound: bool,
) -> abstract.BaseValue:
"""Helper for sub_one_annotation."""
Expand All @@ -68,16 +83,38 @@ def _get_type_parameter_subst(
vals = [annot]
return self.ctx.convert.merge_classes(vals)

def sub_one_annotation(self, node, annot, substs, instantiate_unbound=True):
def sub_one_annotation(
self,
node: cfg.CFGNode,
annot: abstract.BaseValue,
substs: Sequence[Mapping[str, cfg.Variable]],
instantiate_unbound: bool = True,
):

def get_type_parameter_subst(annotation):
return self._get_type_parameter_subst(
node, annotation, substs, instantiate_unbound
)
do_caching = not substs or (len(substs) == 1 and not substs[0])

return self._do_sub_one_annotation(node, annot, get_type_parameter_subst)
if do_caching:
res = self.annotation_sub_cache.get((node, annot), None)
if res:
return res

def _do_sub_one_annotation(self, node, annot, get_type_parameter_subst_fn):
res = self._do_sub_one_annotation(node, annot, get_type_parameter_subst)
if do_caching:
self.annotation_sub_cache[(node, annot)] = res
return res

def _do_sub_one_annotation(
self,
node: cfg.CFGNode,
annot: abstract.BaseValue,
get_type_parameter_subst_fn: Callable[
[abstract.BaseValue], abstract.BaseValue
],
):
"""Apply type parameter substitutions to an annotation."""
# We push annotations onto 'stack' and move them to the 'done' stack as they
# are processed. For each annotation, we also track an 'inner_type_keys'
Expand All @@ -92,7 +129,7 @@ def _do_sub_one_annotation(self, node, annot, get_type_parameter_subst_fn):
done = []
while stack:
cur, inner_type_keys = stack.pop()
if not cur.formal:
if not cur.formal: # pytype: disable=attribute-error
done.append(cur)
elif isinstance(cur, mixin.NestedAnnotation):
if cur.is_late_annotation() and any(t[0] == cur for t in stack):
Expand Down Expand Up @@ -411,7 +448,7 @@ def _sub_and_instantiate(self, node, name, typ, substs):
class_substs = abstract_utils.combine_substs(
substs, [{"typing.Self": self.ctx.vm.frame.first_arg}]
)
type_for_value = self.sub_one_annotation(
type_for_value = self.sub_one_annotation( # pytype: disable=wrong-arg-types
node, typ, class_substs, instantiate_unbound=False
)
else:
Expand Down

0 comments on commit 8992c80

Please sign in to comment.