From cecb3adf9a68151bd6e2e8e6aa48c1bc02fafe89 Mon Sep 17 00:00:00 2001 From: yanovs <42386943+yanovs@users.noreply.github.com> Date: Fri, 24 Nov 2023 09:19:14 -0500 Subject: [PATCH] Fix perturb spec bug (#16) * Raise error when trying to perturb a spec * Add perturb section to README --- README.md | 177 ++++++++++++++++++++++------------ dilib/__init__.py | 1 + dilib/config.py | 12 ++- dilib/errors.py | 4 + dilib/specs.py | 35 ++++++- dilib/tests/test_container.py | 10 ++ 6 files changed, 170 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index a713a84..8a23cec 100644 --- a/README.md +++ b/README.md @@ -8,16 +8,16 @@ Dependency injection (DI) library for python ## About DI -[Dependency injection](https://en.wikipedia.org/wiki/Dependency_injection) -can be thought of as a **software engineering pattern** +[Dependency injection](https://en.wikipedia.org/wiki/Dependency_injection) +can be thought of as a **software engineering pattern** as well as a **framework**. The goal is to develop objects in a more composable and modular way. -The **pattern** is: when creating objects, always express what you depend on, +The **pattern** is: when creating objects, always express what you depend on, and let someone else give you those dependencies. (This is sometimes referred to as the "Hollywood principle": "Don't call us; we'll call you.") -The **framework** is meant to ease the inevitable boilerplate +The **framework** is meant to ease the inevitable boilerplate that occurs when following this pattern, and `dilib` is one such framework. See the [Google Clean Code Talk about Dependency Injection](https://testing.googleblog.com/2008/11/clean-code-talks-dependency-injection.html). @@ -34,16 +34,16 @@ pip install dilib There are 3 major parts of this framework: -- `dilib.{Prototype,Singleton}`: A recipe that describes how to instantiate +- `dilib.{Prototype,Singleton}`: A recipe that describes how to instantiate the object when needed later. `dilib.Prototype` indicates to the retriever -that a new instance should be created per retrieval, -while `dilib.Singleton` indicates only 1 instance of the object +that a new instance should be created per retrieval, +while `dilib.Singleton` indicates only 1 instance of the object should exist. (Both spec types inherit from `dilib.Spec`.) -- `dilib.Config`: Nestable bag of types and values, bound by specs, +- `dilib.Config`: Nestable bag of types and values, bound by specs, that can be loaded, perturbed, and saved. -- `dilib.Container`: The object retriever--it's in charge of -_materializing_ the aforementioned delayed specs that -are wired together by config into actual instances +- `dilib.Container`: The object retriever--it's in charge of +_materializing_ the aforementioned delayed specs that +are wired together by config into actual instances (plus caching, if indicated by the spec). ```python @@ -94,7 +94,7 @@ class EngineConfig(dilib.Config): class CarConfig(dilib.Config): - # Configs depend on other configs via types. + # Configs depend on other configs via types. # Here, CarConfig depends on EngineConfig. engine_config = EngineConfig(token_prefix="baz") @@ -120,30 +120,30 @@ assert container.config.car is container.car # Because it's a Singleton ``` Notes: -- `Car` *takes in* an `Engine` via its constructor +- `Car` *takes in* an `Engine` via its constructor (known as "constructor injection"), instead of making or getting one within itself. -- For this to work, `Car` cannot make any assumptions about -*what kind* of `Engine` it received. Different engines have different +- For this to work, `Car` cannot make any assumptions about +*what kind* of `Engine` it received. Different engines have different constructor params but have the [same API and semantics](https://en.wikipedia.org/wiki/Liskov_substitution_principle). -- In order to take advantage of typing (e.g., `mypy`, PyCharm auto-complete), -use `dilib.get_config(...)` and `container.config`, -which are type-safe alternatives to `CarConfig().get(...)` and +- In order to take advantage of typing (e.g., `mypy`, PyCharm auto-complete), +use `dilib.get_config(...)` and `container.config`, +which are type-safe alternatives to `CarConfig().get(...)` and direct `container` access. Note also how we set the `engine` config field type -to the base class `Engine`--this way, clients of the config are -abstracted away from which implementation is currently configured. +to the base class `Engine`--this way, clients of the config are +abstracted away from which implementation is currently configured. ### API Overview - `dilib.Config`: Inherit from this to specify your objects and params -- `config = dilib.get_config(ConfigClass, **global_inputs)`: Instantiate +- `config = dilib.get_config(ConfigClass, **global_inputs)`: Instantiate config object - Alternatively: `config = ConfigClass().get(**global_inputs)` - `container = dilib.get_container(config)`: Instantiate container object -by passing in the config object +by passing in the config object - Alternatively: `container = dilib.Container(config)` - `container.config.x_config.y_config.z`: Get the instantianted object - - Alternatively: `container.x_config.y_config.z`, + - Alternatively: `container.x_config.y_config.z`, or even `container["x_config.y_config.z"]` Specs: @@ -152,7 +152,7 @@ Specs: - `dilib.Forward`: Forward to a different config field - `dilib.Prototype`: Instantiate a new object at each container retrieval - `dilib.Singleton`: Instantiate and cache object at each container retrieval -- `dilib.Singleton{Tuple,List,Dict}`: Special helpers to ease +- `dilib.Singleton{Tuple,List,Dict}`: Special helpers to ease collections of specs. E.g.: ```python @@ -179,11 +179,13 @@ class CollectionsConfig(dilib.Config): xy_dict1 = dilib.SingletonDict({"x": x, "y": y}) xy_dict2 = dilib.SingletonDict({"x": x, "y": y}, z=z) - # You can also build a partial kwargs dict that can be + # You can also build a partial kwargs dict that can be # re-used and combined downstream partial_kwargs = dilib.SingletonDict(x=x, y=y) values0 = dilib.Singleton(ValuesWrapper, __lazy_kwargs=partial_kwargs) - values1 = dilib.Singleton(ValuesWrapper, z=4, __lazy_kwargs=partial_kwargs) + values1 = dilib.Singleton( + ValuesWrapper, z=4, __lazy_kwargs=partial_kwargs + ) config = dilib.get_config(CollectionsConfig) @@ -200,7 +202,7 @@ assert container.config.xy_dict2 == {"x": 1, "y": 2, "z": 3} ### pinject -A prominent DI library in +A prominent DI library in python is [`pinject`](https://github.com/google/pinject). #### Advantages of dilib @@ -211,20 +213,20 @@ python is [`pinject`](https://github.com/google/pinject). - Getting is via *names* rather than *classes*. - In `pinject`, the equivalent of container attr access takes a class (like `Car`) rather than a config address. -- No implicit wiring: No assumptions are made about aligning +- No implicit wiring: No assumptions are made about aligning arg names with config params. - - Granted, `pinject` does have an explicit mode, + - Granted, `pinject` does have an explicit mode, but the framework's default state is implicit. - - The explicit wiring in dilib configs obviates the need - for complications like [inject decorators](https://github.com/google/pinject#safety) + - The explicit wiring in dilib configs obviates the need + for complications like [inject decorators](https://github.com/google/pinject#safety) and [annotations](https://github.com/google/pinject#annotations). -- Minimal or no pollution of objects: Objects are not aware of +- Minimal or no pollution of objects: Objects are not aware of the DI framework. The only exception is: if you want the IDE autocompletion to work when wiring up configs in an environment that does not support `ParamSpec` (e.g., `car = Car(engine=...)`), you have -to inherit from, e.g., `dilib.SingletonMixin`. But this is completely -optional; in `pinject`, on the other hand, one is required to +to inherit from, e.g., `dilib.SingletonMixin`. But this is completely +optional; in `pinject`, on the other hand, one is required to decorate with `@pinject.inject()` in some circumstances. ### dependency-injector @@ -234,13 +236,13 @@ Another prominent DI library in python is [`dependency-injector`](https://github #### Advantages of dilib - `dilib` discourages use of class-level state by not supporting it -(that is, `dilib.Container` is equivalent to +(that is, `dilib.Container` is equivalent to `dependency_injector.containers.DynamicContainer`) -- Cleaner separation between "config" and "container" +- Cleaner separation between "config" and "container" (dependency-injector conflates the two) - Easy-to-use perturbing with simple `config.x = new_value` syntax - Easier to nest configs via config locator pattern -- Child configs are typed instead of relying on +- Child configs are typed instead of relying on `DependenciesContainer` stub (which aids in IDE auto-complete) - Easier-to-use global input configuration - Written in native python for more transparency @@ -249,41 +251,85 @@ Another prominent DI library in python is [`dependency-injector`](https://github ### Prevent Pollution of Objects -The dependency between the DI config and the actual objects in the -object graph should be one way: -the DI config depends on the object graph types and values. -This keeps the objects clean of +The dependency between the DI config and the actual objects in the +object graph should be one way: +the DI config depends on the object graph types and values. +This keeps the objects clean of particular decisions made by the DI framework. -(`dilib` offers optional mixins that violate this decision -for users that want to favor the typing and +(`dilib` offers optional mixins that violate this decision +for users that want to favor the typing and auto-completion benefits of using the object types directly.) ### Child Configs are Singletons by Type -In `dilib`, when you set a child config on a config object, -you're not actually instantiating the child config. -Rather, you're creating a spec that will be instantiated -when the root config's `.get()` is called. -This means that the config instances are singletons by type -(unlike the actual objects specified in the config, which are by alias). -It would be cleaner to create instances of common configs and -pass them through to other configs -(that's what DI is all about, after all!). However, the decision was made -to not allow this because this would make -building up configs almost as complicated as building up the -actual object graph users are interested in -(essentially, the user would be engaged in an abstract meta-DI problem). -As such, all references to the same config type are -automatically resolved to the same instance, -at the expense of some flexibility and directness. -The upside, however, is that it's much easier to create nested configs, +In `dilib`, when you set a child config on a config object, +you're not actually instantiating the child config. +Rather, you're creating a spec that will be instantiated +when the root config's `.get()` is called. +This means that the config instances are singletons by type +(unlike the actual objects specified in the config, which are by alias). +It would be cleaner to create instances of common configs and +pass them through to other configs +(that's what DI is all about, after all!). However, the decision was made +to not allow this because this would make +building up configs almost as complicated as building up the +actual object graph users are interested in +(essentially, the user would be engaged in an abstract meta-DI problem). +As such, all references to the same config type are +automatically resolved to the same instance, +at the expense of some flexibility and directness. +The upside, however, is that it's much easier to create nested configs, which means users can get to designing the actual object graph quicker. +### Perturb Config Fields with Ease + +A major goal of `dilib` is the ability to perturb any config field +and have a guarantee that, when instantiated, all objects that depend on +that field will see the same perturbed value. + +This guarantee of self-consistency is achieved by separating config +specification from object instantiation, allowing perturbation to safely occur +in between. Note that once a config object is passed into a container, +it is automatically frozen and further perturbations are no longer allowed. + +This enables the user to easily perform param scans, integration tests, +and more, even with params that are deeply embedded in the system. E.g.: + +```python +def get_container( + db_addr: str = "db-addr", + perturb_func: Callable[[CarConfig], None] | None = None, +) -> dilib.Container[CarConfig]: + config = dilib.get_config(CarConfig, db_addr=db_addr) + if perturb_func is not None: + perturb_func(config) + return dilib.get_container(config) + + +def perturb_func_a(config: CarConfig) -> None: + config.engine_config.token = "a" + + +def perturb_func_b(config: CarConfig) -> None: + config.engine_config.token = "b" + + +# Create multiple containers for each perturbation +ctr_a = get_container(perturb_func=perturb_func_a) +ctr_b = get_container(perturb_func=perturb_func_b) + +# Get cars corresponding to each perturbation, all in the same process space. +# No matter what object we get from ctr_a, it will only have been +# created using objects that have seen token="a". +car_a = ctr_a.config.car +car_b = ctr_b.config.car +``` + ### Factories for Dynamic Objects -If you need to configure objects dynamically -(e.g., check db value to resolve what type to use, +If you need to configure objects dynamically +(e.g., check db value to resolve what type to use, set config keys based on another value), consider a factory pattern like: ```python @@ -298,7 +344,8 @@ class Foo: value: int -# Factory that takes static params via constructor injection and dynamic params via method injection +# Factory that takes static params via constructor injection and +# dynamic params via method injection @dataclasses.dataclass(frozen=True) class FooFactory: db_host: str @@ -320,6 +367,8 @@ class FooClient: class FooConfig(dilib.Config): db_host = dilib.GlobalInput(type_=str, default="some-db-addr") - foo_factory = dilib.Singleton(FooFactory, db_host=db_host, alpha=1, beta=2) + foo_factory = dilib.Singleton( + FooFactory, db_host=db_host, alpha=1, beta=2 + ) foo_client = dilib.Singleton(FooClient, foo_factory=foo_factory) ``` diff --git a/dilib/__init__.py b/dilib/__init__.py index cba71e8..7975b2a 100644 --- a/dilib/__init__.py +++ b/dilib/__init__.py @@ -8,6 +8,7 @@ from dilib.errors import FrozenConfigError as FrozenConfigError from dilib.errors import InputConfigError as InputConfigError from dilib.errors import NewKeyConfigError as NewKeyConfigError +from dilib.errors import PerturbSpecError as PerturbSpecError from dilib.errors import SetChildConfigError as SetChildConfigError from dilib.specs import Forward as Forward from dilib.specs import GlobalInput as GlobalInput diff --git a/dilib/config.py b/dilib/config.py index 9550f5d..5e90e14 100644 --- a/dilib/config.py +++ b/dilib/config.py @@ -15,6 +15,11 @@ class ConfigSpec(dilib.specs.Spec[TC]): """Represents nestable bag of types and values.""" + _INTERNAL_FIELDS = dilib.specs.Spec._INTERNAL_FIELDS + [ + "cls", + "local_inputs", + ] + def __init__(self, cls: type[TC], **local_inputs: Any) -> None: super().__init__() self.cls = cls @@ -57,7 +62,7 @@ def __hash__(self) -> int: class Config: """Description of how specs depend on each other.""" - _INTERNAL_FIELDS = ( + _INTERNAL_FIELDS = [ "_config_locator", "_keys", "_specs", @@ -71,7 +76,7 @@ class Config: "freeze", "_get_spec", "_get_child_class", - ) + ] def __new__( cls, *args: Any, _materialize: bool = False, **kwargs: Any @@ -257,8 +262,7 @@ def __setattr__(self, key: str, value: Any) -> None: or key == "_INTERNAL_FIELDS" or key in self._INTERNAL_FIELDS ): - super().__setattr__(key, value) - return + return super().__setattr__(key, value) if self._frozen: raise dilib.errors.FrozenConfigError( diff --git a/dilib/errors.py b/dilib/errors.py index 5b9b62d..84f5175 100644 --- a/dilib/errors.py +++ b/dilib/errors.py @@ -16,3 +16,7 @@ class NewKeyConfigError(ConfigError): class SetChildConfigError(ConfigError): pass + + +class PerturbSpecError(ConfigError): + pass diff --git a/dilib/specs.py b/dilib/specs.py index 6cf8d7a..305dd6e 100644 --- a/dilib/specs.py +++ b/dilib/specs.py @@ -9,7 +9,9 @@ from typing import Any, Callable, Generic, TypeVar, cast -from typing_extensions import ParamSpec, TypeAlias +from typing_extensions import ParamSpec, TypeAlias, override + +import dilib.errors MISSING = object() MISSING_DICT: dict = dict() # Need a special typed sentinel for mypy @@ -49,6 +51,7 @@ def __getattr__(self, attr: str) -> AttrFuture: class Spec(Generic[T]): """Represents delayed object to be instantiated later.""" + _INTERNAL_FIELDS = ["spec_id"] NEXT_SPEC_ID = 0 def __init__(self, spec_id: SpecID | None = None) -> None: @@ -57,6 +60,25 @@ def __init__(self, spec_id: SpecID | None = None) -> None: def __getattr__(self, attr: str) -> AttrFuture: return AttrFuture(self.spec_id, [attr]) + @override + def __setattr__(self, name: str, value: Any) -> None: + if ( + name.startswith("__") + or name == "_INTERNAL_FIELDS" + or name in self._INTERNAL_FIELDS + ): + return super().__setattr__(name, value) + + # NB: We considered supporting this kind of perturbation, + # but the issue is that we don't know whether the config + # this spec is attached to has been frozen. For sake of safety + # and simplicity, we raise an error here instead. + raise dilib.errors.PerturbSpecError( + "Cannot set on a spec. " + "If you'd like to perturb a value used by a spec, " + "promote it to be a config field and perturb the config instead." + ) + # For mypy def __call__(self, *args: Any, **kwargs: Any) -> Any: return None @@ -73,6 +95,8 @@ def _get_next_spec_id(cls) -> SpecID: class _Object(Spec[T]): """Represents fully-instantiated object to pass through.""" + _INTERNAL_FIELDS = Spec._INTERNAL_FIELDS + ["obj"] + def __init__(self, obj: T, spec_id: SpecID | None = None) -> None: super().__init__(spec_id=spec_id) self.obj = obj @@ -92,6 +116,8 @@ def Object(obj: T) -> T: # noqa: N802 class _Input(Spec[T]): """Represents user input to config.""" + _INTERNAL_FIELDS = Spec._INTERNAL_FIELDS + ["type_", "default"] + def __init__( self, type_: type[T] | None = None, default: Any = MISSING ) -> None: @@ -143,6 +169,13 @@ def LocalInput( # noqa: N802 class _Callable(Spec[T]): """Represents callable (e.g., func, type) to be called with given args.""" + _INTERNAL_FIELDS = Spec._INTERNAL_FIELDS + [ + "func_or_type", + "args", + "lazy_kwargs", + "kwargs", + ] + def __init__( self, func_or_type: Callable[..., T], diff --git a/dilib/tests/test_container.py b/dilib/tests/test_container.py index c8bd857..c592104 100644 --- a/dilib/tests/test_container.py +++ b/dilib/tests/test_container.py @@ -409,3 +409,13 @@ def test_contains(more_type_safe: bool) -> None: assert missing_key not in container assert missing_key not in config_proxy assert missing_key not in container._config + + +class PerturbSpecConfig(dilib.Config): + foo = dilib.Singleton(test_config.ValuesWrapper, 1, 2, z=3) + + +def test_perturb_spec() -> None: + config = dilib.get_config(PerturbSpecConfig) + with pytest.raises(dilib.PerturbSpecError): + config.foo.x = 100 # type: ignore[misc]