From f0763bf9993a0b792b0bb213f031247128d70bdb Mon Sep 17 00:00:00 2001 From: Antony Milne Date: Fri, 5 Apr 2024 15:55:04 +0100 Subject: [PATCH] Tidy comments --- vizro-core/examples/_dev/app.py | 1 + .../src/vizro/managers/_data_manager.py | 47 ++----------------- .../models/_components/_components_utils.py | 16 ++++--- 3 files changed, 13 insertions(+), 51 deletions(-) diff --git a/vizro-core/examples/_dev/app.py b/vizro-core/examples/_dev/app.py index cc6012700..1ab90d2af 100644 --- a/vizro-core/examples/_dev/app.py +++ b/vizro-core/examples/_dev/app.py @@ -30,6 +30,7 @@ vm.Graph(figure=px.scatter("fast_expire_data", "sepal_width", "sepal_length")), vm.Graph(figure=px.scatter("no_expire_data", "sepal_width", "sepal_length")), ], + controls=[vm.Filter(column="species")], ) dashboard = vm.Dashboard(pages=[page]) app = Vizro().build(dashboard) diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py index 238f0af1a..93f0cda0a 100644 --- a/vizro-core/src/vizro/managers/_data_manager.py +++ b/vizro-core/src/vizro/managers/_data_manager.py @@ -16,6 +16,7 @@ # * caching # * new error messages that are raised # * set cache to null in all other tests +# * copy returned # TODO: __main__ in this file: remove/move to docs @@ -123,6 +124,8 @@ def __repr__(self): """ # Note that using repr(self.__load_data) is not good since it depends on the id of self.__load_data and so # would not be consistent across processes. + # lambda function has __qualname__. partial does not unless explicitly assigned a name but will not work with + # flask-caching anyway since flask_caching.utils.function_namespace would not be able to find a name for it. return f"{self.__class__.__name__}({self._name}, {self.__load_data.__qualname__})" @@ -242,47 +245,3 @@ def _clear(self): data_manager = DataManager() - - -if __name__ == "__main__": - from functools import partial - - import vizro.plotly.express as px - - dm = data_manager - dm["iris"] = px.data.iris() - - dm._add_component("component_id_a", "iris") - print(len(dm._get_component_data("component_id_a"))) # 150 # noqa: T201 - - dm._add_component("component_id_b", "iris") - df_a = dm._get_component_data("component_id_a") - df_a.drop(columns="species", inplace=True) - print(df_a.shape) # (150, 5) # noqa: T201 - df_b = dm._get_component_data("component_id_b") - print(df_b.shape) # (150, 6) # noqa: T201 - - # Lazy loading example 1 - def retrieve_iris(): - df = px.data.iris() - subset = df.query("species == 'setosa'") - return subset - - dm["iris_subset"] = retrieve_iris - dm._add_component("component_id_c", "iris_subset") - print(len(dm._get_component_data("component_id_c"))) # 50 # noqa: T201 - - # Lazy loading example 2 - def retrieve_one_species(species): - df = px.data.iris() - subset = df[df["species"] == species].copy() - return subset - - dm["data_from_external_1"] = lambda: retrieve_one_species("setosa") - dm._add_component("component_id_d", "data_from_external_1") - print(len(dm._get_component_data("component_id_d"))) # 50 # noqa: T201 - - # Lazy loading example 3 - dm["data_from_external_2"] = partial(retrieve_one_species, "setosa") - dm._add_component("component_id_e", "data_from_external_2") - print(len(dm._get_component_data("component_id_e"))) # 50 # noqa: T201 diff --git a/vizro-core/src/vizro/models/_components/_components_utils.py b/vizro-core/src/vizro/models/_components/_components_utils.py index 81cd93fb9..832d4b373 100644 --- a/vizro-core/src/vizro/models/_components/_components_utils.py +++ b/vizro-core/src/vizro/models/_components/_components_utils.py @@ -29,20 +29,22 @@ def _process_callable_data_frame(captured_callable, values): data_frame = captured_callable["data_frame"] if isinstance(data_frame, str): - # Enable running with DynamicData, e.g. px.scatter("iris") from the Python API and specification of - # "data_frame": "iris" through JSON. In these cases, data already exists in the data manager and just needs to be - # linked to the component. + # Named data source, which could be dynamic or static. This means px.scatter("iris") from the Python API and + # specification of "data_frame": "iris" through JSON. In these cases, data already exists in the data manager + # and just needs to be linked to the component. data_source_name = data_frame else: - # Standard StaticData case for px.scatter(df: pd.DataFrame). - # Extract dataframe from the captured function and put it into the data manager. + # Unnamed data source, which must be a pd.DataFrame and hence static data. This means px.scatter(pd.DataFrame()) + # and is only possible from the Python API. Extract dataframe from the captured function and put it into the + # data manager. # Unlike with model_manager, it doesn't matter if the random seed is different across workers here. So long as we - # always fetch StaticData data from the data manager my going through the appropriate Figure component, the right - # data name will be fetched. It also doesn't matter that multiple Figures with the same underlying data + # always fetch static data from the data manager by going through the appropriate Figure component, the right + # data source name will be fetched. It also doesn't matter if multiple Figures with the same underlying data # each have their own entry in the data manager, since the underlying pd.DataFrame will still be the same and not # copied into each one, so no memory is wasted. logger.debug("Adding data to data manager for Figure with id %s", values["id"]) data_source_name = str(uuid.uuid4()) + data_manager[data_source_name] = data_frame data_manager._add_component(values["id"], data_source_name) # No need to keep the data in the captured function any more so remove it to save memory.