Tidy comments

mckinsey · Apr 5, 2024 · f0763bf · f0763bf
1 parent 87f0fa2
commit f0763bf
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 51 deletions.
diff --git a/vizro-core/examples/_dev/app.py b/vizro-core/examples/_dev/app.py
@@ -30,6 +30,7 @@
         vm.Graph(figure=px.scatter("fast_expire_data", "sepal_width", "sepal_length")),
         vm.Graph(figure=px.scatter("no_expire_data", "sepal_width", "sepal_length")),
     ],
+    controls=[vm.Filter(column="species")],
 )
 dashboard = vm.Dashboard(pages=[page])
 app = Vizro().build(dashboard)

diff --git a/vizro-core/src/vizro/managers/_data_manager.py b/vizro-core/src/vizro/managers/_data_manager.py
@@ -16,6 +16,7 @@
 # * caching
 # * new error messages that are raised
 # * set cache to null in all other tests
+# * copy returned
 
 # TODO: __main__ in this file: remove/move to docs
 
@@ -123,6 +124,8 @@ def __repr__(self):
         """
         # Note that using repr(self.__load_data) is not good since it depends on the id of self.__load_data and so
         # would not be consistent across processes.
+        # A lambda function has a __qualname__. A partial does not unless explicitly assigned one, but it would not work
+        # with flask-caching anyway since flask_caching.utils.function_namespace would not be able to find a name for it.
         return f"{self.__class__.__name__}({self._name}, {self.__load_data.__qualname__})"
 
 
@@ -242,47 +245,3 @@ def _clear(self):
 
 
 data_manager = DataManager()
-
-
-if __name__ == "__main__":
-    from functools import partial
-
-    import vizro.plotly.express as px
-
-    dm = data_manager
-    dm["iris"] = px.data.iris()
-
-    dm._add_component("component_id_a", "iris")
-    print(len(dm._get_component_data("component_id_a")))  # 150   # noqa: T201
-
-    dm._add_component("component_id_b", "iris")
-    df_a = dm._get_component_data("component_id_a")
-    df_a.drop(columns="species", inplace=True)
-    print(df_a.shape)  # (150, 5)   # noqa: T201
-    df_b = dm._get_component_data("component_id_b")
-    print(df_b.shape)  # (150, 6)   # noqa: T201
-
-    # Lazy loading example 1
-    def retrieve_iris():
-        df = px.data.iris()
-        subset = df.query("species == 'setosa'")
-        return subset
-
-    dm["iris_subset"] = retrieve_iris
-    dm._add_component("component_id_c", "iris_subset")
-    print(len(dm._get_component_data("component_id_c")))  # 50   # noqa: T201
-
-    # Lazy loading example 2
-    def retrieve_one_species(species):
-        df = px.data.iris()
-        subset = df[df["species"] == species].copy()
-        return subset
-
-    dm["data_from_external_1"] = lambda: retrieve_one_species("setosa")
-    dm._add_component("component_id_d", "data_from_external_1")
-    print(len(dm._get_component_data("component_id_d")))  # 50   # noqa: T201
-
-    # Lazy loading example 3
-    dm["data_from_external_2"] = partial(retrieve_one_species, "setosa")
-    dm._add_component("component_id_e", "data_from_external_2")
-    print(len(dm._get_component_data("component_id_e")))  # 50   # noqa: T201
diff --git a/vizro-core/src/vizro/models/_components/_components_utils.py b/vizro-core/src/vizro/models/_components/_components_utils.py
@@ -29,20 +29,22 @@ def _process_callable_data_frame(captured_callable, values):
     data_frame = captured_callable["data_frame"]
 
     if isinstance(data_frame, str):
-        # Enable running with DynamicData, e.g. px.scatter("iris") from the Python API and specification of
-        # "data_frame": "iris" through JSON. In these cases, data already exists in the data manager and just needs to be
-        # linked to the component.
+        # Named data source, which could be dynamic or static. This means px.scatter("iris") from the Python API and
+        # specification of "data_frame": "iris" through JSON. In these cases, data already exists in the data manager
+        # and just needs to be linked to the component.
         data_source_name = data_frame
     else:
-        # Standard StaticData case for px.scatter(df: pd.DataFrame).
-        # Extract dataframe from the captured function and put it into the data manager.
+        # Unnamed data source, which must be a pd.DataFrame and hence static data. This means px.scatter(pd.DataFrame())
+        # and is only possible from the Python API. Extract dataframe from the captured function and put it into the
+        # data manager.
         # Unlike with model_manager, it doesn't matter if the random seed is different across workers here. So long as we
-        # always fetch StaticData data from the data manager my going through the appropriate Figure component, the right
-        # data name will be fetched. It also doesn't matter that multiple Figures with the same underlying data
+        # always fetch static data from the data manager by going through the appropriate Figure component, the right
+        # data source name will be fetched. It also doesn't matter if multiple Figures with the same underlying data
         # each have their own entry in the data manager, since the underlying pd.DataFrame will still be the same and not
         # copied into each one, so no memory is wasted.
         logger.debug("Adding data to data manager for Figure with id %s", values["id"])
         data_source_name = str(uuid.uuid4())
+        data_manager[data_source_name] = data_frame
 
     data_manager._add_component(values["id"], data_source_name)
     # No need to keep the data in the captured function any more so remove it to save memory.