Skip to content

Commit

Permalink
Tidy comments
Browse files Browse the repository at this point in the history
  • Loading branch information
antonymilne committed Apr 5, 2024
1 parent 87f0fa2 commit f0763bf
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 51 deletions.
1 change: 1 addition & 0 deletions vizro-core/examples/_dev/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
vm.Graph(figure=px.scatter("fast_expire_data", "sepal_width", "sepal_length")),
vm.Graph(figure=px.scatter("no_expire_data", "sepal_width", "sepal_length")),
],
controls=[vm.Filter(column="species")],
)
dashboard = vm.Dashboard(pages=[page])
app = Vizro().build(dashboard)
Expand Down
47 changes: 3 additions & 44 deletions vizro-core/src/vizro/managers/_data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# * caching
# * new error messages that are raised
# * set cache to null in all other tests
# * copy returned

# TODO: __main__ in this file: remove/move to docs

Expand Down Expand Up @@ -123,6 +124,8 @@ def __repr__(self):
"""
# Note that using repr(self.__load_data) is not good since it depends on the id of self.__load_data and so
# would not be consistent across processes.
# A lambda function has a __qualname__. A functools.partial object does not, unless one is explicitly assigned to it,
# but a partial would not work with flask-caching anyway since flask_caching.utils.function_namespace would not be
# able to find a name for it.
return f"{self.__class__.__name__}({self._name}, {self.__load_data.__qualname__})"


Expand Down Expand Up @@ -242,47 +245,3 @@ def _clear(self):


data_manager = DataManager()


if __name__ == "__main__":
    from functools import partial

    import vizro.plotly.express as px

    # Ad-hoc demonstration of the data manager, run against the module-level instance.
    manager = data_manager
    manager["iris"] = px.data.iris()

    manager._add_component("component_id_a", "iris")
    row_count = len(manager._get_component_data("component_id_a"))
    print(row_count)  # 150 # noqa: T201

    # Mutate the frame fetched for one component in place, then fetch the same data source through a second
    # component and print both shapes for comparison.
    manager._add_component("component_id_b", "iris")
    frame_a = manager._get_component_data("component_id_a")
    frame_a.drop(columns="species", inplace=True)
    print(frame_a.shape)  # (150, 5) # noqa: T201
    frame_b = manager._get_component_data("component_id_b")
    print(frame_b.shape)  # (150, 6) # noqa: T201

    # Lazy loading example 1: a named function is assigned as the data source.
    def retrieve_iris():
        """Load iris and keep only the setosa rows."""
        return px.data.iris().query("species == 'setosa'")

    manager["iris_subset"] = retrieve_iris
    manager._add_component("component_id_c", "iris_subset")
    subset_rows = len(manager._get_component_data("component_id_c"))
    print(subset_rows)  # 50 # noqa: T201

    # Lazy loading example 2: a lambda wrapping a parametrised loader is assigned as the data source.
    def retrieve_one_species(species):
        """Load iris and return a copy of the rows whose species equals `species`."""
        iris = px.data.iris()
        is_selected = iris["species"] == species
        return iris[is_selected].copy()

    manager["data_from_external_1"] = lambda: retrieve_one_species("setosa")
    manager._add_component("component_id_d", "data_from_external_1")
    lambda_rows = len(manager._get_component_data("component_id_d"))
    print(lambda_rows)  # 50 # noqa: T201

    # Lazy loading example 3: a functools.partial of the same loader is assigned as the data source.
    manager["data_from_external_2"] = partial(retrieve_one_species, "setosa")
    manager._add_component("component_id_e", "data_from_external_2")
    partial_rows = len(manager._get_component_data("component_id_e"))
    print(partial_rows)  # 50 # noqa: T201
16 changes: 9 additions & 7 deletions vizro-core/src/vizro/models/_components/_components_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,22 @@ def _process_callable_data_frame(captured_callable, values):
data_frame = captured_callable["data_frame"]

if isinstance(data_frame, str):
# Enable running with DynamicData, e.g. px.scatter("iris") from the Python API and specification of
# "data_frame": "iris" through JSON. In these cases, data already exists in the data manager and just needs to be
# linked to the component.
# Named data source, which could be dynamic or static. This means px.scatter("iris") from the Python API and
# specification of "data_frame": "iris" through JSON. In these cases, data already exists in the data manager
# and just needs to be linked to the component.
data_source_name = data_frame
else:
# Standard StaticData case for px.scatter(df: pd.DataFrame).
# Extract dataframe from the captured function and put it into the data manager.
# Unnamed data source, which must be a pd.DataFrame and hence static data. This means px.scatter(pd.DataFrame())
# and is only possible from the Python API. Extract dataframe from the captured function and put it into the
# data manager.
# Unlike with model_manager, it doesn't matter if the random seed is different across workers here. So long as we
# always fetch StaticData data from the data manager my going through the appropriate Figure component, the right
# data name will be fetched. It also doesn't matter that multiple Figures with the same underlying data
# always fetch static data from the data manager by going through the appropriate Figure component, the right
# data source name will be fetched. It also doesn't matter if multiple Figures with the same underlying data
# each have their own entry in the data manager, since the underlying pd.DataFrame will still be the same and not
# copied into each one, so no memory is wasted.
logger.debug("Adding data to data manager for Figure with id %s", values["id"])
data_source_name = str(uuid.uuid4())
data_manager[data_source_name] = data_frame

data_manager._add_component(values["id"], data_source_name)
# No need to keep the data in the captured function any more so remove it to save memory.
Expand Down

0 comments on commit f0763bf

Please sign in to comment.