Merge branch 'main' into docs/vizro_ai/add_dashboard_examples

mckinsey · Nov 12, 2024 · 1554a8b · 1554a8b
2 parents 148210b + c442686
commit 1554a8b
Show file tree

Hide file tree

Showing 12 changed files with 467 additions and 181 deletions.
diff --git a/vizro-core/changelog.d/20241105_170003_antony.milne_new_interaction.md b/vizro-core/changelog.d/20241105_170003_antony.milne_new_interaction.md
@@ -0,0 +1,48 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Changed
+
+- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
diff --git a/vizro-core/changelog.d/20241106_104745_antony.milne_dynamic_filter_2.md b/vizro-core/changelog.d/20241106_104745_antony.milne_dynamic_filter_2.md
@@ -0,0 +1,47 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+
+### Changed
+
+- Improve performance of data loading. ([#850](https://github.com/mckinsey/vizro/pull/850), [#857](https://github.com/mckinsey/vizro/pull/857))
+
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
diff --git a/vizro-core/src/vizro/actions/_actions_utils.py b/vizro-core/src/vizro/actions/_actions_utils.py
@@ -2,14 +2,14 @@
 
 from __future__ import annotations
 
-from collections import defaultdict
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any, Literal, Optional, TypedDict, Union
 
 import pandas as pd
 
 from vizro._constants import ALL_OPTION, NONE_OPTION
 from vizro.managers import data_manager, model_manager
+from vizro.managers._data_manager import DataSourceName
 from vizro.managers._model_manager import ModelID
 from vizro.models.types import MultiValueType, SelectorType, SingleValueType
 
@@ -23,7 +23,7 @@ class CallbackTriggerDict(TypedDict):
     """Represent dash.ctx.args_grouping item. Shortened as 'ctd' in the code.
 
     Args:
-        id: The component ID. If it`s a pattern matching ID, it will be a dict.
+        id: The component ID. If it's a pattern matching ID, it will be a dict.
         property: The component property used in the callback.
         value: The value of the component property at the time the callback was fired.
         str_id: For pattern matching IDs, it's the stringified dict ID without white spaces.
@@ -47,7 +47,18 @@ def _get_component_actions(component) -> list[Action]:
     )
 
 
-def _apply_filters(data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: str) -> pd.DataFrame:
+def _apply_filter_controls(
+    data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: ModelID
+) -> pd.DataFrame:
+    """Applies filters from a vm.Filter model in the controls.
+
+    Args:
+        data_frame: unfiltered DataFrame.
+        ctds_filters: list of CallbackTriggerDict for filters.
+        target: id of targeted Figure.
+
+    Returns: filtered DataFrame.
+    """
     for ctd in ctds_filters:
         selector_value = ctd["value"]
         selector_value = selector_value if isinstance(selector_value, list) else [selector_value]
@@ -84,8 +95,19 @@ def _get_parent_vizro_model(_underlying_callable_object_id: str) -> VizroBaseMod
 
 
 def _apply_filter_interaction(
-    data_frame: pd.DataFrame, ctds_filter_interaction: list[dict[str, CallbackTriggerDict]], target: str
+    data_frame: pd.DataFrame, ctds_filter_interaction: list[dict[str, CallbackTriggerDict]], target: ModelID
 ) -> pd.DataFrame:
+    """Applies filters from a filter_interaction.
+
+    This will be removed in future when filter interactions are implemented using controls.
+
+    Args:
+        data_frame: unfiltered DataFrame.
+        ctds_filter_interaction: structure containing CallbackTriggerDict for filter interactions.
+        target: id of targeted Figure.
+
+    Returns: filtered DataFrame.
+    """
     for ctd_filter_interaction in ctds_filter_interaction:
         triggered_model = model_manager[ctd_filter_interaction["modelID"]["id"]]
         data_frame = triggered_model._filter_interaction(
@@ -105,120 +127,150 @@ def _validate_selector_value_none(value: Union[SingleValueType, MultiValueType])
     return value
 
 
-def _create_target_arg_mapping(dot_separated_strings: list[str]) -> dict[str, list[str]]:
-    results = defaultdict(list)
-    for string in dot_separated_strings:
-        if "." not in string:
-            raise ValueError(f"Provided string {string} must contain a '.'")
-        component, arg = string.split(".", 1)
-        results[component].append(arg)
-    return results
+def _get_target_dot_separated_strings(dot_separated_strings: list[str], target: ModelID, data_frame: bool) -> list[str]:
+    """Filters list of dot separated strings to get just those relevant for a single target.
 
+    Args:
+        dot_separated_strings: list of dot separated strings that can be targeted by a vm.Parameter,
+            e.g. ["target_name.data_frame.arg", "target_name.x"]
+        target: id of targeted Figure.
+        data_frame: whether to return only DataFrame parameters starting "data_frame." or only non-DataFrame parameters.
 
-def _update_nested_graph_properties(
-    graph_config: dict[str, Any], dot_separated_string: str, value: Any
+    Returns:
+        List of dot separated strings for target.
+    """
+    result = []
+
+    for dot_separated_string_with_target in dot_separated_strings:
+        if dot_separated_string_with_target.startswith(f"{target}."):
+            dot_separated_string = dot_separated_string_with_target.removeprefix(f"{target}.")
+            # We only want data_frame parameters when data_frame = True.
+            if dot_separated_string.startswith("data_frame.") == data_frame:
+                result.append(dot_separated_string)
+    return result
+
+
+def _update_nested_figure_properties(
+    figure_config: dict[str, Any], dot_separated_string: str, value: Any
 ) -> dict[str, Any]:
     keys = dot_separated_string.split(".")
-    current_property = graph_config
+    current_property = figure_config
 
     for key in keys[:-1]:
         current_property = current_property.setdefault(key, {})
 
     current_property[keys[-1]] = value
-    return graph_config
+    return figure_config
+
 
+def _get_parametrized_config(
+    ctd_parameters: list[CallbackTriggerDict], target: ModelID, data_frame: bool
+) -> dict[str, Any]:
+    """Convert parameters into a keyword-argument dictionary.
 
-def _get_parametrized_config(target: ModelID, ctd_parameters: list[CallbackTriggerDict]) -> dict[str, Any]:
-    # TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
-    #  CapturedCallable entirely.
-    config = deepcopy(model_manager[target].figure._arguments)
+    Args:
+        ctd_parameters: list of CallbackTriggerDicts for vm.Parameter.
+        target: id of targeted figure.
+        data_frame: whether to return only DataFrame parameters starting "data_frame." or only non-DataFrame parameters.
 
-    # It's not possible to address nested argument of data_frame like data_frame.x.y, just top-level ones like
-    # data_frame.x.
-    config["data_frame"] = {}
+    Returns: keyword-argument dictionary.
+
+    """
+    if data_frame:
+        # This entry is inserted (but will always be empty) even for static data so that the load/_multi_load calls
+        # look identical for dynamic data with no arguments and static data. Note it's not possible to address nested
+        # arguments of data_frame like data_frame.x.y, just top-level ones like data_frame.x.
+        config: dict[str, Any] = {"data_frame": {}}
+    else:
+        # TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
+        #  CapturedCallable entirely. This might mean not being able to address nested parameters.
+        config = deepcopy(model_manager[target].figure._arguments)
+        del config["data_frame"]
 
     for ctd in ctd_parameters:
         # TODO: needs to be refactored so that it is independent of implementation details
-        selector_value = ctd["value"]
+        parameter_value = ctd["value"]
 
-        if hasattr(selector_value, "__iter__") and ALL_OPTION in selector_value:  # type: ignore[operator]
-            selector: SelectorType = model_manager[ctd["id"]]
-
-            # Even if options are provided as list[dict], the Dash component only returns a list of values.
+        selector: SelectorType = model_manager[ctd["id"]]
+        if hasattr(parameter_value, "__iter__") and ALL_OPTION in parameter_value:  # type: ignore[operator]
+            # Even if an option is provided as list[dict], the Dash component only returns a list of values.
             # So we need to ensure that we always return a list only as well to provide consistent types.
-            if all(isinstance(option, dict) for option in selector.options):
-                selector_value = [option["value"] for option in selector.options]
-            else:
-                selector_value = selector.options
+            parameter_value = [option["value"] if isinstance(option, dict) else option for option in selector.options]
 
-        selector_value = _validate_selector_value_none(selector_value)
-        selector_actions = _get_component_actions(model_manager[ctd["id"]])
+        parameter_value = _validate_selector_value_none(parameter_value)
 
-        for action in selector_actions:
+        for action in _get_component_actions(selector):
             if action.function._function.__name__ != "_parameter":
                 continue
 
-            action_targets = _create_target_arg_mapping(action.function["targets"])
-
-            if target not in action_targets:
-                continue
-
-            for action_targets_arg in action_targets[target]:
-                config = _update_nested_graph_properties(
-                    graph_config=config, dot_separated_string=action_targets_arg, value=selector_value
+            for dot_separated_string in _get_target_dot_separated_strings(
+                action.function["targets"], target, data_frame
+            ):
+                config = _update_nested_figure_properties(
+                    figure_config=config, dot_separated_string=dot_separated_string, value=parameter_value
                 )
 
     return config
 
 
 # Helper functions used in pre-defined actions ----
-def _get_targets_data_and_config(
+def _apply_filters(
+    data: pd.DataFrame,
     ctds_filter: list[CallbackTriggerDict],
     ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
-    ctds_parameters: list[CallbackTriggerDict],
-    targets: list[ModelID],
+    target: ModelID,
 ):
-    all_filtered_data = {}
-    all_parameterized_config = {}
-
+    # Takes in just one target, so dataframe is filtered repeatedly for every target that uses it.
+    # Potentially this could be de-duplicated but it's not so important since filtering is a relatively fast
+    # operation (compared to data loading).
+    filtered_data = _apply_filter_controls(data_frame=data, ctds_filters=ctds_filter, target=target)
+    filtered_data = _apply_filter_interaction(
+        data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
+    )
+    return filtered_data
+
+
+def _get_unfiltered_data(
+    ctds_parameters: list[CallbackTriggerDict], targets: list[ModelID]
+) -> dict[ModelID, pd.DataFrame]:
+    # Takes in multiple targets to ensure that data can be loaded efficiently using _multi_load and not repeated for
+    # every single target.
+    # Getting unfiltered data requires data frame parameters. We pass in all ctd_parameters and then find the
+    # data_frame ones by passing data_frame=True in the call to _get_parametrized_config. Static data is also
+    # handled here and will just have empty dictionary for its kwargs.
+    multi_data_source_name_load_kwargs: list[tuple[DataSourceName, dict[str, Any]]] = []
     for target in targets:
-        # parametrized_config includes a key "data_frame" that is used in the data loading function.
-        parameterized_config = _get_parametrized_config(target=target, ctd_parameters=ctds_parameters)
-        data_source_name = model_manager[target]["data_frame"]
-        data_frame = data_manager[data_source_name].load(**parameterized_config["data_frame"])
-
-        filtered_data = _apply_filters(data_frame=data_frame, ctds_filters=ctds_filter, target=target)
-        filtered_data = _apply_filter_interaction(
-            data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
+        dynamic_data_load_params = _get_parametrized_config(
+            ctd_parameters=ctds_parameters, target=target, data_frame=True
         )
+        data_source_name = model_manager[target]["data_frame"]
+        multi_data_source_name_load_kwargs.append((data_source_name, dynamic_data_load_params["data_frame"]))
 
-        # Parameters affecting data_frame have already been used above in data loading and so are excluded from
-        # all_parameterized_config.
-        all_filtered_data[target] = filtered_data
-        all_parameterized_config[target] = {
-            key: value for key, value in parameterized_config.items() if key != "data_frame"
-        }
-
-    return all_filtered_data, all_parameterized_config
+    return dict(zip(targets, data_manager._multi_load(multi_data_source_name_load_kwargs)))
 
 
 def _get_modified_page_figures(
     ctds_filter: list[CallbackTriggerDict],
     ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
     ctds_parameters: list[CallbackTriggerDict],
-    targets: Optional[list[ModelID]] = None,
-) -> dict[str, Any]:
-    targets = targets or []
-
-    filtered_data, parameterized_config = _get_targets_data_and_config(
-        ctds_filter=ctds_filter,
-        ctds_filter_interaction=ctds_filter_interaction,
-        ctds_parameters=ctds_parameters,
-        targets=targets,
-    )
+    targets: list[ModelID],
+) -> dict[ModelID, Any]:
+    outputs: dict[ModelID, Any] = {}
+
+    # TODO: the structure here would be nicer if we could get just the ctds for a single target at one time,
+    #  so you could do apply_filters on a target and pass only the ctds relevant for that target.
+    #  Consider restructuring ctds to a more convenient form to make this possible.
+
+    for target, unfiltered_data in _get_unfiltered_data(ctds_parameters, targets).items():
+        filtered_data = _apply_filters(unfiltered_data, ctds_filter, ctds_filter_interaction, target)
+        outputs[target] = model_manager[target](
+            data_frame=filtered_data,
+            **_get_parametrized_config(ctd_parameters=ctds_parameters, target=target, data_frame=False),
+        )
 
-    outputs: dict[str, Any] = {}
-    for target in targets:
-        outputs[target] = model_manager[target](data_frame=filtered_data[target], **parameterized_config[target])
+    # TODO NEXT: will need to pass unfiltered_data into Filter.__call__.
+    # This dictionary is filtered for correct targets already selected in Filter.__call__ or that could be done here
+    # instead.
+    # {target: data_frame for target, data_frame in unfiltered_data.items() if target in self.targets}
 
     return outputs