mckinsey · antonymilne · Nov 11, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
@@ -0,0 +1,48 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Changed
+
+- A bullet item for the Changed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
@@ -0,0 +1,47 @@
+<!--
+A new scriv changelog fragment.
+
+Uncomment the section that is right (remove the HTML comment wrapper).
+-->
+
+<!--
+### Highlights ✨
+
+- A bullet item for the Highlights ✨ category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Removed
+
+- A bullet item for the Removed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Added
+
+- A bullet item for the Added category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+
+### Changed
+
+- Improve performance of data loading. ([#850](https://github.com/mckinsey/vizro/pull/850), [#857](https://github.com/mckinsey/vizro/pull/857))
+
+<!--
+### Deprecated
+
+- A bullet item for the Deprecated category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Fixed
+
+- A bullet item for the Fixed category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
+<!--
+### Security
+
+- A bullet item for the Security category with a link to the relevant PR at the end of your entry, e.g. Enable feature XXX. ([#1](https://github.com/mckinsey/vizro/pull/1))
+
+-->
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-from collections import defaultdict
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any, Literal, Optional, TypedDict, Union
 
@@ -23,7 +22,7 @@ class CallbackTriggerDict(TypedDict):
     """Represent dash.ctx.args_grouping item. Shortened as 'ctd' in the code.
 
     Args:
-        id: The component ID. If it`s a pattern matching ID, it will be a dict.
+        id: The component ID. If it's a pattern matching ID, it will be a dict.
         property: The component property used in the callback.
         value: The value of the component property at the time the callback was fired.
         str_id: For pattern matching IDs, it's the stringified dict ID without white spaces.
@@ -47,7 +46,9 @@ def _get_component_actions(component) -> list[Action]:
     )
 
 
-def _apply_filters(data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: str) -> pd.DataFrame:
+def _apply_control_filters(
+    data_frame: pd.DataFrame, ctds_filters: list[CallbackTriggerDict], target: str
+) -> pd.DataFrame:
     for ctd in ctds_filters:
         selector_value = ctd["value"]
         selector_value = selector_value if isinstance(selector_value, list) else [selector_value]
@@ -105,37 +106,44 @@ def _validate_selector_value_none(value: Union[SingleValueType, MultiValueType])
     return value
 
 
-def _create_target_arg_mapping(dot_separated_strings: list[str]) -> dict[str, list[str]]:
-    results = defaultdict(list)
-    for string in dot_separated_strings:
-        if "." not in string:
-            raise ValueError(f"Provided string {string} must contain a '.'")
-        component, arg = string.split(".", 1)
-        results[component].append(arg)
-    return results
+def _filter_dot_separated_strings(dot_separated_strings: list[str], target: str, data_frame: bool) -> list[str]:
+    result = []
+
+    for dot_separated_string_with_target in dot_separated_strings:
+        if dot_separated_string_with_target.startswith(f"{target}."):
+            dot_separated_string = dot_separated_string_with_target.removeprefix(f"{target}.")
+            if (data_frame and dot_separated_string.startswith("data_frame.")) or (
+                not data_frame and not dot_separated_string.startswith("data_frame.")
+            ):
+                result.append(dot_separated_string)
+    return result
 
 
-def _update_nested_graph_properties(
-    graph_config: dict[str, Any], dot_separated_string: str, value: Any
+def _update_nested_figure_properties(
+    figure_config: dict[str, Any], dot_separated_string: str, value: Any
 ) -> dict[str, Any]:
     keys = dot_separated_string.split(".")
-    current_property = graph_config
+    current_property = figure_config
 
     for key in keys[:-1]:
         current_property = current_property.setdefault(key, {})
 
     current_property[keys[-1]] = value
-    return graph_config
+    return figure_config
 
 
-def _get_parametrized_config(target: ModelID, ctd_parameters: list[CallbackTriggerDict]) -> dict[str, Any]:
-    # TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
-    #  CapturedCallable entirely.
-    config = deepcopy(model_manager[target].figure._arguments)
-
-    # It's not possible to address nested argument of data_frame like data_frame.x.y, just top-level ones like
-    # data_frame.x.
-    config["data_frame"] = {}
+def _get_parametrized_config(
+    ctd_parameters: list[CallbackTriggerDict], target: ModelID, data_frame: bool
+) -> dict[str, Any]:
+    if data_frame:
+        # It's not possible to address nested argument of data_frame like data_frame.x.y, just top-level ones like
+        # data_frame.x.
+        config: dict[str, Any] = {"data_frame": {}}
+    else:
+        # TODO - avoid calling _captured_callable. Once we have done this we can remove _arguments from
+        #  CapturedCallable entirely. This might mean not being able to address nested parameters.
+        config = deepcopy(model_manager[target].figure._arguments)
+        del config["data_frame"]
 
     for ctd in ctd_parameters:
         # TODO: needs to be refactored so that it is independent of implementation details
@@ -152,73 +160,76 @@ def _get_parametrized_config(target: ModelID, ctd_parameters: list[CallbackTrigg
                 selector_value = selector.options
 
         selector_value = _validate_selector_value_none(selector_value)
-        selector_actions = _get_component_actions(model_manager[ctd["id"]])
 
-        for action in selector_actions:
+        for action in _get_component_actions(model_manager[ctd["id"]]):
             if action.function._function.__name__ != "_parameter":
                 continue
 
-            action_targets = _create_target_arg_mapping(action.function["targets"])
-
-            if target not in action_targets:
-                continue
-
-            for action_targets_arg in action_targets[target]:
-                config = _update_nested_graph_properties(
-                    graph_config=config, dot_separated_string=action_targets_arg, value=selector_value
+            for dot_separated_string in _filter_dot_separated_strings(action.function["targets"], target, data_frame):
+                config = _update_nested_figure_properties(
+                    figure_config=config, dot_separated_string=dot_separated_string, value=selector_value
                 )
 
     return config
 
 
 # Helper functions used in pre-defined actions ----
-def _get_targets_data_and_config(
+def _apply_filters(
+    data: pd.DataFrame,
     ctds_filter: list[CallbackTriggerDict],
     ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
-    ctds_parameters: list[CallbackTriggerDict],
-    targets: list[ModelID],
+    target: ModelID,
 ):
-    all_filtered_data = {}
-    all_parameterized_config = {}
+    # Takes in just one target, so dataframe is filtered repeatedly for every target that uses it.
+    # Potentially this could be de-duplicated but it's not so important since filtering is a relatively fast
+    # operation (compared to data loading).
+    filtered_data = _apply_control_filters(data_frame=data, ctds_filters=ctds_filter, target=target)
+    filtered_data = _apply_filter_interaction(
+        data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
+    )
+    return filtered_data
 
-    for target in targets:
-        # parametrized_config includes a key "data_frame" that is used in the data loading function.
-        parameterized_config = _get_parametrized_config(target=target, ctd_parameters=ctds_parameters)
-        data_source_name = model_manager[target]["data_frame"]
-        data_frame = data_manager[data_source_name].load(**parameterized_config["data_frame"])
 
-        filtered_data = _apply_filters(data_frame=data_frame, ctds_filters=ctds_filter, target=target)
-        filtered_data = _apply_filter_interaction(
-            data_frame=filtered_data, ctds_filter_interaction=ctds_filter_interaction, target=target
+def _get_unfiltered_data(
+    ctds_parameters: list[CallbackTriggerDict], targets: list[ModelID]
+) -> dict[ModelID, pd.DataFrame]:
+    # Takes in multiple targets to ensure that data can be loaded efficiently using _multi_load and not repeated for
+    # every single target.
+    # Getting unfiltered data requires data frame parameters. We pass in all ctd_parameters and then find the
+    # data_frame ones by passing data_frame=True in the call to _get_paramaterized_config.
+    multi_data_source_name_load_kwargs = []
+    for target in targets:
+        dynamic_data_load_params = _get_parametrized_config(
+            ctd_parameters=ctds_parameters, target=target, data_frame=True
         )
+        data_source_name = model_manager[target]["data_frame"]
+        multi_data_source_name_load_kwargs.append((data_source_name, dynamic_data_load_params["data_frame"]))
 
-        # Parameters affecting data_frame have already been used above in data loading and so are excluded from
-        # all_parameterized_config.
-        all_filtered_data[target] = filtered_data
-        all_parameterized_config[target] = {
-            key: value for key, value in parameterized_config.items() if key != "data_frame"
-        }
-
-    return all_filtered_data, all_parameterized_config
+    return dict(zip(targets, data_manager._multi_load(multi_data_source_name_load_kwargs)))
 
 
 def _get_modified_page_figures(
     ctds_filter: list[CallbackTriggerDict],
     ctds_filter_interaction: list[dict[str, CallbackTriggerDict]],
     ctds_parameters: list[CallbackTriggerDict],
-    targets: Optional[list[ModelID]] = None,
-) -> dict[str, Any]:
-    targets = targets or []
-
-    filtered_data, parameterized_config = _get_targets_data_and_config(
-        ctds_filter=ctds_filter,
-        ctds_filter_interaction=ctds_filter_interaction,
-        ctds_parameters=ctds_parameters,
-        targets=targets,
-    )
+    targets: list[ModelID],
+) -> dict[ModelID, Any]:
+    outputs: dict[ModelID, Any] = {}
+
+    # TODO: the structure here would be nicer if we could get just the ctds for a single target at one time,
+    #  so you could do apply_filters on a target a pass only the ctds relevant for that target.
+    #  Consider restructuring ctds to a more convenient form to make this possible.
+
+    for target, unfiltered_data in _get_unfiltered_data(ctds_parameters, targets).items():
+        filtered_data = _apply_filters(unfiltered_data, ctds_filter, ctds_filter_interaction, target)
+        outputs[target] = model_manager[target](
+            data_frame=filtered_data,
+            **_get_parametrized_config(ctd_parameters=ctds_parameters, target=target, data_frame=False),
+        )
 
-    outputs: dict[str, Any] = {}
-    for target in targets:
-        outputs[target] = model_manager[target](data_frame=filtered_data[target], **parameterized_config[target])
+    # TODO NEXT: will need to pass unfiltered_data into Filter.__call__.
+    # This dictionary is filtered for correct targets already selected in Filter.__call__ or that could be done here
+    # instead.
+    # {target: data_frame for target, data_frame in unfiltered_data.items() if target in self.targets}
 
     return outputs
@@ -16,7 +16,7 @@ def _filter(
     targets: list[ModelID],
     filter_function: Callable[[pd.Series, Any], pd.Series],
     **inputs: dict[str, Any],
-) -> dict[str, Any]:
+) -> dict[ModelID, Any]:
     """Filters targeted charts/components on page by interaction with `Filter` control.
 
     Args:
@@ -28,11 +28,10 @@ def _filter(
 
     Returns:
         Dict mapping target component ids to modified charts/components e.g. {'my_scatter': Figure({})}
-
     """
     return _get_modified_page_figures(
-        targets=targets,
         ctds_filter=ctx.args_grouping["external"]["filters"],
         ctds_filter_interaction=ctx.args_grouping["external"]["filter_interaction"],
         ctds_parameters=ctx.args_grouping["external"]["parameters"],
+        targets=targets,
     )
@@ -10,7 +10,7 @@
 
 
 @capture("action")
-def _on_page_load(targets: list[ModelID], **inputs: dict[str, Any]) -> dict[str, Any]:
+def _on_page_load(targets: list[ModelID], **inputs: dict[str, Any]) -> dict[ModelID, Any]:
     """Applies controls to charts on page once the page is opened (or refreshed).
 
     Args:
@@ -23,8 +23,8 @@ def _on_page_load(targets: list[ModelID], **inputs: dict[str, Any]) -> dict[str,
 
     """
     return _get_modified_page_figures(
-        targets=targets,
         ctds_filter=ctx.args_grouping["external"]["filters"],
         ctds_filter_interaction=ctx.args_grouping["external"]["filter_interaction"],
         ctds_parameters=ctx.args_grouping["external"]["parameters"],
+        targets=targets,
     )
@@ -10,7 +10,7 @@
 
 
 @capture("action")
-def _parameter(targets: list[str], **inputs: dict[str, Any]) -> dict[str, Any]:
+def _parameter(targets: list[str], **inputs: dict[str, Any]) -> dict[ModelID, Any]:
     """Modifies parameters of targeted charts/components on page.
 
     Args:
@@ -25,8 +25,8 @@ def _parameter(targets: list[str], **inputs: dict[str, Any]) -> dict[str, Any]:
     target_ids: list[ModelID] = [target.split(".")[0] for target in targets]  # type: ignore[misc]
 
     return _get_modified_page_figures(
-        targets=target_ids,
         ctds_filter=ctx.args_grouping["external"]["filters"],
         ctds_filter_interaction=ctx.args_grouping["external"]["filter_interaction"],
         ctds_parameters=ctx.args_grouping["external"]["parameters"],
+        targets=target_ids,
     )