Skip to content

Commit

Permalink
Final preparation
Browse files Browse the repository at this point in the history
  • Loading branch information
petar-qb committed Nov 19, 2024
1 parent 46350cd commit 7d05570
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 122 deletions.
220 changes: 98 additions & 122 deletions vizro-core/examples/scratch_dev/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import dash
import datetime
import time
import yaml
import pandas as pd
import plotly.express as px

from dash import Dash, html, dcc, Output, callback, clientside_callback, Input, State, set_props
import dash_mantine_components as dmc

from time import sleep

# TODO-TEST: How to test this?
# =====================================================================
# *** ATTENTION: THIS IS THE ONLY VARIABLE YOU NEED TO SET ***
# Set CONTROL_SELECTOR to test different control types
# Choose between: dcc.Dropdown, dcc.Checklist, dcc.RadioItems, dcc.Slider, dcc.RangeSlider
# For example: CONTROL_SELECTOR = dcc.RadioItems
# Optionally, choose between IS_DROPDOWN_MULTI = True or False for dcc.Dropdown selector.
# =====================================================================
CONTROL_SELECTOR = dcc.Checklist

Expand All @@ -36,19 +39,54 @@
"dcc.Dropdown, dcc.Checklist, dcc.RadioItems, dcc.Slider, or dcc.RangeSlider."
)


# Selector classes grouped by the kind of column they filter on.
SELECTOR_TYPE = {
    "categorical": [dcc.Dropdown, dcc.Checklist, dcc.RadioItems],
    "numerical": [dcc.Slider, dcc.RangeSlider],
}

# Derived from CONTROL_SELECTOR: the column that slow_load() pre-filters on.
# Stays None when CONTROL_SELECTOR belongs to neither group.
LOADING_DATA_PREFILTER_COLUMN = (
    "species"
    if CONTROL_SELECTOR in SELECTOR_TYPE["categorical"]
    else "sepal_length"
    if CONTROL_SELECTOR in SELECTOR_TYPE["numerical"]
    else None
)


# like dynamic data
def slow_load():
    """Simulate a slow, dynamic data source whose content is driven by ``data.yaml``.

    The full iris dataset is loaded, a synthetic daily ``date_column`` starting at
    2024-01-01 is added, and the rows are pre-filtered according to
    ``LOADING_DATA_PREFILTER_COLUMN``:

    * ``"species"``: the first N rows of each species, where N comes from the
      ``setosa`` / ``versicolor`` / ``virginica`` keys (a missing key means 0 rows).
    * ``"sepal_length"``: rows whose value lies between ``min`` and ``max``, inclusive.
    * ``"date_column"``: rows whose date lies between ``date_min`` and ``date_max``, inclusive.

    ``data.yaml`` is read again on every call, so editing the file while the app is
    running changes what the next call returns.

    Returns:
        pandas.DataFrame: The pre-filtered iris data.

    Raises:
        ValueError: If ``LOADING_DATA_PREFILTER_COLUMN`` is none of the columns above.
    """
    print("running slow_load")
    # Artificial delay that stands in for a slow data source.
    sleep(0.1)

    # Load the full iris dataset
    df = px.data.iris()
    df["date_column"] = pd.date_range(start=pd.to_datetime("2024-01-01"), periods=len(df), freq="D")

    with open("data.yaml", "r", encoding="utf-8") as file:
        # An empty YAML file loads as None; fall back to an empty mapping.
        data = yaml.safe_load(file) or {}

    filter_column = LOADING_DATA_PREFILTER_COLUMN
    if filter_column == "species":
        final_df = pd.concat(
            objs=[
                df[df[filter_column] == species_name].head(data.get(species_name, 0))
                for species_name in ("setosa", "versicolor", "virginica")
            ],
            ignore_index=True,
        )
    elif filter_column == "sepal_length":
        # NOTE(review): "min" and "max" are assumed to be present in data.yaml - confirm.
        final_df = df[df[filter_column].between(data.get("min"), data.get("max"), inclusive="both")]
    elif filter_column == "date_column":
        date_min = pd.to_datetime(data.get("date_min"))
        date_max = pd.to_datetime(data.get("date_max"))
        final_df = df[df[filter_column].between(date_min, date_max, inclusive="both")]
    else:
        raise ValueError(f"Invalid LOADING_DATA_PREFILTER_COLUMN: {filter_column!r}")

    return final_df


# Like pre-build - doesn't get run again when the page is reloaded
Expand All @@ -71,30 +109,6 @@ def numerical_filter_pre_build():
pre_build_min, pre_build_max, pre_build_numerical_value = numerical_filter_pre_build()


# --- Pages ---
# Components placed at the top of every page: the dashboard title and links to both routes.
common = [
    html.H1(id="dashboard_title", children="Dashboard"),
    html.Div(dcc.Link("Homepage", href="/")),
    html.Div(dcc.Link("Another page", href="/another-page")),
]


def make_page(content):
    """Wrap page-specific components in the shared page scaffold.

    Args:
        content: Iterable of components that make up the body of the page.

    Returns:
        An ``html.Div`` holding the ``common`` components, a paragraph with the
        time the layout was built, and then ``content``.
    """
    children = list(common)
    # Timestamp makes it visible when the layout was last rebuilt.
    children.append(html.P(datetime.datetime.now()))
    children.extend(content)
    return html.Div(children)


# homepage build
def homepage(**kwargs):
    """Build the homepage layout; any keyword arguments are accepted and ignored."""
    heading = html.H2("Homepage")
    return make_page([heading])


# Like filter build - gets run every time page is loaded
def categorical_filter_build(options=None):
kwargs = {}
Expand Down Expand Up @@ -123,6 +137,30 @@ def numerical_filter_build(min_value=None, max_value=None):
)


# --- Pages ---
# Components placed at the top of every page: the dashboard title and links to both routes.
common = [
    html.H1(id="dashboard_title", children="Dashboard"),
    html.Div(dcc.Link("Homepage", href="/")),
    html.Div(dcc.Link("Another page", href="/another-page")),
]


def _page_build(content):
    """Assemble a full page layout around the given page-specific components.

    Args:
        content: Iterable of components that make up the body of the page.

    Returns:
        An ``html.Div`` whose children are the ``common`` components, a paragraph
        showing the time the layout was built, and then ``content``.
    """
    built_at = html.P(datetime.datetime.now())
    return html.Div([*common, built_at, *content])


# homepage build
def homepage(**kwargs):
    """Build the homepage layout; any keyword arguments are accepted and ignored."""
    heading = html.H2("Homepage")
    return _page_build([heading])


# Like another-page build
def another_page(**kwargs):
def _get_initial_page_build_object():
Expand All @@ -141,29 +179,30 @@ def _get_initial_page_build_object():
else:
raise ValueError("Invalid CONTROL_SELECTOR.")

return make_page(
return _page_build(
[
dcc.Store(id="on_page_load_trigger_another_page"),
html.H2("Another page"),

# # This does NOT work because id="filter" doesn't exist but is used as OPL callback State.
# # This does not work because id="filter" doesn't exist but is used as OPL callback State.
# dcc.Loading(id="filter_container"),

# # Possible solution is to alter filter.options from on_page_load. This would work, but it's not optimal.
# # This does not work because OPL filter input ("value") is missing, but is used for filtering dfs.
# html.Div(
# html.Div(id="filter"),
# id="filter_container",
# ),

# # This does not work because setting a different "value" clears the session persistence storage
# dcc.Dropdown(id="filter", options=options, value=options, multi=True, persistence=True),

# # Outer container can be changed with dcc.Loading.
# # This works because filter "value" is always set to the same value in the page_build
# # and persistence is applied properly before the OPL.
html.Div(
_get_initial_page_build_object(),
id="filter_container",
),

# # Does not work because OPL filter input is missing, but it's used for filtering figures data_frame.
# html.Div(
# html.Div(id="filter"),
# id="filter_container",
# ),

html.Br(),
dcc.RadioItems(id="parameter", options=["sepal_width", "sepal_length"], value="sepal_width", persistence=True, persistence_type="session"),
dcc.Loading(dcc.Graph(id="graph1")),
Expand All @@ -172,22 +211,14 @@ def _get_initial_page_build_object():
)


def graph1_build(data_frame):
    """Build the bar chart shown in ``graph1``.

    Args:
        data_frame: Data to plot. Its ``species`` column is used for both the
            x axis and the bar colour.

    Returns:
        The figure returned by ``px.bar``.
    """
    return px.bar(data_frame, x="species", color="species")


# Backward-compatible alias for the function's previous name.
graph1_call = graph1_build


def graph2_build(data_frame, x):
    """Build the scatter plot shown in ``graph2``.

    Args:
        data_frame: Data to plot. Its ``petal_width`` column is the y axis and
            its ``species`` column sets the marker colour.
        x: Name of the column to put on the x axis.

    Returns:
        The figure returned by ``px.scatter``.
    """
    return px.scatter(data_frame, x=x, y="petal_width", color="species")


# Backward-compatible alias for the function's previous name.
graph2_call = graph2_build


# NOTE:
# You could do just update_filter to update options/value rather than replacing whole dcc.Dropdown object. Then would
# need to write it for rangeslider and dropdown etc. separately though. Probably easier to just replace whole object.
# This is consistent with how Graph, AgGrid etc. work.
# BUT controls are different from Graphs since you can set the pre-selected value that should be shown when
# user first visits page. Is this possible still with dynamic filter? -> YES


def get_data(species):
    """Load the data via ``slow_load`` and keep only rows of the requested species.

    Args:
        species: Collection of species names to keep.

    Returns:
        The rows of the loaded frame whose ``species`` value is in ``species``.
    """
    loaded = slow_load()
    wanted_rows = loaded["species"].isin(species)
    return loaded[wanted_rows]
Expand All @@ -207,9 +238,6 @@ def get_data(species):
)


# TODO: write something like get_modified_figures function to reduce repetition.


@callback(
output=[
Output("graph1", "figure"),
Expand All @@ -218,7 +246,6 @@ def get_data(species):
],
inputs=[
Input("global_on_page_load_another_page_action_trigger", "data"),
State("filter", "value"),
State("parameter", "value"),
],
Expand All @@ -228,17 +255,18 @@ def on_page_load(data, persisted_filter_value, x):
# Ideally, OPL flow should look like this:
# 1. Page.build() -> returns static layout (placeholder elements for dynamic components).
# 2. Persistence is applied. -> So, filter values are the same as the last time the page was visited.
# 3. OPL -> returns dynamic components based on the controls values (persisted)
# 3. OPL -> returns dynamic components based on the persisted controls values
# 3.1. Load DFs (include DFP values here)
# 3.2. Calculate new filter values:
# e.g. new_filter_values = [value for value in persisted_filter_value if value in new_filter_options]
# e.g. new_min = max(persisted_min, new_min); new_max = min(persisted_max, new_max)
# e.g. options = persisted_current_value + new_options
# e.g. new_min = min(persisted_min, new_min); new_max = max(persisted_max, new_max)
# 3.3. Apply filters on DFs
# 3.4. Apply parameters on config
# 3.5. Return dynamic components (Figures and dynamic controls)

# Why actions are better than dash.callback here?
# 1. They solve the circular dependency problem of the full graph.
# 1. They solve the circular dependency problem of the full graph (filter_1 -> graph -> filter_2 -> filter_1).
# graph -> filter_2 (filter_interaction over the controls); filter_2 -> filter_1 (cascading filters).
# 2. They are explicit which means they can be configured in any way users want. There's no undesired behavior.

# TODO: Last solution found -> hence put in highlighted TODO:
Expand All @@ -249,9 +277,8 @@ def on_page_load(data, persisted_filter_value, x):
# * It works! :D *
# 2. OPL -> Manipulations with filter and options:
# 2.1. Recalculate options.
# 2.2. Recalculated value. (persisted_filter_value that exists in recalculated options)
# 2.3. Filter figures data_frame with recalculated value.
# 2.4. Create a new filter object with recalculated options and original value.
# 2.2. Filter figures data_frame with recalculated value.
# 2.3. Create a new filter object with recalculated options and original value.
# Limitations:
# 1. do_filter is triggered automatically after OPL.
# This shouldn't be an issue since the actions loop controls it.
Expand All @@ -262,12 +289,13 @@ def on_page_load(data, persisted_filter_value, x):
# This is probably a Dash bug: Dropdown relies heavily on async handling, which probably causes the Dropdown
# returned from page_build or OPL to trigger "recordUiEdit" when it should not.
# ** Problem is solved by returning dmc.DateRangePicker instead of dcc.Dropdown from page.build. **
# --- (A.M.): How to achieve all of these: ---
# * get correct selected value passed into graph calls -> Works with this solution.
# * populate filter with right values for user on first page load -> Works with this solution.
# * update filter options on page load -> Works with this solution.
# * persist filter values on page change -> Works with this solution.

# 3. We should stick with new_options = current_value + new_options for all our selectors to keep the
# persistence-stored values in sync with the values selected in the UI.
# 4. set_props also could be used but we avoid this due to lack of documentation and potential bugs.
# # Usage:
# # set_props(component_id="filter_container", props={"children": new_filter_obj})
# # More about set_props:
# # -> https://dash.plotly.com/advanced-callbacks#setting-properties-directly
print("running on_page_load")
df = slow_load()

Expand All @@ -287,25 +315,6 @@ def on_page_load(data, persisted_filter_value, x):
# --- Filtering data: ---
df = df[df["species"].isin(persisted_filter_value)]

# --- set_props ---
# set_props(component_id="filter_container", props={"children": new_filter_obj})
# More about set_props:
# -> https://dash.plotly.com/advanced-callbacks#setting-properties-directly
# -> https://community.plotly.com/t/dash-2-17-0-released-callback-updates-with-set-props-no-output-callbacks-layout-as-list-dcc-loading-trace-zorder/84343
# Limitations:
# 1. Component properties updated using set_props won't appear in the callback graph for debugging.
# - This is not a problem because our graph debugging is already unreadable. :D
# 2. Component properties updated using set_props won't appear as loading when they are wrapped with a `dcc.Loading` component.
# - Potential solution. Set controls as dash.Output and then use set_props to update them + dash.no_update as a return value for them.
# 3. set_props doesn't validate the id or property names provided, so no error will be displayed if they contain typos. This can make apps that use set_props harder to debug.
# - That's okay since it's internal Vizro stuff and shouldn't affect user.
# 4. Using set_props with chained callbacks may lead to unexpected results.
# - It even behaves better because it doesn't trigger the "do_filter" callback.
# Open questions:
# 1. Is there any concern about different filter selectors? -> No. (I haven't tested the DatePicker yet.)
# 2. Can we handle if filter selector changes dynamically? -> Potentially, (I haven't tested it yet.)
# 3. Is there a bug with set_props or with dash.Output?!

# --- Calculate numerical filter ---
if CONTROL_SELECTOR in SELECTOR_TYPE["numerical"]:
persisted_filter_value = persisted_filter_value if isinstance(persisted_filter_value, list) else [persisted_filter_value, persisted_filter_value]
Expand All @@ -317,15 +326,16 @@ def on_page_load(data, persisted_filter_value, x):
numerical_filter_max = max(numerical_filter_max, persisted_filter_value[1])

new_filter_obj = numerical_filter_build(min_value=numerical_filter_min, max_value=numerical_filter_max)
# set_props(component_id="numerical_filter_container", props={"children": new_filter_obj})

# --- Filtering data: ---
df = df[(df["sepal_length"] >= numerical_filter_value[0]) & (df["sepal_length"] <= numerical_filter_value[1])]
df = df[(df["sepal_length"] >= persisted_filter_value[0]) & (df["sepal_length"] <= persisted_filter_value[1])]

print("")
return graph1_call(df), graph2_call(df, x), new_filter_obj
return graph1_build(df), graph2_build(df, x), new_filter_obj


# TODO-DEV: You can enable filtering by uncommenting the code below, but do_filter could be triggered from OPL.
# There are no similar problems when it's called in the Vizro app, thanks to the action loop breaking mechanism.
# @callback(
# Output("graph1", "figure", allow_duplicate=True),
# Output("graph2", "figure", allow_duplicate=True),
Expand All @@ -348,7 +358,7 @@ def on_page_load(data, persisted_filter_value, x):
# df1 = get_data(species)
# df2 = get_data(species)
# print("")
# return graph1_call(df1), graph2_call(df2, x)
# return graph1_build(df1), graph2_build(df2, x)
#
#
# @callback(
Expand All @@ -371,39 +381,5 @@ def on_page_load(data, persisted_filter_value, x):
app.layout = html.Div([dcc.Store("global_on_page_load_another_page_action_trigger"), dash.page_container])


##### NEXT STEPS FOR PETAR

# How to update dynamic filter?
# Options:
# 1. on_page_load_controls and then on_page_load_components sequentially. Need to figure out how to get components
# into loading state to begin with - set as loading build and then change back in OPL callback? Means two callbacks.
# 2. on_page_load_controls and then on_page_load_components in parallel. NO, bad when caching
# 3. on_page_load_everything. THIS IS THE ONE WE PREFER.
# Can't have on_page_load_controls trigger regular "apply filter" etc. callbacks as could lead to many of them in
# parallel.

# So need to make sure that either method 1 or 3 doesn't trigger regular callbacks. Not sure
# how to achieve this...
# Could put manual no_update in those regular callbacks but is not nice.
# Could actually just do on_page_load_controls and then use all regular callbacks in parallel - so long as caching
# turned on then on_page_load_controls will have warmed it up so then no problem with regular callbacks.
# But still not good because regular callbacks will override same output graph multiple times.

# Maybe actually need on_page_load_controls to trigger regular filters in general? And just not have too many of them.

# persistence still works
# changing page now does on_page_load which then triggers do_filter
# so effectively running do_filter twice
# How can we avoid this?

# Consider actions loop and when one callback should trigger another etc.

# How does persistence work?
# How does triggering callbacks work in vizro?
# How *should* triggering callbacks work in vizro? Can we align it more with Dash?
# How to handle filter options persistence and updating etc.?
# How to avoid the regular filters being triggered after on_page_load runs?
# IMPORTANT: also consider parametrised data case.

if __name__ == "__main__":
    # Run with debug mode on and dev-tools hot reload switched off.
    app.run(dev_tools_hot_reload=False, debug=True)
13 changes: 13 additions & 0 deletions vizro-core/examples/scratch_dev/data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Choose from 0-50
setosa: 5
versicolor: 10
virginica: 15

# Choose from: 4.8 to 7.4
min: 5
max: 7

# Choose from:
# 2024-01-01 to 2024-05-29
date_min: 2024-01-01
date_max: 2024-05-29

0 comments on commit 7d05570

Please sign in to comment.