From bb0e6a22111579c2e9c3b3c8e67c95289b4a5275 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 8 Jan 2025 14:18:18 -0500 Subject: [PATCH] confidence interval text + histogram table (#211) * Checkpoint: Plot not there on initial load, but after that it works * reactive calc is the right tool for the job * render dataframe * one line * unused imports * df typing * three columns * wrap table in details * a little testing * better names --- dp_wizard/app/analysis_panel.py | 9 +-- dp_wizard/app/components/column_module.py | 76 +++++++++++++++-------- dp_wizard/utils/dp_helper.py | 4 +- tests/test_app.py | 4 ++ 4 files changed, 56 insertions(+), 37 deletions(-) diff --git a/dp_wizard/app/analysis_panel.py b/dp_wizard/app/analysis_panel.py index 8f32705..3e7da3a 100644 --- a/dp_wizard/app/analysis_panel.py +++ b/dp_wizard/app/analysis_panel.py @@ -6,7 +6,6 @@ from dp_wizard.app.components.inputs import log_slider from dp_wizard.app.components.column_module import column_ui, column_server from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names -from dp_wizard.utils.dp_helper import confidence from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip from dp_wizard.utils.code_generators import make_privacy_loss_block @@ -44,18 +43,12 @@ def analysis_ui(): ui.card( ui.card_header("Simulation"), ui.markdown( - f""" + """ This simulation will assume a normal distribution between the specified lower and upper bounds. Until you make a release, your CSV will not be read except to determine the columns. - The actual value is within the error bar - with {int(confidence * 100)}% confidence. - """ - ), - ui.markdown( - """ What is the approximate number of rows in the dataset? This number is only used for the simulation and not the final calculation. diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py index d46fe9c..938edc8 100644 --- a/dp_wizard/app/components/column_module.py +++ b/dp_wizard/app/components/column_module.py @@ -1,11 +1,14 @@ from logging import info +from htmltools.tags import details, summary from shiny import ui, render, module, reactive, Inputs, Outputs, Session +from shiny.types import SilentException from dp_wizard.utils.dp_helper import make_accuracy_histogram from dp_wizard.utils.shared import plot_histogram from dp_wizard.utils.code_generators import make_column_config_block from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip, hide_if +from dp_wizard.utils.dp_helper import confidence default_weight = "2" @@ -42,12 +45,7 @@ def column_ui(): # pragma: no cover ), ui.output_ui("optional_weight_ui"), ], - [ - ui.output_plot("column_plot", height="300px"), - # Make plot smaller than default: - # about the same size as the other column. - output_code_sample("Column Definition", "column_code"), - ], + ui.output_ui("histogram_preview_ui"), col_widths=col_widths, # type: ignore ), ) @@ -97,6 +95,27 @@ def _set_bins(): def _set_weight(): weights.set({**weights(), name: input.weight()}) + @reactive.calc() + def accuracy_histogram(): + lower_x = float(input.lower()) + upper_x = float(input.upper()) + bin_count = int(input.bins()) + weight = float(input.weight()) + weights_sum = sum(float(weight) for weight in weights().values()) + info(f"Weight ratio for {name}: {weight}/{weights_sum}") + if weights_sum == 0: + # This function is triggered when column is removed; + # Exit early to avoid divide-by-zero. + raise SilentException("weights_sum == 0") + return make_accuracy_histogram( + row_count=row_count, + lower=lower_x, + upper=upper_x, + bin_count=bin_count, + contributions=contributions, + weighted_epsilon=epsilon * weight / weights_sum, + ) + @render.text def card_header(): return name @@ -165,26 +184,31 @@ def column_code(): bin_count=int(input.bins()), ) - @render.plot() - def column_plot(): - lower_x = float(input.lower()) - upper_x = float(input.upper()) - bin_count = int(input.bins()) - weight = float(input.weight()) - weights_sum = sum(float(weight) for weight in weights().values()) - info(f"Weight ratio for {name}: {weight}/{weights_sum}") - if weights_sum == 0: - # This function is triggered when column is removed; - # Exit early to avoid divide-by-zero. - return None - accuracy, histogram = make_accuracy_histogram( - row_count=row_count, - lower=lower_x, - upper=upper_x, - bin_count=bin_count, - contributions=contributions, - weighted_epsilon=epsilon * weight / weights_sum, - ) + @render.ui + def histogram_preview_ui(): + accuracy, histogram = accuracy_histogram() + return [ + ui.output_plot("histogram_preview_plot", height="300px"), + ui.layout_columns( + ui.markdown( + f"The {confidence:.0%} confidence interval is ±{accuracy:.3g}." + ), + details( + summary("Data Table"), + ui.output_data_frame("data_frame"), + ), + output_code_sample("Column Definition", "column_code"), + ), + ] + + @render.data_frame + def data_frame(): + accuracy, histogram = accuracy_histogram() + return render.DataGrid(histogram) + + @render.plot + def histogram_preview_plot(): + accuracy, histogram = accuracy_histogram() s = "s" if contributions > 1 else "" title = ( f"Simulated {name}: normal distribution, " diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py index c85f2e6..90a3786 100644 --- a/dp_wizard/utils/dp_helper.py +++ b/dp_wizard/utils/dp_helper.py @@ -1,5 +1,3 @@ -from typing import Any - import polars as pl import opendp.prelude as dp @@ -19,7 +17,7 @@ def make_accuracy_histogram( bin_count: int, contributions: int, weighted_epsilon: float, -) -> tuple[float, Any]: +) -> tuple[float, pl.DataFrame]: """ Creates fake data between lower and upper, and then returns a DP histogram from it. >>> accuracy, histogram = make_accuracy_histogram( diff --git a/tests/test_app.py b/tests/test_app.py index 7aa2209..283072c 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -114,6 +114,10 @@ def expect_no_error(): page.get_by_label("grade").check() expect_visible(simulation) assert page.get_by_label("Upper").input_value() == new_value + expect_visible("The 95% confidence interval is ±794") + page.get_by_text("Data Table").click() + expect_visible("(0, 2]") + # Add a second column: # page.get_by_label("blank").check() # TODO: Test is flaky?