Commit bb0e6a2: confidence interval text + histogram table (#211)

* Checkpoint: Plot not there on initial load, but after that it works

* reactive calc is the right tool for the job

* render dataframe

* one line

* unused imports

* df typing

* three columns

* wrap table in details

* a little testing

* better names
mccalluc authored Jan 8, 2025
1 parent ec26413 commit bb0e6a2
Showing 4 changed files with 56 additions and 37 deletions.
9 changes: 1 addition & 8 deletions dp_wizard/app/analysis_panel.py
@@ -6,7 +6,6 @@
from dp_wizard.app.components.inputs import log_slider
from dp_wizard.app.components.column_module import column_ui, column_server
from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
from dp_wizard.utils.dp_helper import confidence
from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
from dp_wizard.utils.code_generators import make_privacy_loss_block

@@ -44,18 +43,12 @@ def analysis_ui():
ui.card(
ui.card_header("Simulation"),
ui.markdown(
f"""
"""
This simulation will assume a normal distribution
between the specified lower and upper bounds.
Until you make a release, your CSV will not be
read except to determine the columns.
The actual value is within the error bar
with {int(confidence * 100)}% confidence.
"""
),
ui.markdown(
"""
What is the approximate number of rows in the dataset?
This number is only used for the simulation
and not the final calculation.
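
The sentence removed here ("The actual value is within the error bar with ...% confidence.") is not lost: it reappears per column in column_module.py below, formatted from the same shared confidence constant. A small sketch of the old and new formattings, using 0.95 and 794 only because those are the values the updated test expects:

    confidence = 0.95  # assumed value, matching the "95%" checked in tests/test_app.py
    accuracy = 794.0   # assumed accuracy, matching the "±794" checked in tests/test_app.py

    # Old analysis-panel wording (integer percent):
    print(f"The actual value is within the error bar with {int(confidence * 100)}% confidence.")

    # New per-column wording (percent formatting plus a compact accuracy):
    print(f"The {confidence:.0%} confidence interval is ±{accuracy:.3g}.")
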
76 changes: 50 additions & 26 deletions dp_wizard/app/components/column_module.py
@@ -1,11 +1,14 @@
from logging import info

from htmltools.tags import details, summary
from shiny import ui, render, module, reactive, Inputs, Outputs, Session
from shiny.types import SilentException

from dp_wizard.utils.dp_helper import make_accuracy_histogram
from dp_wizard.utils.shared import plot_histogram
from dp_wizard.utils.code_generators import make_column_config_block
from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip, hide_if
from dp_wizard.utils.dp_helper import confidence


default_weight = "2"
@@ -42,12 +45,7 @@ def column_ui(): # pragma: no cover
),
ui.output_ui("optional_weight_ui"),
],
[
ui.output_plot("column_plot", height="300px"),
# Make plot smaller than default:
# about the same size as the other column.
output_code_sample("Column Definition", "column_code"),
],
ui.output_ui("histogram_preview_ui"),
col_widths=col_widths, # type: ignore
),
)
@@ -97,6 +95,27 @@ def _set_bins():
def _set_weight():
weights.set({**weights(), name: input.weight()})

@reactive.calc()
def accuracy_histogram():
lower_x = float(input.lower())
upper_x = float(input.upper())
bin_count = int(input.bins())
weight = float(input.weight())
weights_sum = sum(float(weight) for weight in weights().values())
info(f"Weight ratio for {name}: {weight}/{weights_sum}")
if weights_sum == 0:
# This function is triggered when column is removed;
# Exit early to avoid divide-by-zero.
raise SilentException("weights_sum == 0")
return make_accuracy_histogram(
row_count=row_count,
lower=lower_x,
upper=upper_x,
bin_count=bin_count,
contributions=contributions,
weighted_epsilon=epsilon * weight / weights_sum,
)

@render.text
def card_header():
return name
@@ -165,26 +184,31 @@ def column_code():
bin_count=int(input.bins()),
)

@render.plot()
def column_plot():
lower_x = float(input.lower())
upper_x = float(input.upper())
bin_count = int(input.bins())
weight = float(input.weight())
weights_sum = sum(float(weight) for weight in weights().values())
info(f"Weight ratio for {name}: {weight}/{weights_sum}")
if weights_sum == 0:
# This function is triggered when column is removed;
# Exit early to avoid divide-by-zero.
return None
accuracy, histogram = make_accuracy_histogram(
row_count=row_count,
lower=lower_x,
upper=upper_x,
bin_count=bin_count,
contributions=contributions,
weighted_epsilon=epsilon * weight / weights_sum,
)
@render.ui
def histogram_preview_ui():
accuracy, histogram = accuracy_histogram()
return [
ui.output_plot("histogram_preview_plot", height="300px"),
ui.layout_columns(
ui.markdown(
f"The {confidence:.0%} confidence interval is ±{accuracy:.3g}."
),
details(
summary("Data Table"),
ui.output_data_frame("data_frame"),
),
output_code_sample("Column Definition", "column_code"),
),
]

@render.data_frame
def data_frame():
accuracy, histogram = accuracy_histogram()
return render.DataGrid(histogram)

@render.plot
def histogram_preview_plot():
accuracy, histogram = accuracy_histogram()
s = "s" if contributions > 1 else ""
title = (
f"Simulated {name}: normal distribution, "
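
The key refactor here is moving the simulation into a single @reactive.calc that the preview UI, the data table, and the plot all read from, so the DP computation runs once per input change instead of once per output. A minimal standalone sketch of that pattern, with made-up input and output names rather than the app's real ones:

    import polars as pl
    from shiny import App, reactive, render, ui

    app_ui = ui.page_fluid(
        ui.input_slider("bins", "Bins", min=2, max=20, value=10),
        ui.output_text("summary_text"),
        ui.output_data_frame("summary_table"),
    )

    def server(input, output, session):
        @reactive.calc()
        def simulated_histogram():
            # The shared (potentially expensive) work happens once here; both
            # renderers below reuse the cached result until input.bins() changes,
            # mirroring how accuracy_histogram() feeds the plot, the data frame,
            # and the preview UI in column_module.py.
            bin_count = int(input.bins())
            return pl.DataFrame(
                {"bin": list(range(bin_count)), "count": [1] * bin_count}
            )

        @render.text
        def summary_text():
            return f"{simulated_histogram().height} bins simulated"

        @render.data_frame
        def summary_table():
            return render.DataGrid(simulated_histogram())

    app = App(app_ui, server)

Because the calc is cached, adding the data table and the confidence-interval text costs no extra differentially private computation beyond what the plot already needed.
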
4 changes: 1 addition & 3 deletions dp_wizard/utils/dp_helper.py
@@ -1,5 +1,3 @@
from typing import Any

import polars as pl
import opendp.prelude as dp

@@ -19,7 +17,7 @@ def make_accuracy_histogram(
bin_count: int,
contributions: int,
weighted_epsilon: float,
) -> tuple[float, Any]:
) -> tuple[float, pl.DataFrame]:
"""
Creates fake data between lower and upper, and then returns a DP histogram from it.
>>> accuracy, histogram = make_accuracy_histogram(
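
Narrowing the annotation from tuple[float, Any] to tuple[float, pl.DataFrame] documents what the column module now relies on: the histogram half of the pair is handed straight to render.DataGrid. A small usage sketch, with placeholder argument values rather than the app's defaults:

    import polars as pl

    from dp_wizard.utils.dp_helper import make_accuracy_histogram

    # Placeholder values for illustration only; the real app derives these
    # from the user's column configuration and privacy-loss budget.
    accuracy, histogram = make_accuracy_histogram(
        row_count=100,
        lower=0.0,
        upper=10.0,
        bin_count=5,
        contributions=1,
        weighted_epsilon=1.0,
    )

    assert isinstance(histogram, pl.DataFrame)  # guaranteed by the new annotation
    print(f"±{accuracy:.3g}")
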
4 changes: 4 additions & 0 deletions tests/test_app.py
@@ -114,6 +114,10 @@ def expect_no_error():
page.get_by_label("grade").check()
expect_visible(simulation)
assert page.get_by_label("Upper").input_value() == new_value
expect_visible("The 95% confidence interval is ±794")
page.get_by_text("Data Table").click()
expect_visible("(0, 2]")

# Add a second column:
# page.get_by_label("blank").check()
# TODO: Test is flaky?
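
The new assertions check the rendered interval text and then click the "Data Table" summary to reveal the histogram rows. expect_visible is a helper defined earlier in the test suite and not shown in this diff; judging by the single-argument calls, it closes over page. A rough, hypothetical equivalent built directly on Playwright's assertion API might look like:

    from playwright.sync_api import Page, expect

    def expect_visible(page: Page, text: str) -> None:
        # Assert that the given text is rendered and visible on the page.
        expect(page.get_by_text(text)).to_be_visible()
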
