From bb0e6a22111579c2e9c3b3c8e67c95289b4a5275 Mon Sep 17 00:00:00 2001
From: Chuck McCallum <mccalluc@users.noreply.github.com>
Date: Wed, 8 Jan 2025 14:18:18 -0500
Subject: [PATCH] confidence interval text + histogram table (#211)

* Checkpoint: the plot is missing on initial load, but works after that

* reactive calc is the right tool for the job

* render dataframe

* one line

* unused imports

* df typing

* three columns

* wrap table in details

* a little testing

* better names
---
 dp_wizard/app/analysis_panel.py           |  9 +--
 dp_wizard/app/components/column_module.py | 76 +++++++++++++++--------
 dp_wizard/utils/dp_helper.py              |  4 +-
 tests/test_app.py                         |  4 ++
 4 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/dp_wizard/app/analysis_panel.py b/dp_wizard/app/analysis_panel.py
index 8f32705..3e7da3a 100644
--- a/dp_wizard/app/analysis_panel.py
+++ b/dp_wizard/app/analysis_panel.py
@@ -6,7 +6,6 @@
 from dp_wizard.app.components.inputs import log_slider
 from dp_wizard.app.components.column_module import column_ui, column_server
 from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
-from dp_wizard.utils.dp_helper import confidence
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
 from dp_wizard.utils.code_generators import make_privacy_loss_block
 
@@ -44,18 +43,12 @@ def analysis_ui():
             ui.card(
                 ui.card_header("Simulation"),
                 ui.markdown(
-                    f"""
+                    """
                     This simulation will assume a normal distribution
                     between the specified lower and upper bounds.
                     Until you make a release, your CSV will not be
                     read except to determine the columns.
 
-                    The actual value is within the error bar
-                    with {int(confidence * 100)}% confidence.
-                    """
-                ),
-                ui.markdown(
-                    """
                     What is the approximate number of rows in the dataset?
                     This number is only used for the simulation
                     and not the final calculation.
diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
index d46fe9c..938edc8 100644
--- a/dp_wizard/app/components/column_module.py
+++ b/dp_wizard/app/components/column_module.py
@@ -1,11 +1,14 @@
 from logging import info
 
+from htmltools.tags import details, summary
 from shiny import ui, render, module, reactive, Inputs, Outputs, Session
+from shiny.types import SilentException
 
 from dp_wizard.utils.dp_helper import make_accuracy_histogram
 from dp_wizard.utils.shared import plot_histogram
 from dp_wizard.utils.code_generators import make_column_config_block
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip, hide_if
+from dp_wizard.utils.dp_helper import confidence
 
 
 default_weight = "2"
@@ -42,12 +45,7 @@ def column_ui():  # pragma: no cover
                 ),
                 ui.output_ui("optional_weight_ui"),
             ],
-            [
-                ui.output_plot("column_plot", height="300px"),
-                # Make plot smaller than default:
-                # about the same size as the other column.
-                output_code_sample("Column Definition", "column_code"),
-            ],
+            ui.output_ui("histogram_preview_ui"),
             col_widths=col_widths,  # type: ignore
         ),
     )
@@ -97,6 +95,27 @@ def _set_bins():
     def _set_weight():
         weights.set({**weights(), name: input.weight()})
 
+    @reactive.calc()
+    def accuracy_histogram():
+        lower_x = float(input.lower())
+        upper_x = float(input.upper())
+        bin_count = int(input.bins())
+        weight = float(input.weight())
+        weights_sum = sum(float(weight) for weight in weights().values())
+        info(f"Weight ratio for {name}: {weight}/{weights_sum}")
+        if weights_sum == 0:
+            # This function is triggered when column is removed;
+            # Exit early to avoid divide-by-zero.
+            raise SilentException("weights_sum == 0")
+        return make_accuracy_histogram(
+            row_count=row_count,
+            lower=lower_x,
+            upper=upper_x,
+            bin_count=bin_count,
+            contributions=contributions,
+            weighted_epsilon=epsilon * weight / weights_sum,
+        )
+
     @render.text
     def card_header():
         return name
@@ -165,26 +184,31 @@ def column_code():
             bin_count=int(input.bins()),
         )
 
-    @render.plot()
-    def column_plot():
-        lower_x = float(input.lower())
-        upper_x = float(input.upper())
-        bin_count = int(input.bins())
-        weight = float(input.weight())
-        weights_sum = sum(float(weight) for weight in weights().values())
-        info(f"Weight ratio for {name}: {weight}/{weights_sum}")
-        if weights_sum == 0:
-            # This function is triggered when column is removed;
-            # Exit early to avoid divide-by-zero.
-            return None
-        accuracy, histogram = make_accuracy_histogram(
-            row_count=row_count,
-            lower=lower_x,
-            upper=upper_x,
-            bin_count=bin_count,
-            contributions=contributions,
-            weighted_epsilon=epsilon * weight / weights_sum,
-        )
+    @render.ui
+    def histogram_preview_ui():
+        accuracy, histogram = accuracy_histogram()
+        return [
+            ui.output_plot("histogram_preview_plot", height="300px"),
+            ui.layout_columns(
+                ui.markdown(
+                    f"The {confidence:.0%} confidence interval is ±{accuracy:.3g}."
+                ),
+                details(
+                    summary("Data Table"),
+                    ui.output_data_frame("data_frame"),
+                ),
+                output_code_sample("Column Definition", "column_code"),
+            ),
+        ]
+
+    @render.data_frame
+    def data_frame():
+        accuracy, histogram = accuracy_histogram()
+        return render.DataGrid(histogram)
+
+    @render.plot
+    def histogram_preview_plot():
+        accuracy, histogram = accuracy_histogram()
         s = "s" if contributions > 1 else ""
         title = (
             f"Simulated {name}: normal distribution, "
diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py
index c85f2e6..90a3786 100644
--- a/dp_wizard/utils/dp_helper.py
+++ b/dp_wizard/utils/dp_helper.py
@@ -1,5 +1,3 @@
-from typing import Any
-
 import polars as pl
 import opendp.prelude as dp
 
@@ -19,7 +17,7 @@ def make_accuracy_histogram(
     bin_count: int,
     contributions: int,
     weighted_epsilon: float,
-) -> tuple[float, Any]:
+) -> tuple[float, pl.DataFrame]:
     """
     Creates fake data between lower and upper, and then returns a DP histogram from it.
     >>> accuracy, histogram = make_accuracy_histogram(
diff --git a/tests/test_app.py b/tests/test_app.py
index 7aa2209..283072c 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -114,6 +114,10 @@ def expect_no_error():
     page.get_by_label("grade").check()
     expect_visible(simulation)
     assert page.get_by_label("Upper").input_value() == new_value
+    expect_visible("The 95% confidence interval is ±794")
+    page.get_by_text("Data Table").click()
+    expect_visible("(0, 2]")
+
     # Add a second column:
     # page.get_by_label("blank").check()
     # TODO: Test is flaky?