From 6efa8e5ad539d07bdfc17392b09193f82d15e6bf Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 22 Nov 2024 10:03:09 -0500 Subject: [PATCH 1/8] use original column names in our report --- dp_wizard/utils/code_generators/__init__.py | 9 ++++----- dp_wizard/utils/code_generators/no-tests/_report_kv.py | 2 +- tests/test_app.py | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py index 92d62f1..8b0e23c 100644 --- a/dp_wizard/utils/code_generators/__init__.py +++ b/dp_wizard/utils/code_generators/__init__.py @@ -120,22 +120,21 @@ def _make_context(self): return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path)) def _make_extra_blocks(self): - identifiers = [name_to_identifier(name) for name in self.columns.keys()] outputs_expression = ( "{" + ",".join( str( Template("report_kv") .fill_values( - IDENTIFIER=id, + NAME=name, CONFIDENCE=confidence, ) .fill_expressions( - IDENTIFIER_HISTOGRAM=f"{id}_histogram", - IDENTIFIER_ACCURACY=f"{id}_accuracy", + IDENTIFIER_HISTOGRAM=f"{name_to_identifier(name)}_histogram", + IDENTIFIER_ACCURACY=f"{name_to_identifier(name)}_accuracy", ) ) - for id in identifiers + for name in self.columns.keys() ) + "}" ) diff --git a/dp_wizard/utils/code_generators/no-tests/_report_kv.py b/dp_wizard/utils/code_generators/no-tests/_report_kv.py index 544c85a..b27bf5f 100644 --- a/dp_wizard/utils/code_generators/no-tests/_report_kv.py +++ b/dp_wizard/utils/code_generators/no-tests/_report_kv.py @@ -1,4 +1,4 @@ -IDENTIFIER: { +NAME: { "histogram": dict(zip(*df_to_columns(IDENTIFIER_HISTOGRAM))), "accuracy": IDENTIFIER_ACCURACY, "confidence": CONFIDENCE, diff --git a/tests/test_app.py b/tests/test_app.py index 93c0ed6..80d9490 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -117,7 +117,7 @@ def expect_no_error(): expect_visible(download_results_text) expect_no_error() - # Notebook: + # Report: with page.expect_download() as report_download_info: page.get_by_text("Download report").click() expect_no_error() From 5412ab21549afc7b59ddab08341a3167bda5caaa Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 22 Nov 2024 14:22:20 -0500 Subject: [PATCH 2/8] flatten util --- dp_wizard/utils/code_generators/__init__.py | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py index 8b0e23c..1fe3242 100644 --- a/dp_wizard/utils/code_generators/__init__.py +++ b/dp_wizard/utils/code_generators/__init__.py @@ -1,4 +1,4 @@ -from typing import NamedTuple, Optional, Iterable +from typing import NamedTuple, Optional, Iterable, MutableMapping from abc import ABC, abstractmethod from pathlib import Path import re @@ -220,6 +220,37 @@ def make_column_config_block( # so it's better to keep them out of the class. +# https://stackoverflow.com/a/6027615/10727889 +def _flatten_dict(dictionary, parent_key=""): + """ + Walk tree to return flat dictionary. + >>> from pprint import pp + >>> pp(_flatten_dict({ + ... "inputs": { + ... "data": "fake.csv" + ... }, + ... "outputs": { + ... "a column": { + ... "(0, 1]": 24, + ... "(1, 2]": 42, + ... } + ... } + ... })) + {'inputs: data': 'fake.csv', + 'outputs: a column: (0, 1]': 24, + 'outputs: a column: (1, 2]': 42} + """ + separator = ": " + items = [] + for key, value in dictionary.items(): + new_key = parent_key + separator + key if parent_key else key + if isinstance(value, MutableMapping): + items.extend(_flatten_dict(value, new_key).items()) + else: + items.append((new_key, value)) + return dict(items) + + def _make_query(column_name): indentifier = name_to_identifier(column_name) return str( From 56feb2109eb8d88aa5f2560cbea3288a80dfb98b Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 22 Nov 2024 15:02:32 -0500 Subject: [PATCH 3/8] checkpoint on CSV report; tests not passing --- dp_wizard/tmp/.gitignore | 1 + dp_wizard/utils/code_generators/__init__.py | 39 ++---------------- .../code_generators/no-tests/_reports.py | 41 ++++++++++++++++++- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/dp_wizard/tmp/.gitignore b/dp_wizard/tmp/.gitignore index 1e64c47..8f15d7b 100644 --- a/dp_wizard/tmp/.gitignore +++ b/dp_wizard/tmp/.gitignore @@ -1,2 +1,3 @@ demo.csv report.txt +report.csv diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py index 1fe3242..d1e02ca 100644 --- a/dp_wizard/utils/code_generators/__init__.py +++ b/dp_wizard/utils/code_generators/__init__.py @@ -1,4 +1,4 @@ -from typing import NamedTuple, Optional, Iterable, MutableMapping +from typing import NamedTuple, Optional, Iterable from abc import ABC, abstractmethod from pathlib import Path import re @@ -138,6 +138,7 @@ def _make_extra_blocks(self): ) + "}" ) + tmp_path = Path(__file__).parent.parent.parent / "tmp" reports_block = str( Template("reports") .fill_expressions( @@ -145,9 +146,8 @@ def _make_extra_blocks(self): ) .fill_values( CSV_PATH=self.csv_path, - REPORT_PATH=str( - Path(__file__).parent.parent.parent / "tmp" / "report.txt" - ), + TXT_REPORT_PATH=str(tmp_path / "report.txt"), + CSV_REPORT_PATH=str(tmp_path / "report.csv"), ) ) return {"REPORTS_BLOCK": reports_block} @@ -220,37 +220,6 @@ def make_column_config_block( # so it's better to keep them out of the class. -# https://stackoverflow.com/a/6027615/10727889 -def _flatten_dict(dictionary, parent_key=""): - """ - Walk tree to return flat dictionary. - >>> from pprint import pp - >>> pp(_flatten_dict({ - ... "inputs": { - ... "data": "fake.csv" - ... }, - ... "outputs": { - ... "a column": { - ... "(0, 1]": 24, - ... "(1, 2]": 42, - ... } - ... } - ... })) - {'inputs: data': 'fake.csv', - 'outputs: a column: (0, 1]': 24, - 'outputs: a column: (1, 2]': 42} - """ - separator = ": " - items = [] - for key, value in dictionary.items(): - new_key = parent_key + separator + key if parent_key else key - if isinstance(value, MutableMapping): - items.extend(_flatten_dict(value, new_key).items()) - else: - items.append((new_key, value)) - return dict(items) - - def _make_query(column_name): indentifier = name_to_identifier(column_name) return str( diff --git a/dp_wizard/utils/code_generators/no-tests/_reports.py b/dp_wizard/utils/code_generators/no-tests/_reports.py index 3b4c84b..2076151 100644 --- a/dp_wizard/utils/code_generators/no-tests/_reports.py +++ b/dp_wizard/utils/code_generators/no-tests/_reports.py @@ -1,5 +1,38 @@ from yaml import dump from pathlib import Path +import csv + + +# https://stackoverflow.com/a/6027615/10727889 +def flatten_dict(dictionary, parent_key=""): + """ + Walk tree to return flat dictionary. + >>> from pprint import pp + >>> pp(flatten_dict({ + ... "inputs": { + ... "data": "fake.csv" + ... }, + ... "outputs": { + ... "a column": { + ... "(0, 1]": 24, + ... "(1, 2]": 42, + ... } + ... } + ... })) + {'inputs: data': 'fake.csv', + 'outputs: a column: (0, 1]': 24, + 'outputs: a column: (1, 2]': 42} + """ + separator = ": " + items = [] + for key, value in dictionary.items(): + new_key = parent_key + separator + key if parent_key else key + if isinstance(value, dict): + items.extend(flatten_dict(value, new_key).items()) + else: + items.append((new_key, value)) + return dict(items) + report = { "inputs": { @@ -9,4 +42,10 @@ } print(dump(report)) -Path(REPORT_PATH).write_text(dump(report)) +Path(TXT_REPORT_PATH).write_text(dump(report)) + +flat_report = flatten_dict(report) +with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle: + writer = csv.writer(handle) + for k, v in flat_report.items(): + writer.write_row(k, v) From 8eefe6c1e6ca2105172ea49af7d45a26f92360bc Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Sun, 24 Nov 2024 16:13:26 -0500 Subject: [PATCH 4/8] csv report works --- dp_wizard/app/results_panel.py | 21 +++++++++++++++++-- .../code_generators/no-tests/_reports.py | 4 ++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py index 5d5ce02..e3e55f0 100644 --- a/dp_wizard/app/results_panel.py +++ b/dp_wizard/app/results_panel.py @@ -19,9 +19,13 @@ def results_ui(): "Download results", ui.markdown("You can now make a differentially private release of your data."), ui.download_button( - "download_report", + "download_txt_report", "Download Report (.txt)", ), + ui.download_button( + "download_csv_report", + "Download Report (.csv)", + ), ui.download_button( "download_script", "Download Script (.py)", @@ -98,7 +102,7 @@ async def download_notebook(): filename="dp-wizard-report.txt", media_type="text/plain", ) - async def download_report(): + async def download_txt_report(): with ui.Progress() as progress: progress.set(message=wait_message) notebook_nb() # Evaluate just for the side effect of creating report. @@ -106,3 +110,16 @@ async def download_report(): Path(__file__).parent.parent / "tmp" / "report.txt" ).read_text() yield report_txt + + @render.download( + filename="dp-wizard-report.csv", + media_type="text/plain", + ) + async def download_csv_report(): + with ui.Progress() as progress: + progress.set(message=wait_message) + notebook_nb() # Evaluate just for the side effect of creating report. + report_csv = ( + Path(__file__).parent.parent / "tmp" / "report.csv" + ).read_text() + yield report_csv diff --git a/dp_wizard/utils/code_generators/no-tests/_reports.py b/dp_wizard/utils/code_generators/no-tests/_reports.py index 2076151..bfbbcb1 100644 --- a/dp_wizard/utils/code_generators/no-tests/_reports.py +++ b/dp_wizard/utils/code_generators/no-tests/_reports.py @@ -47,5 +47,5 @@ def flatten_dict(dictionary, parent_key=""): flat_report = flatten_dict(report) with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle: writer = csv.writer(handle) - for k, v in flat_report.items(): - writer.write_row(k, v) + for kv_pair in flat_report.items(): + writer.writerow(kv_pair) From 3dd1559f3c4f384fb5d7e09ee6472afa79735894 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Sun, 24 Nov 2024 16:21:06 -0500 Subject: [PATCH 5/8] add a test --- tests/test_app.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 80d9490..0c9c06e 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -117,14 +117,23 @@ def expect_no_error(): expect_visible(download_results_text) expect_no_error() - # Report: - with page.expect_download() as report_download_info: - page.get_by_text("Download report").click() + # Text Report: + with page.expect_download() as text_report_download_info: + page.get_by_text("Download report (.txt)").click() expect_no_error() - report_download = report_download_info.value + report_download = text_report_download_info.value report = report_download.path().read_text() - assert "inputs:" in report + assert "confidence: 0.95" in report + + # CSV Report: + with page.expect_download() as csv_report_download_info: + page.get_by_text("Download report (.csv)").click() + expect_no_error() + + report_download = csv_report_download_info.value + report = report_download.path().read_text() + assert "outputs: grade: confidence,0.95" in report # Script: with page.expect_download() as script_download_info: From 0f63b68a0ca66d7276d9ac46032f3945cd3be07f Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Sun, 24 Nov 2024 16:45:43 -0500 Subject: [PATCH 6/8] button grid --- dp_wizard/app/results_panel.py | 62 ++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py index e3e55f0..e970ecc 100644 --- a/dp_wizard/app/results_panel.py +++ b/dp_wizard/app/results_panel.py @@ -1,6 +1,8 @@ from pathlib import Path from shiny import ui, render, reactive, Inputs, Outputs, Session +from faicons import icon_svg +from htmltools.tags import table, tr, td from dp_wizard.utils.code_generators import ( NotebookGenerator, @@ -18,21 +20,51 @@ def results_ui(): return ui.nav_panel( "Download results", ui.markdown("You can now make a differentially private release of your data."), - ui.download_button( - "download_txt_report", - "Download Report (.txt)", - ), - ui.download_button( - "download_csv_report", - "Download Report (.csv)", - ), - ui.download_button( - "download_script", - "Download Script (.py)", - ), - ui.download_button( - "download_notebook", - "Download Notebook (.ipynb)", + table( + tr( + td( + ui.download_button( + "download_notebook", + [ + icon_svg("book", margin_right="0.5em"), + "Download Notebook (.ipynb)", + ], + width="20em", + ) + ), + td( + ui.download_button( + "download_script", + [ + icon_svg("python", margin_right="0.5em"), + "Download Script (.py)", + ], + width="20em", + ) + ), + ), + tr( + td( + ui.download_button( + "download_txt_report", + [ + icon_svg("file-lines", margin_right="0.5em"), + "Download Report (.txt)", + ], + width="20em", + ) + ), + td( + ui.download_button( + "download_csv_report", + [ + icon_svg("file-csv", margin_right="0.5em"), + "Download Report (.csv)", + ], + width="20em", + ) + ), + ), ), value="results_panel", ) From aa53243fe307ee1f50d869fa232e057f4fc55ba3 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Sun, 24 Nov 2024 16:58:41 -0500 Subject: [PATCH 7/8] factor out button function --- dp_wizard/app/results_panel.py | 64 ++++++++++++---------------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py index e970ecc..fa02296 100644 --- a/dp_wizard/app/results_panel.py +++ b/dp_wizard/app/results_panel.py @@ -16,54 +16,34 @@ wait_message = "Please wait." +def td_button(name: str, ext: str, icon: str): + function_name = f'download_{name.lower().replace(" ", "_")}' + return ( + td( + ui.download_button( + function_name, + [ + icon_svg(icon, margin_right="0.5em"), + f"Download {name} ({ext})", + ], + width="20em", + ) + ), + ) + + def results_ui(): return ui.nav_panel( "Download results", ui.markdown("You can now make a differentially private release of your data."), table( tr( - td( - ui.download_button( - "download_notebook", - [ - icon_svg("book", margin_right="0.5em"), - "Download Notebook (.ipynb)", - ], - width="20em", - ) - ), - td( - ui.download_button( - "download_script", - [ - icon_svg("python", margin_right="0.5em"), - "Download Script (.py)", - ], - width="20em", - ) - ), + td_button("Notebook", ".ipynb", "book"), + td_button("Script", ".py", "python"), ), tr( - td( - ui.download_button( - "download_txt_report", - [ - icon_svg("file-lines", margin_right="0.5em"), - "Download Report (.txt)", - ], - width="20em", - ) - ), - td( - ui.download_button( - "download_csv_report", - [ - icon_svg("file-csv", margin_right="0.5em"), - "Download Report (.csv)", - ], - width="20em", - ) - ), + td_button("Report", ".txt", "file-lines"), + td_button("Table", ".csv", "file-csv"), ), ), value="results_panel", @@ -134,7 +114,7 @@ async def download_notebook(): filename="dp-wizard-report.txt", media_type="text/plain", ) - async def download_txt_report(): + async def download_report(): with ui.Progress() as progress: progress.set(message=wait_message) notebook_nb() # Evaluate just for the side effect of creating report. @@ -147,7 +127,7 @@ async def download_txt_report(): filename="dp-wizard-report.csv", media_type="text/plain", ) - async def download_csv_report(): + async def download_table(): with ui.Progress() as progress: progress.set(message=wait_message) notebook_nb() # Evaluate just for the side effect of creating report. From e5b53c7c3aaa37a8a9f819cf7bbfa2f1c9636e92 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Sun, 24 Nov 2024 17:09:13 -0500 Subject: [PATCH 8/8] update test to match --- tests/test_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_app.py b/tests/test_app.py index 0c9c06e..64c46cc 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -128,7 +128,7 @@ def expect_no_error(): # CSV Report: with page.expect_download() as csv_report_download_info: - page.get_by_text("Download report (.csv)").click() + page.get_by_text("Download table (.csv)").click() expect_no_error() report_download = csv_report_download_info.value