Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read CSV columns from uploaded file #45

Merged
merged 12 commits into from
Oct 10, 2024
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# MacOS
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
29 changes: 18 additions & 11 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
"""DP Creator II makes it easier to get started with Differential Privacy."""

import os
from pathlib import Path
from argparse import ArgumentParser
from argparse import ArgumentParser, ArgumentTypeError

import shiny


__version__ = "0.0.1"


def existing_csv(arg):
    """Validate a command-line value as the path of an existing CSV file.

    Intended for use as an argparse ``type=`` callable.

    Args:
        arg: The raw argument value (a string or path-like object).

    Returns:
        The argument converted to a ``pathlib.Path``.

    Raises:
        ArgumentTypeError: If the path is not an existing regular file,
            or if its extension is not ".csv" (compared case-insensitively).
    """
    path = Path(arg)
    # is_file() rather than exists(): a directory can't be read as a CSV,
    # even if it happens to be named "something.csv".
    if not path.is_file():
        raise ArgumentTypeError(f"No such file: {arg}")
    # Compare case-insensitively so that "DATA.CSV" is accepted too.
    if path.suffix.lower() != ".csv":
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    return path


def get_arg_parser():
parser = ArgumentParser(description=__doc__)
parser.add_argument(
"--csv",
dest="csv_path",
type=Path,
type=existing_csv,
help="Path to CSV containing private data",
)
parser.add_argument(
Expand All @@ -29,13 +37,12 @@ def get_arg_parser():

def main(): # pragma: no cover
# We call parse_args() again inside the app.
# We only call it here so "--help" is handled.
# We only call it here so "--help" is handled,
# and to validate inputs.
get_arg_parser().parse_args()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also have a test that the required arguments were passed? When I run the app with no args, it shows a stack trace of an unhandled error. I read your comment about skipping this for now, but I'm not sure I understand how that helps with debugging?


# run_app() depends on the CWD.
os.chdir(Path(__file__).parent)

run_app_kwargs = {
"reload": True,
}
shiny.run_app(launch_browser=True, **run_app_kwargs)
shiny.run_app(
app="dp_creator_ii.app",
launch_browser=True,
reload=True,
)
32 changes: 28 additions & 4 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@
from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.csv_helper import read_field_names


def dataset_ui():
return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
ui.output_text("csv_path_text"),
ui.input_file("csv_path_from_ui", "Choose CSV file", accept=[".csv"]),
"CSV path from either CLI or UI:",
ui.output_text("csv_path"),
"CSV fields:",
ui.output_text("csv_fields"),
"Unit of privacy:",
ui.output_text("unit_of_privacy_text"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
Expand All @@ -26,12 +32,30 @@ def dataset_server(input, output, session):
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy

csv_path = reactive.value(arg_csv_path)
csv_path_from_cli_value = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)

@reactive.calc
def csv_path_calc():
    # A file uploaded through the UI takes precedence;
    # otherwise fall back to the value supplied on the command line.
    uploads = input.csv_path_from_ui()
    if uploads is None:
        return csv_path_from_cli_value.get()
    first_upload = uploads[0]
    return first_upload["datapath"]

@render.text
def csv_path():
    # Text output: whichever CSV path is currently in effect.
    current_path = csv_path_calc()
    return current_path

@reactive.calc
def csv_fields_calc():
    # Column names of the selected CSV; None while no path is available.
    selected_path = csv_path_calc()
    return None if selected_path is None else read_field_names(selected_path)

@render.text
def csv_path_text():
return str(csv_path.get())
def csv_fields():
return csv_fields_calc()

@render.text
def unit_of_privacy_text():
Expand Down
7 changes: 7 additions & 0 deletions dp_creator_ii/csv_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import csv


def read_field_names(csv_path):
    """Return the column names from the header row of a CSV file.

    The file is always decoded as UTF-8, so the result does not depend on
    the platform's default encoding. A leading byte-order mark is dropped
    so it cannot leak into the first column name, and undecodable bytes
    are replaced instead of raising.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The list of field names from the first row,
        or ``None`` if the file contains no rows at all.
    """
    with open(
        csv_path,
        newline="",  # Let the csv module handle line endings itself.
        encoding="utf-8-sig",  # UTF-8, tolerating an optional BOM.
        errors="replace",  # Only the header is needed: don't crash on stray bytes.
    ) as csv_handle:
        reader = csv.DictReader(csv_handle)
        return reader.fieldnames
8 changes: 7 additions & 1 deletion dp_creator_ii/tests/fixtures/fake.csv
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
fake-column
student_id,class_year,assignment_type,grade
1234,1,quiz,90
1234,1,quiz,95
1234,1,exam,85
6789,2,quiz,70
6789,2,quiz,100
6789,2,exam,90
15 changes: 15 additions & 0 deletions dp_creator_ii/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

from shiny.run import ShinyAppProc
from playwright.sync_api import Page, expect
from shiny.pytest import create_app_fixture
Expand All @@ -19,24 +21,37 @@ def expect_visible(text):
def expect_not_visible(text):
    # Assert that the page shows no visible element matching this text.
    locator = page.get_by_text(text)
    expect(locator).not_to_be_visible()

def expect_no_error():
    # Assert that nothing matching ".shiny-output-error" is attached to the page.
    error_outputs = page.locator(".shiny-output-error")
    expect(error_outputs).not_to_be_attached()

page.goto(app.url)
expect(page).to_have_title("DP Creator II")
expect_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_no_error()

csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
page.get_by_label("Choose CSV file").set_input_files(csv_path.resolve())
expect_visible("student_id")
expect_no_error()

page.get_by_role("button", name="Define analysis").click()
expect_not_visible(pick_dataset_text)
expect_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_no_error()

page.get_by_role("button", name="Download results").click()
expect_not_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_visible(download_results_text)
expect_no_error()

with page.expect_download() as download_info:
page.get_by_text("Download script").click()
expect_no_error()

download = download_info.value
script = download.path().read_text()
assert "privacy_unit=dp.unit_of(contributions=1)" in script
38 changes: 38 additions & 0 deletions dp_creator_ii/tests/test_arg_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pathlib import Path
from argparse import ArgumentTypeError

import pytest

from dp_creator_ii import get_arg_parser, existing_csv


def test_help():
    """The README must contain the parser's current help text."""
    # argparse doesn't actually know the name of the script
    # and inserts the name of the running program instead.
    # The final pair covers the different wording under Python 3.9.
    substitutions = (
        ("__main__.py", "dp-creator-ii"),
        ("pytest", "dp-creator-ii"),
        ("optional arguments:", "options:"),
    )
    help_text = get_arg_parser().format_help()
    for old, new in substitutions:
        help_text = help_text.replace(old, new)
    print(help_text)

    repo_root = Path(__file__).parent.parent.parent
    readme_md = (repo_root / "README.md").read_text()
    assert help_text in readme_md


def test_arg_validation_no_file():
    """A path that does not exist is rejected with a "No such file" error."""
    missing = "no-such-file"
    expected_message = "No such file: no-such-file"
    with pytest.raises(ArgumentTypeError, match=expected_message):
        existing_csv(missing)


def test_arg_validation_not_csv():
    """The notebook fixture path is rejected with the extension error."""
    fixtures_dir = Path(__file__).parent / "fixtures"
    notebook_path = fixtures_dir / "fake.ipynb"
    expected_message = 'Must have ".csv" extension:'
    with pytest.raises(ArgumentTypeError, match=expected_message):
        existing_csv(notebook_path)


def test_arg_validation_works():
    """The CSV fixture passes validation and keeps its file name."""
    fixture = Path(__file__).parent / "fixtures" / "fake.csv"
    validated = existing_csv(fixture)
    assert validated.name == "fake.csv"
9 changes: 9 additions & 0 deletions dp_creator_ii/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
import polars.testing
import tempfile
import pytest
from pathlib import Path

from dp_creator_ii.csv_helper import read_field_names


def test_read_field_names():
    """read_field_names returns the CSV fixture's header columns, in order."""
    expected = ["student_id", "class_year", "assignment_type", "grade"]
    fixture = Path(__file__).parent / "fixtures" / "fake.csv"
    assert read_field_names(fixture) == expected


@pytest.mark.parametrize("encoding", ["latin1", "utf8"])
Expand Down
20 changes: 0 additions & 20 deletions dp_creator_ii/tests/test_help.py

This file was deleted.