diff --git a/README-PYPI.md b/README-PYPI.md
index 975e12d..1c16f1f 100644
--- a/README-PYPI.md
+++ b/README-PYPI.md
@@ -10,11 +10,27 @@ Output options include:
 ## Usage
-usage: dp-wizard [-h] [--csv CSV_PATH] [--contrib CONTRIB] [--demo]
+usage: dp-wizard [-h] [--public_csv CSV] [--private_csv CSV] [--contrib CONTRIB] [--demo]
+DP Wizard makes it easier to get started with Differential Privacy.
   -h, --help         show this help message and exit
-  --csv CSV_PATH     Path to CSV containing private data
+  --public_csv CSV   Path to public CSV
+  --private_csv CSV  Path to private CSV
   --contrib CONTRIB  How many rows can an individual contribute?
   --demo             Use generated fake CSV for a quick demo
+Use "--public_csv" if you have a public data set, and are curious how
+DP can be applied: The preview visualizations will use your public data.
+Use "--private_csv" if you only have a private data set, and want to
+make a release from it: The preview visualizations will only use
+simulated data, and apart from the headers, the private CSV is not
+read until the release.
+Use "--public_csv" and "--private_csv" together if you have two CSVs
+with the same structure. Perhaps the public CSV is older and no longer
+sensitive. Preview visualizations will be made with the public data,
+but the release will be made with private data.
diff --git a/README.md b/README.md
index 5006e4c..dcb750f 100644
--- a/README.md
+++ b/README.md
@@ -13,13 +13,29 @@ Building on what we've learned from [DP Creator](https://github.com/opendp/dpcre
 ## Usage
-usage: dp-wizard [-h] [--csv CSV_PATH] [--contrib CONTRIB] [--demo]
+usage: dp-wizard [-h] [--public_csv CSV] [--private_csv CSV] [--contrib CONTRIB] [--demo]
+DP Wizard makes it easier to get started with Differential Privacy.
   -h, --help         show this help message and exit
-  --csv CSV_PATH     Path to CSV containing private data
+  --public_csv CSV   Path to public CSV
+  --private_csv CSV  Path to private CSV
   --contrib CONTRIB  How many rows can an individual contribute?
   --demo             Use generated fake CSV for a quick demo
+Use "--public_csv" if you have a public data set, and are curious how
+DP can be applied: The preview visualizations will use your public data.
+Use "--private_csv" if you only have a private data set, and want to
+make a release from it: The preview visualizations will only use
+simulated data, and apart from the headers, the private CSV is not
+read until the release.
+Use "--public_csv" and "--private_csv" together if you have two CSVs
+with the same structure. Perhaps the public CSV is older and no longer
+sensitive. Preview visualizations will be made with the public data,
+but the release will be made with private data.
diff --git a/dp_wizard/app/__init__.py b/dp_wizard/app/__init__.py
index 43e78ff..5310475 100644
--- a/dp_wizard/app/__init__.py
+++ b/dp_wizard/app/__init__.py
@@ -28,8 +28,11 @@ def ctrl_c_reminder():  # pragma: no cover
 def make_server_from_cli_info(cli_info: CLIInfo):
     def server(input: Inputs, output: Outputs, session: Session):  # pragma: no cover
-        cli_csv_path = cli_info.csv_path
-        csv_path = reactive.value("" if cli_csv_path is None else cli_csv_path)
+        public_csv_path = reactive.value(  # noqa: F841 # TODO
+            cli_info.public_csv_path or ""
+        )
+        private_csv_path = reactive.value(cli_info.private_csv_path or "")
         contributions = reactive.value(cli_info.contributions)
         lower_bounds = reactive.value({})
@@ -43,7 +46,8 @@ def server(input: Inputs, output: Outputs, session: Session):  # pragma: no cove
-            csv_path=csv_path,
+            public_csv_path=public_csv_path,
+            private_csv_path=private_csv_path,
@@ -51,7 +55,8 @@ def server(input: Inputs, output: Outputs, session: Session):  # pragma: no cove
-            csv_path=csv_path,
+            public_csv_path=public_csv_path,
+            private_csv_path=private_csv_path,
@@ -63,7 +68,8 @@ def server(input: Inputs, output: Outputs, session: Session):  # pragma: no cove
-            csv_path=csv_path,
+            public_csv_path=public_csv_path,
+            private_csv_path=private_csv_path,
diff --git a/dp_wizard/app/analysis_panel.py b/dp_wizard/app/analysis_panel.py
index 3e7da3a..85aff06 100644
--- a/dp_wizard/app/analysis_panel.py
+++ b/dp_wizard/app/analysis_panel.py
@@ -1,11 +1,16 @@
 from math import pow
 from typing import Iterable, Any
+from pathlib import Path
 from shiny import ui, reactive, render, req, Inputs, Outputs, Session
 from dp_wizard.app.components.inputs import log_slider
 from dp_wizard.app.components.column_module import column_ui, column_server
-from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
+from dp_wizard.utils.csv_helper import (
+    read_csv_ids_labels,
+    read_csv_ids_names,
+    get_csv_row_count,
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
 from dp_wizard.utils.code_generators import make_privacy_loss_block
@@ -42,24 +47,7 @@ def analysis_ui():
-                ui.markdown(
-                    """
-                    This simulation will assume a normal distribution
-                    between the specified lower and upper bounds.
-                    Until you make a release, your CSV will not be
-                    read except to determine the columns.
-                    What is the approximate number of rows in the dataset?
-                    This number is only used for the simulation
-                    and not the final calculation.
-                    """
-                ),
-                ui.input_select(
-                    "row_count",
-                    "Estimated Rows",
-                    choices=["100", "1000", "10000"],
-                    selected="100",
-                ),
+                ui.output_ui("simulation_card_ui"),
@@ -82,7 +70,8 @@ def analysis_server(
     input: Inputs,
     output: Outputs,
     session: Session,
-    csv_path: reactive.Value[str],
+    public_csv_path: reactive.Value[str],
+    private_csv_path: reactive.Value[str],
     contributions: reactive.Value[int],
     is_demo: bool,
     lower_bounds: reactive.Value[dict[str, float]],
@@ -124,6 +113,51 @@ def columns_checkbox_group_tooltip_ui():
+    @render.ui
+    def simulation_card_ui():
+        if public_csv_path():
+            row_count = get_csv_row_count(Path(public_csv_path()))
+            return [
+                ui.markdown(
+                    f"""
+                    Because you've provided a public CSV,
+                    it *will be read* to generate previews.
+                    The confidence interval depends on the number of rows.
+                    Your public CSV has {row_count} rows,
+                    but if you believe the private CSV will be
+                    much larger or smaller, please update.
+                    """
+                ),
+                ui.input_select(
+                    "row_count",
+                    "Estimated Rows",
+                    choices=[row_count, "100", "1000", "10000"],
+                    selected=row_count,
+                ),
+            ]
+        else:
+            return [
+                ui.markdown(
+                    """
+                    This simulation will assume a normal distribution
+                    between the specified lower and upper bounds.
+                    Until you make a release, your CSV will not be
+                    read except to determine the columns.
+                    What is the approximate number of rows in the dataset?
+                    This number is only used for the simulation
+                    and not the final calculation.
+                    """
+                ),
+                ui.input_select(
+                    "row_count",
+                    "Estimated Rows",
+                    choices=["100", "1000", "10000"],
+                    selected="100",
+                ),
+            ]
     def columns_ui():
         column_ids = input.columns_checkbox_group()
@@ -131,6 +165,7 @@ def columns_ui():
         for column_id in column_ids:
+                public_csv_path=public_csv_path(),
@@ -146,11 +181,13 @@ def columns_ui():
     def csv_ids_names_calc():
-        return read_csv_ids_names(req(csv_path()))
+        # The previous tab validated that if both public and private are given,
+        # the columns match, so it shouldn't matter which is read.
+        return read_csv_ids_names(Path(req(public_csv_path() or private_csv_path())))
     def csv_ids_labels_calc():
-        return read_csv_ids_labels(req(csv_path()))
+        return read_csv_ids_labels(Path(req(public_csv_path() or private_csv_path())))
     def epsilon_tooltip_ui():
diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
index 938edc8..b99d33b 100644
--- a/dp_wizard/app/components/column_module.py
+++ b/dp_wizard/app/components/column_module.py
@@ -3,12 +3,14 @@
 from htmltools.tags import details, summary
 from shiny import ui, render, module, reactive, Inputs, Outputs, Session
 from shiny.types import SilentException
+import polars as pl
 from dp_wizard.utils.dp_helper import make_accuracy_histogram
 from dp_wizard.utils.shared import plot_histogram
 from dp_wizard.utils.code_generators import make_column_config_block
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip, hide_if
 from dp_wizard.utils.dp_helper import confidence
+from dp_wizard.utils.mock_data import mock_data, ColumnDef
 default_weight = "2"
@@ -56,6 +58,7 @@ def column_server(
     input: Inputs,
     output: Outputs,
     session: Session,
+    public_csv_path: str,
     name: str,
     contributions: int,
     epsilon: float,
@@ -107,7 +110,20 @@ def accuracy_histogram():
             # This function is triggered when column is removed;
             # Exit early to avoid divide-by-zero.
             raise SilentException("weights_sum == 0")
+        # Mock data only depends on lower and upper bounds, so it could be cached,
+        # but I'd guess this is dominated by the DP operations,
+        # so not worth optimizing.
+        # Use the real public data if a path was given; otherwise use mock data.
+        if public_csv_path:
+            lf = pl.scan_csv(public_csv_path)
+        else:
+            lf = pl.LazyFrame(
+                mock_data({name: ColumnDef(lower_x, upper_x)}, row_count=row_count)
+            )
         return make_accuracy_histogram(
+            lf=lf,
+            column_name=name,
@@ -210,9 +226,11 @@ def data_frame():
     def histogram_preview_plot():
         accuracy, histogram = accuracy_histogram()
         s = "s" if contributions > 1 else ""
-        title = (
-            f"Simulated {name}: normal distribution, "
-            f"{contributions} contribution{s} / invidual"
+        title = ", ".join(
+            [
+                name if public_csv_path else f"Simulated {name}: normal distribution",
+                f"{contributions} contribution{s} / invidual",
+            ]
         return plot_histogram(
diff --git a/dp_wizard/app/components/outputs.py b/dp_wizard/app/components/outputs.py
index cb0f4b3..bf26475 100644
--- a/dp_wizard/app/components/outputs.py
+++ b/dp_wizard/app/components/outputs.py
@@ -22,3 +22,7 @@ def demo_tooltip(is_demo: bool, text: str):  # pragma: no cover
 def hide_if(condition: bool, el):  # pragma: no cover
     display = "none" if condition else "block"
     return ui.div(el, style=f"display: {display};")
+def info_box(content):  # pragma: no cover
+    return ui.div(content, class_="alert alert-info", role="alert")
diff --git a/dp_wizard/app/dataset_panel.py b/dp_wizard/app/dataset_panel.py
index cdfc9ae..0e8c0cf 100644
--- a/dp_wizard/app/dataset_panel.py
+++ b/dp_wizard/app/dataset_panel.py
@@ -1,38 +1,80 @@
 from pathlib import Path
+from typing import Optional
 from shiny import ui, reactive, render, Inputs, Outputs, Session
-from dp_wizard.utils.argparse_helpers import get_cli_info
-from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
+from dp_wizard.utils.argparse_helpers import (
+    get_cli_info,
+from dp_wizard.utils.csv_helper import get_csv_names_mismatch
+from dp_wizard.app.components.outputs import (
+    output_code_sample,
+    demo_tooltip,
+    hide_if,
+    info_box,
 from dp_wizard.utils.code_generators import make_privacy_unit_block
 def dataset_ui():
     cli_info = get_cli_info()
-    csv_placeholder = "" if cli_info.csv_path is None else Path(cli_info.csv_path).name
+    public_csv_placeholder = (
+        "" if cli_info.public_csv_path is None else Path(cli_info.public_csv_path).name
+    )
+    private_csv_placeholder = (
+        ""
+        if cli_info.private_csv_path is None
+        else Path(cli_info.private_csv_path).name
+    )
     return ui.nav_panel(
         "Select Dataset",
-        # Doesn't seem to be possible to preset the actual value,
-        # but the placeholder string is a good substitute.
-        ui.input_file(
-            "csv_path",
-            ["Choose CSV file", ui.output_ui("choose_csv_demo_tooltip_ui")],
-            accept=[".csv"],
-            placeholder=csv_placeholder,
-        ),
-        ui.markdown(
-            "How many rows of the CSV can one individual contribute to? "
-            'This is the "unit of privacy" which will be protected.'
+        ui.card(
+            ui.card_header("Input CSVs"),
+            ui.markdown(
+                f"""
+Choose **Public CSV** {PUBLIC_TEXT}
+Choose **Private CSV** {PRIVATE_TEXT}
+Choose both **Public CSV** and **Private CSV** {PUBLIC_PRIVATE_TEXT}"""
+            ),
+            ui.row(
+                # Doesn't seem to be possible to preset the actual value,
+                # but the placeholder string is a good substitute.
+                ui.input_file(
+                    "public_csv_path",
+                    ["Choose Public CSV", ui.output_ui("choose_csv_demo_tooltip_ui")],
+                    accept=[".csv"],
+                    placeholder=public_csv_placeholder,
+                ),
+                ui.input_file(
+                    "private_csv_path",
+                    "Choose Private CSV",
+                    accept=[".csv"],
+                    placeholder=private_csv_placeholder,
+                ),
+            ),
+            ui.output_ui("csv_column_match_ui"),
-        ui.input_numeric(
-            "contributions",
-            ["Contributions", ui.output_ui("contributions_demo_tooltip_ui")],
-            cli_info.contributions,
-            min=1,
+        ui.card(
+            ui.card_header("Unit of privacy"),
+            ui.markdown(
+                "How many rows of the CSV can one individual contribute to? "
+                'This is the "unit of privacy" which will be protected.'
+            ),
+            ui.input_numeric(
+                "contributions",
+                ["Contributions", ui.output_ui("contributions_demo_tooltip_ui")],
+                cli_info.contributions,
+                min=1,
+            ),
+            ui.output_ui("python_tooltip_ui"),
+            output_code_sample("Unit of Privacy", "unit_of_privacy_python"),
-        ui.output_ui("python_tooltip_ui"),
-        output_code_sample("Unit of Privacy", "unit_of_privacy_python"),
@@ -42,14 +84,49 @@ def dataset_server(
     input: Inputs,
     output: Outputs,
     session: Session,
-    csv_path: reactive.Value[str],
+    public_csv_path: reactive.Value[str],
+    private_csv_path: reactive.Value[str],
     contributions: reactive.Value[int],
     is_demo: bool,
 ):  # pragma: no cover
-    @reactive.event(input.csv_path)
-    def _on_csv_path_change():
-        csv_path.set(input.csv_path()[0]["datapath"])
+    @reactive.event(input.public_csv_path)
+    def _on_public_csv_path_change():
+        public_csv_path.set(input.public_csv_path()[0]["datapath"])
+    @reactive.effect
+    @reactive.event(input.private_csv_path)
+    def _on_private_csv_path_change():
+        private_csv_path.set(input.private_csv_path()[0]["datapath"])
+    @reactive.calc
+    def csv_column_mismatch_calc() -> Optional[tuple[set, set]]:
+        public = public_csv_path()
+        private = private_csv_path()
+        if public and private:
+            just_public, just_private = get_csv_names_mismatch(
+                Path(public), Path(private)
+            )
+            if just_public or just_private:
+                return just_public, just_private
+    @render.ui
+    def csv_column_match_ui():
+        mismatch = csv_column_mismatch_calc()
+        messages = []
+        if mismatch:
+            just_public, just_private = mismatch
+            if just_public:
+                messages.append(
+                    "- Only the public CSV contains: "
+                    + ", ".join(f"`{name}`" for name in just_public)
+                )
+            if just_private:
+                messages.append(
+                    "- Only the private CSV contains: "
+                    + ", ".join(f"`{name}`" for name in just_private)
+                )
+        return hide_if(not messages, info_box(ui.markdown("\n".join(messages))))
@@ -58,10 +135,16 @@ def _on_contributions_change():
     def button_enabled():
-        contributions_is_set = input.contributions() is not None
+        public_csv_path_is_set = (
+            input.public_csv_path() is not None and len(input.public_csv_path()) > 0
+        )
+        private_csv_path_is_set = (
+            input.private_csv_path() is not None and len(input.private_csv_path()) > 0
+        )
         csv_path_is_set = (
-            input.csv_path() is not None and len(input.csv_path()) > 0
-        ) or is_demo
+            public_csv_path_is_set or private_csv_path_is_set or is_demo
+        ) and not csv_column_mismatch_calc()
+        contributions_is_set = input.contributions() is not None
         return contributions_is_set and csv_path_is_set
diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py
index 992f6ec..eca63ce 100644
--- a/dp_wizard/app/results_panel.py
+++ b/dp_wizard/app/results_panel.py
@@ -54,7 +54,8 @@ def results_server(
     input: Inputs,
     output: Outputs,
     session: Session,
-    csv_path: reactive.Value[str],
+    public_csv_path: reactive.Value[str],
+    private_csv_path: reactive.Value[str],
     contributions: reactive.Value[int],
     lower_bounds: reactive.Value[dict[str, float]],
     upper_bounds: reactive.Value[dict[str, float]],
@@ -77,7 +78,8 @@ def analysis_plan() -> AnalysisPlan:
             for col in weights().keys()
         return AnalysisPlan(
-            csv_path=csv_path(),
+            # Prefer private CSV, if available:
+            csv_path=private_csv_path() or public_csv_path(),
diff --git a/dp_wizard/utils/argparse_helpers.py b/dp_wizard/utils/argparse_helpers.py
index 149b05b..5da58d8 100644
--- a/dp_wizard/utils/argparse_helpers.py
+++ b/dp_wizard/utils/argparse_helpers.py
@@ -1,28 +1,58 @@
 from sys import argv
 from pathlib import Path
-from argparse import ArgumentParser, ArgumentTypeError
+import argparse
 import csv
 import random
-from warnings import warn
 from typing import NamedTuple, Optional
 def _existing_csv_type(arg: str) -> Path:
     path = Path(arg)
     if not path.exists():
-        raise ArgumentTypeError(f"No such file: {arg}")
+        raise argparse.ArgumentTypeError(f"No such file: {arg}")
     if path.suffix != ".csv":
-        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
+        raise argparse.ArgumentTypeError(f'Must have ".csv" extension: {arg}')
     return path
+PUBLIC_TEXT = """if you have a public data set, and are curious how
+DP can be applied: The preview visualizations will use your public data."""
+PRIVATE_TEXT = """if you only have a private data set, and want to
+make a release from it: The preview visualizations will only use
+simulated data, and apart from the headers, the private CSV is not
+read until the release."""
+PUBLIC_PRIVATE_TEXT = """if you have two CSVs
+with the same structure. Perhaps the public CSV is older and no longer
+sensitive. Preview visualizations will be made with the public data,
+but the release will be made with private data."""
 def _get_arg_parser():
-    parser = ArgumentParser(description=__doc__)
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="DP Wizard makes it easier to get started with "
+        "Differential Privacy.",
+        epilog=f"""
+Use "--public_csv" {PUBLIC_TEXT}
+Use "--private_csv" {PRIVATE_TEXT}
+Use "--public_csv" and "--private_csv" together {PUBLIC_PRIVATE_TEXT}
+    )
-        "--csv",
-        dest="csv_path",
+        "--public_csv",
+        dest="public_csv_path",
+        metavar="CSV",
-        help="Path to CSV containing private data",
+        help="Path to public CSV",
+    )
+    parser.add_argument(
+        "--private_csv",
+        dest="private_csv_path",
+        metavar="CSV",
+        type=_existing_csv_type,
+        help="Path to private CSV",
@@ -41,17 +71,30 @@ def _get_arg_parser():
 def _get_args():
     >>> _get_args()
-    Namespace(csv_path=None, contributions=1, demo=False)
+    Namespace(public_csv_path=None, private_csv_path=None, contributions=1, demo=False)
     arg_parser = _get_arg_parser()
     if "pytest" in argv[0] or ("shiny" in argv[0] and "run" == argv[1]):
         # We are running a test,
         # and ARGV is polluted, so override:
-        return arg_parser.parse_args([])
+        args = arg_parser.parse_args([])
         # Normal parsing:
-        return arg_parser.parse_args()  # pragma: no cover
+        args = arg_parser.parse_args()  # pragma: no cover
+    if args.demo:  # pragma: no cover
+        other_args = {arg for arg in dir(args) if not arg.startswith("_")} - {
+            "demo",
+            "contributions",
+        }
+        set_args = [k for k in other_args if getattr(args, k) is not None]
+        if set_args:
+            arg_parser.error(
+                "When --demo is set, other arguments should be skipped: "
+                + ", ".join(set_args)
+            )
+    return args
 def _clip(n: float, lower: float, upper: float) -> float:
@@ -67,30 +110,15 @@ def _clip(n: float, lower: float, upper: float) -> float:
 class CLIInfo(NamedTuple):
-    csv_path: Optional[str]
+    public_csv_path: Optional[str]
+    private_csv_path: Optional[str]
     contributions: int
     is_demo: bool
-def _get_demo_csv_contrib() -> CLIInfo:
-    """
-    >>> csv_path, contributions, is_demo = _get_demo_csv_contrib()
-    >>> with open(csv_path, newline="") as csv_handle:
-    ...     reader = csv.DictReader(csv_handle)
-    ...     reader.fieldnames
-    ...     rows = list(reader)
-    ...     rows[0]
-    ...     rows[-1]
-    ['student_id', 'class_year', 'hw_number', 'grade']
-    {'student_id': '1', 'class_year': '2', 'hw_number': '1', 'grade': '73'}
-    {'student_id': '100', 'class_year': '1', 'hw_number': '10', 'grade': '78'}
-    """
+def _make_fake_data(path: Path, contributions):
     random.seed(0)  # So the mock data will be stable across runs.
-    csv_path = Path(__file__).parent.parent / "tmp" / "demo.csv"
-    contributions = 10
-    with csv_path.open("w", newline="") as demo_handle:
+    with path.open("w", newline="") as demo_handle:
         fields = ["student_id", "class_year", "hw_number", "grade"]
         writer = csv.DictWriter(demo_handle, fieldnames=fields)
@@ -109,15 +137,39 @@ def _get_demo_csv_contrib() -> CLIInfo:
-    return CLIInfo(csv_path=str(csv_path), contributions=contributions, is_demo=True)
+def _get_demo_cli_info() -> CLIInfo:
+    """
+    >>> cli_info = _get_demo_cli_info()
+    >>> with open(cli_info.private_csv_path, newline="") as csv_handle:
+    ...     reader = csv.DictReader(csv_handle)
+    ...     reader.fieldnames
+    ...     rows = list(reader)
+    ...     rows[0]
+    ...     rows[-1]
+    ['student_id', 'class_year', 'hw_number', 'grade']
+    {'student_id': '1', 'class_year': '2', 'hw_number': '1', 'grade': '73'}
+    {'student_id': '100', 'class_year': '1', 'hw_number': '10', 'grade': '78'}
+    """
+    private_csv_path = Path(__file__).parent.parent / "tmp" / "demo.csv"
+    contributions = 10
+    _make_fake_data(private_csv_path, contributions)
+    return CLIInfo(
+        public_csv_path=None,
+        private_csv_path=str(private_csv_path),
+        contributions=contributions,
+        is_demo=True,
+    )
-def get_cli_info():  # pragma: no cover
+def get_cli_info() -> CLIInfo:  # pragma: no cover
     args = _get_args()
     if args.demo:
-        if args.csv_path is not None:
-            warn('"--demo" overrides "--csv" and "--contrib"')
-        return _get_demo_csv_contrib()
+        return _get_demo_cli_info()
     return CLIInfo(
-        csv_path=args.csv_path, contributions=args.contributions, is_demo=False
+        public_csv_path=args.public_csv_path,
+        private_csv_path=args.private_csv_path,
+        contributions=args.contributions,
+        is_demo=False,
diff --git a/dp_wizard/utils/converters.py b/dp_wizard/utils/converters.py
index d53d08b..ddfd43c 100644
--- a/dp_wizard/utils/converters.py
+++ b/dp_wizard/utils/converters.py
@@ -14,6 +14,10 @@ def convert_py_to_nb(python_str: str, execute: bool = False):
         temp_dir_path = Path(temp_dir)
         py_path = temp_dir_path / "input.py"
+        # DEBUG:
+        Path("/tmp/script.py").write_text(python_str)
         argv = (
diff --git a/dp_wizard/utils/csv_helper.py b/dp_wizard/utils/csv_helper.py
index edc822a..2055062 100644
--- a/dp_wizard/utils/csv_helper.py
+++ b/dp_wizard/utils/csv_helper.py
@@ -8,9 +8,10 @@
 import re
 import polars as pl
+from pathlib import Path
-def read_csv_names(csv_path: str):
+def read_csv_names(csv_path: Path):
     # Polars is overkill, but it is more robust against
     # variations in encoding than Python stdlib csv.
     # However, it could be slow:
@@ -21,14 +22,27 @@ def read_csv_names(csv_path: str):
     return lf.collect_schema().names()
-def read_csv_ids_labels(csv_path: str):
+def get_csv_names_mismatch(public_csv_path: Path, private_csv_path: Path):
+    public_names = set(read_csv_names(public_csv_path))
+    private_names = set(read_csv_names(private_csv_path))
+    extra_public = public_names - private_names
+    extra_private = private_names - public_names
+    return (extra_public, extra_private)
+def get_csv_row_count(csv_path: Path):
+    lf = pl.scan_csv(csv_path)
+    return lf.select(pl.len()).collect().item()
+def read_csv_ids_labels(csv_path: Path):
     return {
         name_to_id(name): f"{i+1}: {name or '[blank]'}"
         for i, name in enumerate(read_csv_names(csv_path))
-def read_csv_ids_names(csv_path: str):
+def read_csv_ids_names(csv_path: Path):
     return {name_to_id(name): name for name in read_csv_names(csv_path)}
diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py
index 90a3786..99a0cc3 100644
--- a/dp_wizard/utils/dp_helper.py
+++ b/dp_wizard/utils/dp_helper.py
@@ -1,7 +1,6 @@
 import polars as pl
 import opendp.prelude as dp
-from dp_wizard.utils.mock_data import mock_data, ColumnDef
 from dp_wizard.utils.shared import make_cut_points
@@ -11,6 +10,8 @@
 def make_accuracy_histogram(
+    lf: pl.LazyFrame,
+    column_name: str,
     row_count: int,
     lower: float,
     upper: float,
@@ -19,8 +20,16 @@ def make_accuracy_histogram(
     weighted_epsilon: float,
 ) -> tuple[float, pl.DataFrame]:
-    Creates fake data between lower and upper, and then returns a DP histogram from it.
+    Given a LazyFrame and a column name, calculate a DP histogram.
+    >>> from dp_wizard.utils.mock_data import mock_data, ColumnDef
+    >>> lower, upper = 0, 10
+    >>> row_count = 100
+    >>> column_name = "value"
+    >>> df = mock_data({column_name: ColumnDef(lower, upper)}, row_count=row_count)
     >>> accuracy, histogram = make_accuracy_histogram(
+    ...     lf=pl.LazyFrame(df),
+    ...     column_name=column_name,
     ...     row_count=100,
     ...     lower=0, upper=10,
     ...     bin_count=5,
@@ -43,20 +52,16 @@ def make_accuracy_histogram(
     │ (8, 10] ┆ ... │
-    # Mock data only depends on lower and upper bounds, so it could be cached,
-    # but I'd guess this is dominated by the DP operations,
-    # so not worth optimizing.
-    df = mock_data({"value": ColumnDef(lower, upper)}, row_count=row_count)
-    # TODO: When this is stable, merge it to templates, so we can be
+    # TODO: https://github.com/opendp/dp-wizard/issues/219
+    # When this is stable, merge it to templates, so we can be
     # sure that we're using the same code in the preview that we
     # use in the generated notebook.
     cut_points = make_cut_points(lower, upper, bin_count)
     context = dp.Context.compositor(
-        data=pl.LazyFrame(df).with_columns(
+        data=lf.with_columns(
             # The cut() method returns a Polars categorical type.
             # Cast to string to get the human-readable label.
-            pl.col("value")
+            pl.col(column_name)
diff --git a/tests/fixtures/default_app.py b/tests/fixtures/default_app.py
index 4ed8b49..a881c1d 100644
--- a/tests/fixtures/default_app.py
+++ b/tests/fixtures/default_app.py
@@ -1,6 +1,5 @@
 from shiny import App
 from dp_wizard.app import app_ui, make_server_from_cli_info
 from dp_wizard.utils.argparse_helpers import CLIInfo
@@ -8,8 +7,9 @@
-            csv_path=None,
-            contributions=None,
+            public_csv_path=None,
+            private_csv_path=None,
+            contributions=1,
diff --git a/tests/fixtures/demo_app.py b/tests/fixtures/demo_app.py
index e1ef64a..747fd89 100644
--- a/tests/fixtures/demo_app.py
+++ b/tests/fixtures/demo_app.py
@@ -8,8 +8,9 @@
-            csv_path=None,
-            contributions=None,
+            public_csv_path=None,
+            private_csv_path=None,
+            contributions=1,
diff --git a/tests/test_app.py b/tests/test_app.py
index 283072c..e53c811 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -18,7 +18,6 @@
 default_app = create_app_fixture(Path(__file__).parent / "fixtures/default_app.py")
 tooltip = "#choose_csv_demo_tooltip_ui svg"
 for_the_demo = "For the demo, we'll imagine"
-simulation = "This simulation will assume a normal distribution"
 # TODO: Why is incomplete coverage reported here?
@@ -30,6 +29,10 @@ def test_demo_app(page: Page, demo_app: ShinyAppProc):  # pragma: no cover
+    # -- Define analysis --
+    page.get_by_role("button", name="Define analysis").click()
+    expect(page.get_by_text("This simulation will assume")).to_be_visible()
 def test_default_app(page: Page, default_app: ShinyAppProc):  # pragma: no cover
     pick_dataset_text = "How many rows of the CSV"
@@ -63,7 +66,7 @@ def expect_no_error():
     # Now upload:
     csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
-    page.get_by_label("Choose CSV file").set_input_files(csv_path.resolve())
+    page.get_by_label("Choose Public CSV").set_input_files(csv_path.resolve())
     # -- Define analysis --
@@ -89,6 +92,8 @@ def expect_no_error():
     expect_visible("Epsilon: 0.316")
     expect_visible("Epsilon: 0.158")
+    # Simulation
+    expect_visible("Because you've provided a public CSV")
     # Button disabled until column selected:
     download_results_button = page.get_by_role("button", name="Download results")
@@ -100,7 +105,6 @@ def expect_no_error():
     # Set column details:
-    expect_visible(simulation)
     # Check that default is set correctly:
     assert page.get_by_label("Upper").input_value() == "10"
@@ -109,14 +113,12 @@ def expect_no_error():
     # Uncheck the column:
-    expect_visible(simulation)
     # Recheck the column:
-    expect_visible(simulation)
     assert page.get_by_label("Upper").input_value() == new_value
     expect_visible("The 95% confidence interval is ±794")
     page.get_by_text("Data Table").click()
-    expect_visible("(0, 2]")
+    expect_visible(f"({new_value}, inf]")  # Because values are well above the bins.
     # Add a second column:
     # page.get_by_label("blank").check()
diff --git a/tests/utils/test_argparse_helpers.py b/tests/utils/test_argparse_helpers.py
index bff0c13..7877a78 100644
--- a/tests/utils/test_argparse_helpers.py
+++ b/tests/utils/test_argparse_helpers.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 from argparse import ArgumentTypeError
+import re
 import pytest
@@ -9,26 +10,50 @@
 fixtures_path = Path(__file__).parent.parent / "fixtures"
+def extract_block(md):
+    '''
+    >>> fake_md = """
+    ... header
+    ... ```
+    ... block
+    ... ```
+    ... footer
+    ... """
+    >>> extract_block(fake_md)
+    'block'
+    >>> extract_block('sorry')
+    Traceback (most recent call last):
+    ...
+    Exception: no match for block
+    '''
+    match = re.search(r"```\n(.*?)\n```", md, flags=re.DOTALL)
+    if match:
+        return match.group(1)
+    raise Exception("no match for block")
 def test_help():
     help = (
-        _get_arg_parser()
-        .format_help()
+        re.sub(
+            r"\]\s+\[",
+            "] [",  # line wrapping of params varies.
+            _get_arg_parser().format_help(),
+        )
         # argparse doesn't actually know the name of the script
         # and inserts the name of the running program instead.
-        .replace("__main__.py", "dp-wizard")
-        .replace("pytest", "dp-wizard")
+        .replace("__main__.py", "dp-wizard").replace("pytest", "dp-wizard")
         # Text is different under Python 3.9:
         .replace("optional arguments:", "options:")
-    )
-    print(help)
+    ).strip()
     root_path = Path(__file__).parent.parent.parent
     readme_md = (root_path / "README.md").read_text()
-    assert help in readme_md
+    assert help == extract_block(readme_md)
     readme_pypi_md = (root_path / "README-PYPI.md").read_text()
-    assert help in readme_pypi_md
+    assert help == extract_block(readme_pypi_md)
 def test_arg_validation_no_file():
@@ -38,9 +63,9 @@ def test_arg_validation_no_file():
 def test_arg_validation_not_csv():
     with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'):
-        _existing_csv_type(fixtures_path / "fake.ipynb")
+        _existing_csv_type(str(fixtures_path / "fake.ipynb"))
 def test_arg_validation_works():
-    path = _existing_csv_type(fixtures_path / "fake.csv")
+    path = _existing_csv_type(str(fixtures_path / "fake.csv"))
     assert path.name == "fake.csv"
diff --git a/tests/utils/test_csv_helper.py b/tests/utils/test_csv_helper.py
index 76ac569..727c37b 100644
--- a/tests/utils/test_csv_helper.py
+++ b/tests/utils/test_csv_helper.py
@@ -4,7 +4,32 @@
 import tempfile
 import pytest
-from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
+from pathlib import Path
+from dp_wizard.utils.csv_helper import (
+    read_csv_ids_labels,
+    read_csv_ids_names,
+    get_csv_names_mismatch,
+    get_csv_row_count,
+def test_get_csv_names_mismatch():
+    with tempfile.TemporaryDirectory() as tmp:
+        a_path = Path(tmp) / "a.csv"
+        a_path.write_text("a,b,c")
+        b_path = Path(tmp) / "b.csv"
+        b_path.write_text("b,c,d")
+        just_a, just_b = get_csv_names_mismatch(a_path, b_path)
+        assert just_a == {"a"}
+        assert just_b == {"d"}
+def test_get_csv_row_count():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "a.csv"
+        path.write_text("a,b,c\n1,2,3")
+        assert get_csv_row_count(path) == 1
 # We will not reference the encoding when reading: