From 45616487041e3b401d864634d4c57155d1144044 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Tue, 28 Oct 2025 18:24:12 -0400 Subject: [PATCH 1/7] column_names -> all_column_names --- dp_wizard/shiny/__init__.py | 2 +- .../shiny/panels/analysis_panel/__init__.py | 8 ++++---- dp_wizard/shiny/panels/dataset_panel/__init__.py | 16 ++++++++-------- dp_wizard/shiny/panels/results_panel/__init__.py | 2 +- dp_wizard/types.py | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dp_wizard/shiny/__init__.py b/dp_wizard/shiny/__init__.py index c965cb9e..d7f5ba2a 100644 --- a/dp_wizard/shiny/__init__.py +++ b/dp_wizard/shiny/__init__.py @@ -236,7 +236,7 @@ def server(input: Inputs, output: Outputs, session: Session): # pragma: no cove initial_product=initial_product, product=reactive.value(initial_product), # Analysis choices: - column_names=reactive.value(initial_column_names), + all_column_names=reactive.value(initial_column_names), groups=reactive.value([]), epsilon=reactive.value(1.0), # Per-column choices: diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index 6d07da81..e87aa5b4 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -155,7 +155,7 @@ def analysis_server( # product = state.product # Analysis choices: - column_names = state.column_names + all_column_names = state.all_column_names groups = state.groups epsilon = state.epsilon @@ -205,7 +205,7 @@ def _on_groups_change(): @render.ui def analysis_requirements_warning_ui(): return hide_if( - bool(column_names()), + bool(all_column_names()), info_md_box( """ Please select your dataset on the previous tab @@ -359,11 +359,11 @@ def columns_ui(): @reactive.calc def csv_ids_names_calc(): - return id_names_dict_from_names(column_names()) + return id_names_dict_from_names(all_column_names()) @reactive.calc def csv_ids_labels_calc(): - return id_labels_dict_from_names(column_names()) + return id_labels_dict_from_names(all_column_names()) @reactive.effect @reactive.event(input.log_epsilon_slider) diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py index 8c67ca55..8a1fa255 100644 --- a/dp_wizard/shiny/panels/dataset_panel/__init__.py +++ b/dp_wizard/shiny/panels/dataset_panel/__init__.py @@ -127,7 +127,7 @@ def dataset_server( product = state.product # Analysis choices: - column_names = state.column_names + all_column_names = state.all_column_names # groups = state.groups # epsilon = state.epsilon @@ -148,22 +148,22 @@ def dataset_server( def _on_public_csv_path_change(): path = input.public_csv_path()[0]["datapath"] public_csv_path.set(path) - column_names.set(read_csv_names(Path(path))) + all_column_names.set(read_csv_names(Path(path))) @reactive.effect @reactive.event(input.private_csv_path) def _on_private_csv_path_change(): path = input.private_csv_path()[0]["datapath"] private_csv_path.set(path) - column_names.set(read_csv_names(Path(path))) + all_column_names.set(read_csv_names(Path(path))) @reactive.effect - @reactive.event(input.column_names) + @reactive.event(input.all_column_names) def _on_column_names_change(): - column_names.set( + all_column_names.set( [ clean - for line in input.column_names().splitlines() + for line in input.all_column_names().splitlines() if (clean := line.strip()) ] ) @@ -239,7 +239,7 @@ def csv_or_columns_ui(): """, responsive=False, ), - ui.input_text_area("column_names", "CSV Column Names", rows=5), + ui.input_text_area("all_column_names", "CSV Column Names", rows=5), ] else: content = [ @@ -452,7 +452,7 @@ def button_enabled(): return ( contributions_valid() and not get_row_count_errors(max_rows()) - and len(column_names()) > 0 + and len(all_column_names()) > 0 and (in_cloud or not csv_column_mismatch_calc()) ) diff --git a/dp_wizard/shiny/panels/results_panel/__init__.py b/dp_wizard/shiny/panels/results_panel/__init__.py index 4a6310a4..09cfdace 100644 --- a/dp_wizard/shiny/panels/results_panel/__init__.py +++ b/dp_wizard/shiny/panels/results_panel/__init__.py @@ -115,7 +115,7 @@ def results_server( product = state.product # Analysis choices: - # column_names = state.column_names + # all_column_names = state.all_column_names groups = state.groups epsilon = state.epsilon diff --git a/dp_wizard/types.py b/dp_wizard/types.py index 39873091..8ae79ed9 100644 --- a/dp_wizard/types.py +++ b/dp_wizard/types.py @@ -98,7 +98,7 @@ class AppState: product: reactive.Value[Product] # Analysis choices: - column_names: reactive.Value[list[ColumnName]] + all_column_names: reactive.Value[list[ColumnName]] groups: reactive.Value[list[ColumnName]] epsilon: reactive.Value[float] From 462dbb7976e85ca1e425387e0f671e81dde1c0c9 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Tue, 28 Oct 2025 19:05:57 -0400 Subject: [PATCH 2/7] add numeric_column_names --- dp_wizard/shiny/__init__.py | 7 +++++- .../shiny/panels/analysis_panel/__init__.py | 1 + .../shiny/panels/dataset_panel/__init__.py | 9 ++++++- .../shiny/panels/results_panel/__init__.py | 1 + dp_wizard/types.py | 1 + dp_wizard/utils/csv_helper.py | 24 +++++++++++++++++++ 6 files changed, 41 insertions(+), 2 deletions(-) diff --git a/dp_wizard/shiny/__init__.py b/dp_wizard/shiny/__init__.py index d7f5ba2a..66a98f6a 100644 --- a/dp_wizard/shiny/__init__.py +++ b/dp_wizard/shiny/__init__.py @@ -18,7 +18,7 @@ from dp_wizard.types import AppState, Product from dp_wizard.utils import config from dp_wizard.utils.argparse_helpers import CLIInfo -from dp_wizard.utils.csv_helper import read_csv_names +from dp_wizard.utils.csv_helper import read_csv_names, read_csv_numeric_names _shiny_root = package_root / "shiny" _assets_root = _shiny_root / "assets" @@ -211,10 +211,14 @@ def server(input: Inputs, output: Outputs, session: Session): # pragma: no cove initial_private_csv_path = package_root / "tmp/sample.csv" _make_sample_csv(initial_private_csv_path, initial_contributions) initial_column_names = read_csv_names(Path(initial_private_csv_path)) + initial_numeric_column_names = read_csv_numeric_names( + Path(initial_private_csv_path) + ) else: initial_contributions = 1 initial_private_csv_path = "" initial_column_names = [] + initial_numeric_column_names = [] initial_product = Product.STATISTICS @@ -237,6 +241,7 @@ def server(input: Inputs, output: Outputs, session: Session): # pragma: no cove product=reactive.value(initial_product), # Analysis choices: all_column_names=reactive.value(initial_column_names), + numeric_column_names=reactive.value(initial_numeric_column_names), groups=reactive.value([]), epsilon=reactive.value(1.0), # Per-column choices: diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index e87aa5b4..730cae1f 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -156,6 +156,7 @@ def analysis_server( # Analysis choices: all_column_names = state.all_column_names + # numeric_column_names = state.numeric_column_names groups = state.groups epsilon = state.epsilon diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py index 8a1fa255..15a5b040 100644 --- a/dp_wizard/shiny/panels/dataset_panel/__init__.py +++ b/dp_wizard/shiny/panels/dataset_panel/__init__.py @@ -26,7 +26,11 @@ PUBLIC_TEXT, ) from dp_wizard.utils.code_generators import make_privacy_unit_block -from dp_wizard.utils.csv_helper import get_csv_names_mismatch, read_csv_names +from dp_wizard.utils.csv_helper import ( + get_csv_names_mismatch, + read_csv_names, + read_csv_numeric_names, +) dataset_panel_id = "dataset_panel" @@ -128,6 +132,7 @@ def dataset_server( # Analysis choices: all_column_names = state.all_column_names + numeric_column_names = state.numeric_column_names # groups = state.groups # epsilon = state.epsilon @@ -149,6 +154,7 @@ def _on_public_csv_path_change(): path = input.public_csv_path()[0]["datapath"] public_csv_path.set(path) all_column_names.set(read_csv_names(Path(path))) + numeric_column_names.set(read_csv_numeric_names(Path(path))) @reactive.effect @reactive.event(input.private_csv_path) @@ -156,6 +162,7 @@ def _on_private_csv_path_change(): path = input.private_csv_path()[0]["datapath"] private_csv_path.set(path) all_column_names.set(read_csv_names(Path(path))) + numeric_column_names.set(read_csv_numeric_names(Path(path))) @reactive.effect @reactive.event(input.all_column_names) diff --git a/dp_wizard/shiny/panels/results_panel/__init__.py b/dp_wizard/shiny/panels/results_panel/__init__.py index 09cfdace..51a3589d 100644 --- a/dp_wizard/shiny/panels/results_panel/__init__.py +++ b/dp_wizard/shiny/panels/results_panel/__init__.py @@ -116,6 +116,7 @@ def results_server( # Analysis choices: # all_column_names = state.all_column_names + # numeric_column_names = state.numeric_column_names groups = state.groups epsilon = state.epsilon diff --git a/dp_wizard/types.py b/dp_wizard/types.py index 8ae79ed9..f982489a 100644 --- a/dp_wizard/types.py +++ b/dp_wizard/types.py @@ -99,6 +99,7 @@ class AppState: # Analysis choices: all_column_names: reactive.Value[list[ColumnName]] + numeric_column_names: reactive.Value[list[ColumnName]] groups: reactive.Value[list[ColumnName]] epsilon: reactive.Value[float] diff --git a/dp_wizard/utils/csv_helper.py b/dp_wizard/utils/csv_helper.py index f1857606..35c00b48 100644 --- a/dp_wizard/utils/csv_helper.py +++ b/dp_wizard/utils/csv_helper.py @@ -18,6 +18,30 @@ def read_csv_names(csv_path: Path) -> list[ColumnName]: return [ColumnName(name) for name in all_names if name.strip() != ""] +def read_csv_numeric_names(csv_path: Path) -> list[ColumnName]: # pragma: no cover + lf = pl.scan_csv(csv_path) + numeric_names = [ + name + for name, pl_type in lf.collect_schema().items() + if pl_type + in [ + pl.Int8, + pl.Int16, + pl.Int32, + pl.Int64, + pl.Int128, + pl.Float32, + pl.Float64, + pl.UInt8, + pl.UInt16, + pl.UInt32, + pl.UInt64, + ] + ] + # Exclude columns missing names: + return [ColumnName(name) for name in numeric_names if name.strip() != ""] + + def get_csv_names_mismatch( public_csv_path: Path, private_csv_path: Path ) -> tuple[set[ColumnName], set[ColumnName]]: From 1e9434dccd8569831428c42a1165c85eaa525d82 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 29 Oct 2025 10:47:34 -0400 Subject: [PATCH 3/7] limit column selection to numeric columns --- .../shiny/panels/analysis_panel/__init__.py | 21 ++++++++++++------- tests/test_app.py | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index 730cae1f..507f3f79 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -39,7 +39,7 @@ def analysis_ui(): ui.layout_columns( ui.card( ui.card_header(columns_icon, "Columns"), - ui.markdown("Select columns to calculate statistics on."), + ui.markdown("Select numeric columns to calculate statistics on."), ui.input_selectize( "columns_selectize", "Columns", @@ -156,7 +156,7 @@ def analysis_server( # Analysis choices: all_column_names = state.all_column_names - # numeric_column_names = state.numeric_column_names + numeric_column_names = state.numeric_column_names groups = state.groups epsilon = state.epsilon @@ -180,20 +180,27 @@ def button_enabled(): @reactive.effect def _update_columns(): - csv_ids_labels = { + all_ids_labels = { # Cast to string for type checking. - str(k): v - for k, v in csv_ids_labels_calc().items() + str(col_id): label + for col_id, label in csv_ids_labels_calc().items() } ui.update_selectize( "groups_selectize", label=None, - choices=csv_ids_labels, + choices=all_ids_labels, ) + + numeric_column_ids = id_names_dict_from_names(numeric_column_names()).keys() + numeric_ids_labels = { + col_id: label + for col_id, label in all_ids_labels.items() + if col_id in numeric_column_ids + } ui.update_selectize( "columns_selectize", label=None, - choices=csv_ids_labels, + choices=numeric_ids_labels, ) @reactive.effect diff --git a/tests/test_app.py b/tests/test_app.py index 7ca5e6a9..11d1cd61 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -72,7 +72,7 @@ def test_qa_app(page: Page, qa_app: ShinyAppProc): # pragma: no cover def test_local_app_validations(page: Page, local_app: ShinyAppProc): # pragma: no cover pick_dataset_text = "How many rows of the CSV" - perform_analysis_text = "Select columns to calculate statistics on" + perform_analysis_text = "Select numeric columns to calculate statistics on" download_results_text = "You can now make a differentially private release" # -- Select Dataset -- From 8ca5f6301c3d04f0c50f146f19f6dc10af03b0a5 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 29 Oct 2025 14:39:44 -0400 Subject: [PATCH 4/7] set numeric column names in cloud --- dp_wizard/shiny/panels/dataset_panel/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py index 15a5b040..23153ed4 100644 --- a/dp_wizard/shiny/panels/dataset_panel/__init__.py +++ b/dp_wizard/shiny/panels/dataset_panel/__init__.py @@ -167,13 +167,13 @@ def _on_private_csv_path_change(): @reactive.effect @reactive.event(input.all_column_names) def _on_column_names_change(): - all_column_names.set( - [ - clean - for line in input.all_column_names().splitlines() - if (clean := line.strip()) - ] - ) + column_names = [ + clean + for line in input.all_column_names().splitlines() + if (clean := line.strip()) + ] + all_column_names.set(column_names) + numeric_column_names.set(column_names) @reactive.calc def csv_column_mismatch_calc() -> Optional[tuple[set, set]]: From 20bba22e95ff259c2d0f3c847e3acd53f31074d0 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 31 Oct 2025 10:33:24 -0400 Subject: [PATCH 5/7] carry over changes from other PR --- dp_wizard/shiny/components/summaries.py | 16 +++++++++++----- .../shiny/panels/analysis_panel/__init__.py | 10 +++++++--- dp_wizard/shiny/panels/dataset_panel/__init__.py | 9 ++++++--- dp_wizard/shiny/panels/results_panel/__init__.py | 8 +++++--- dp_wizard/types.py | 5 +++-- dp_wizard/utils/code_generators/__init__.py | 3 +++ .../utils/code_generators/abstract_generator.py | 8 ++++++-- .../utils/code_generators/notebook_generator.py | 8 ++++++-- .../utils/code_generators/script_generator.py | 4 +++- tests/utils/test_code_generators.py | 2 ++ 10 files changed, 52 insertions(+), 21 deletions(-) diff --git a/dp_wizard/shiny/components/summaries.py b/dp_wizard/shiny/components/summaries.py index a9118259..4c532e7a 100644 --- a/dp_wizard/shiny/components/summaries.py +++ b/dp_wizard/shiny/components/summaries.py @@ -8,7 +8,7 @@ product_icon, unit_of_privacy_icon, ) -from dp_wizard.types import AppState +from dp_wizard.types import AppState, Product _css = "display: block; padding: 0 1em 1em 1em;" @@ -47,10 +47,16 @@ def analysis_summary(state: AppState): # pragma: no cover budget = state.epsilon() return tags.small( - columns_icon, - f"Columns: {columns}; ", - groups_icon, - f"Groups: {groups}; ", + ( + [] + if state.product() == Product.CSV_DESCRIPTION + else [ + columns_icon, + f"Columns: {columns}; ", + groups_icon, + f"Groups: {groups}; ", + ] + ), budget_icon, f"Privacy Budget: {budget} epsilon.", style=_css, diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index 507f3f79..d99de0e3 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -21,7 +21,7 @@ ) from dp_wizard.shiny.components.summaries import dataset_summary from dp_wizard.shiny.panels.analysis_panel.column_module import column_server, column_ui -from dp_wizard.types import AppState +from dp_wizard.types import AppState, Product from dp_wizard.utils.code_generators import make_privacy_loss_block from dp_wizard.utils.csv_helper import ( get_csv_row_count, @@ -152,7 +152,7 @@ def analysis_server( # contributions_entity = state.contributions_entity max_rows = state.max_rows # initial_product = state.initial_product - # product = state.product + product = state.product # Analysis choices: all_column_names = state.all_column_names @@ -174,9 +174,13 @@ def analysis_server( @reactive.calc def button_enabled(): + # TODO: Get this in sync with results panel warning: + # https://github.com/opendp/dp-wizard/issues/562 at_least_one_column = bool(weights()) no_errors = not any(analysis_errors().values()) - return at_least_one_column and no_errors + return ( + at_least_one_column and no_errors + ) or product() == Product.CSV_DESCRIPTION @reactive.effect def _update_columns(): diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py index 23153ed4..5780f8b2 100644 --- a/dp_wizard/shiny/panels/dataset_panel/__init__.py +++ b/dp_wizard/shiny/panels/dataset_panel/__init__.py @@ -585,17 +585,20 @@ def product_ui(): ), tutorial_box( is_tutorial_mode(), - """ + f""" Although the underlying OpenDP library is very flexible, DP Wizard offers only a few analysis options: - - The **DP Statistics** option supports + - The **{Product.STATISTICS}** option supports grouping, histograms, mean, median, and count. - - With **DP Synthetic Data**, your privacy budget is used + - With **{Product.SYNTHETIC_DATA}**, your privacy budget is used to infer the distributions of values within the selected columns, and the correlations between columns. This is less accurate than calculating the desired statistics directly, but can be easier to work with downstream. + - The **{Product.CSV_DESCRIPTION}** summarizes the contents of CSVs + with a large number of columns, without revealing details + from individual rows. """, responsive=False, ), diff --git a/dp_wizard/shiny/panels/results_panel/__init__.py b/dp_wizard/shiny/panels/results_panel/__init__.py index 51a3589d..47ae8f6f 100644 --- a/dp_wizard/shiny/panels/results_panel/__init__.py +++ b/dp_wizard/shiny/panels/results_panel/__init__.py @@ -16,7 +16,7 @@ tutorial_box, ) from dp_wizard.shiny.components.summaries import analysis_summary, dataset_summary -from dp_wizard.types import AppState +from dp_wizard.types import AppState, Product from dp_wizard.utils.code_generators import AnalysisPlan, AnalysisPlanColumn from dp_wizard.utils.code_generators.notebook_generator import ( PLACEHOLDER_CSV_NAME, @@ -135,7 +135,9 @@ def results_server( @render.ui def results_requirements_warning_ui(): return hide_if( - bool(weights()), + # TODO: Get this in sync with analysis_panel validation + # https://github.com/opendp/dp-wizard/issues/562 + bool(weights()) or product() == Product.CSV_DESCRIPTION, info_md_box( """ Please define your analysis on the previous tab @@ -196,7 +198,7 @@ def clean_download_stem() -> str: def download_results_ui(): if in_cloud: return None - disabled = not weights() + disabled = not (weights() or product() == Product.CSV_DESCRIPTION) return [ ui.h3("Download Results"), tutorial_box( diff --git a/dp_wizard/types.py b/dp_wizard/types.py index f982489a..3ba9187d 100644 --- a/dp_wizard/types.py +++ b/dp_wizard/types.py @@ -8,19 +8,20 @@ class Product(Enum): STATISTICS = auto() SYNTHETIC_DATA = auto() + CSV_DESCRIPTION = auto() @classmethod def to_dict(cls) -> dict[str, str]: """ >>> Product.to_dict() - {'1': 'DP Statistics', '2': 'DP Synthetic Data'} + {'1': 'DP Statistics', '2': 'DP Synthetic Data', '3': 'DP CSV Description'} """ return { str(member.value): str(member) for (name, member) in cls.__members__.items() } def __str__(self) -> str: - return "DP " + self.name.replace("_", " ").title() + return "DP " + self.name.replace("_", " ").title().replace("Csv", "CSV") class AnalysisName(str): diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py index 024a9b9d..7da81e49 100644 --- a/dp_wizard/utils/code_generators/__init__.py +++ b/dp_wizard/utils/code_generators/__init__.py @@ -48,6 +48,9 @@ class AnalysisPlan(NamedTuple): columns: dict[ColumnName, list[AnalysisPlanColumn]] def __str__(self) -> str: + if self.product == Product.CSV_DESCRIPTION: + return str(self.product) + def md_list(names) -> str: return ", ".join(f"`{name}`" for name in names) diff --git a/dp_wizard/utils/code_generators/abstract_generator.py b/dp_wizard/utils/code_generators/abstract_generator.py index 9ffc200f..16acd829 100644 --- a/dp_wizard/utils/code_generators/abstract_generator.py +++ b/dp_wizard/utils/code_generators/abstract_generator.py @@ -28,12 +28,14 @@ def __init__(self, analysis_plan: AnalysisPlan, note: str): self.analysis_plan = analysis_plan self.note = note - def _get_synth_or_stats(self) -> str: + def _get_product(self) -> str: match self.analysis_plan.product: case Product.STATISTICS: return "stats" case Product.SYNTHETIC_DATA: return "synth" + case Product.CSV_DESCRIPTION: + return "description" case _: # pragma: no cover raise ValueError(self.analysis_plan.product) @@ -46,6 +48,8 @@ def _get_extra(self) -> str: return "polars" case Product.SYNTHETIC_DATA: return "mbi" + case Product.CSV_DESCRIPTION: + return "polars" case _: # pragma: no cover raise ValueError(self.analysis_plan.product) @@ -53,7 +57,7 @@ def _get_extra(self) -> str: def _get_notebook_or_script(self) -> str: ... # pragma: no cover def _get_root_template(self) -> str: - adj = self._get_synth_or_stats() + adj = self._get_product() noun = self._get_notebook_or_script() return f"{adj}_{noun}" diff --git a/dp_wizard/utils/code_generators/notebook_generator.py b/dp_wizard/utils/code_generators/notebook_generator.py index f7ec5322..605c9f07 100644 --- a/dp_wizard/utils/code_generators/notebook_generator.py +++ b/dp_wizard/utils/code_generators/notebook_generator.py @@ -78,11 +78,13 @@ def template(synthetic_data): ) + "}" ) + case Product.CSV_DESCRIPTION: + outputs_expression = "TODO" case _: # pragma: no cover raise ValueError(self.analysis_plan.product) tmp_path = package_root / "tmp" reports_block = ( - Template(f"{self._get_synth_or_stats()}_reports", root) + Template(f"{self._get_product()}_reports", root) .fill_expressions( OUTPUTS=outputs_expression, COLUMNS={ @@ -114,5 +116,7 @@ def _make_extra_blocks(self): "STATS_QUERIES_BLOCK": self._make_stats_queries(), "STATS_REPORTS_BLOCK": self._make_reports_block(), } + case Product.CSV_DESCRIPTION: + return {} # TODO case _: # pragma: no cover - raise ValueError(self.analysis_plan.product) + raise ValueError(_) diff --git a/dp_wizard/utils/code_generators/script_generator.py b/dp_wizard/utils/code_generators/script_generator.py index b750bd9a..ba05effe 100644 --- a/dp_wizard/utils/code_generators/script_generator.py +++ b/dp_wizard/utils/code_generators/script_generator.py @@ -54,5 +54,7 @@ def _make_extra_blocks(self): "STATS_CONTEXT_BLOCK": self._make_stats_context(), "STATS_QUERIES_BLOCK": self._make_stats_queries(), } + case Product.CSV_DESCRIPTION: + return {} # TODO case _: # pragma: no cover - raise ValueError(self.analysis_plan.product) + raise ValueError(_) diff --git a/tests/utils/test_code_generators.py b/tests/utils/test_code_generators.py index c9d43d17..97f0fa12 100644 --- a/tests/utils/test_code_generators.py +++ b/tests/utils/test_code_generators.py @@ -237,6 +237,8 @@ def test_make_notebook(plan): context_global = "synth_context" case Product.STATISTICS: context_global = "stats_context" + case Product.CSV_DESCRIPTION: + context_global = "description_context" case _: # pragma: no cover raise ValueError(plan.product) assert isinstance(globals[context_global], dp.Context) From 46253a6bdc8950fb9a603c16f9b0c60465b04077 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 31 Oct 2025 11:21:27 -0400 Subject: [PATCH 6/7] css hacks! --- WHAT-WE-LEARNED.md | 4 +++ .../shiny/panels/analysis_panel/__init__.py | 25 +++++++++++++++++++ .../code_generators/notebook_generator.py | 2 +- .../utils/code_generators/script_generator.py | 2 +- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/WHAT-WE-LEARNED.md b/WHAT-WE-LEARNED.md index 2b3a1c77..c6cbd864 100644 --- a/WHAT-WE-LEARNED.md +++ b/WHAT-WE-LEARNED.md @@ -119,3 +119,7 @@ Selectize menus should overflow the containing card. */ .card, .card-body { overflow: visible !important; } ``` + +## Hard to make elements with controls display conditionally. + +I tried moving a `ui.input_selectize` out of the top-level UI function because I needed it to be conditional, but the event that should have updated the list no longer worked: I guess it's not visible if it's not part of the static render? diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index d99de0e3..6b18c3e2 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -36,6 +36,7 @@ def analysis_ui(): ui.output_ui("analysis_requirements_warning_ui"), ui.output_ui("analysis_release_warning_ui"), ui.output_ui("previous_summary_ui"), + ui.output_ui("conditional_css_ui"), ui.layout_columns( ui.card( ui.card_header(columns_icon, "Columns"), @@ -47,6 +48,7 @@ def analysis_ui(): multiple=True, ), ui.output_ui("columns_selectize_tutorial_ui"), + class_="columns-card", ), ui.card( ui.card_header(groups_icon, "Grouping"), @@ -66,6 +68,7 @@ def analysis_ui(): multiple=True, ), ui.output_ui("groups_selectize_tutorial_ui"), + class_="grouping-card", ), ui.card( ui.card_header(budget_icon, "Privacy Budget"), @@ -83,10 +86,12 @@ def analysis_ui(): log_slider("log_epsilon_slider", 0.1, 10.0), ui.output_ui("epsilon_ui"), ui.output_ui("privacy_loss_python_ui"), + class_="budget-card", ), ui.card( ui.card_header(simulation_icon, "Simulation"), ui.output_ui("simulation_card_ui"), + class_="simulation-card", ), col_widths={ "sm": [12, 12, 12, 12], # 4 rows @@ -239,6 +244,26 @@ def analysis_release_warning_ui(): ), ) + @render.ui + def conditional_css_ui(): + # This is hacky, but other approaches for conditional card display + # didn't work for me. + # - Adding a wrapping element caused the card not to fill the whole height. + # - The selectize lists for columns and groups weren't updating. + # If we can find something better, great! + if product() == Product.CSV_DESCRIPTION: + return ui.tags.style( + """ + .bslib-grid-item:has( + .columns-card, + .grouping-card, + .simulation-card + ) { + display: none; + } + """ + ) + @render.ui def previous_summary_ui(): return dataset_summary(state) diff --git a/dp_wizard/utils/code_generators/notebook_generator.py b/dp_wizard/utils/code_generators/notebook_generator.py index 605c9f07..5ee36b56 100644 --- a/dp_wizard/utils/code_generators/notebook_generator.py +++ b/dp_wizard/utils/code_generators/notebook_generator.py @@ -119,4 +119,4 @@ def _make_extra_blocks(self): case Product.CSV_DESCRIPTION: return {} # TODO case _: # pragma: no cover - raise ValueError(_) + raise ValueError(self.analysis_plan.product) diff --git a/dp_wizard/utils/code_generators/script_generator.py b/dp_wizard/utils/code_generators/script_generator.py index ba05effe..be1b0cb4 100644 --- a/dp_wizard/utils/code_generators/script_generator.py +++ b/dp_wizard/utils/code_generators/script_generator.py @@ -57,4 +57,4 @@ def _make_extra_blocks(self): case Product.CSV_DESCRIPTION: return {} # TODO case _: # pragma: no cover - raise ValueError(_) + raise ValueError(self.analysis_plan.product) From 3fc61fc74e611cf33d8634930013cb726a407ed2 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 31 Oct 2025 13:12:58 -0400 Subject: [PATCH 7/7] checkpoint: start to fill in template --- .../code_generators/abstract_generator.py | 22 +++++++- .../no-tests/_description_notebook.py | 54 +++++++++++++++++++ .../no-tests/_description_script.py | 31 +++++++++++ .../no-tests/_stats_notebook.py | 4 +- .../code_generators/no-tests/_stats_script.py | 2 +- .../no-tests/_synth_notebook.py | 2 +- .../code_generators/notebook_generator.py | 40 +++++++++----- .../utils/code_generators/script_generator.py | 20 +++---- 8 files changed, 148 insertions(+), 27 deletions(-) create mode 100644 dp_wizard/utils/code_generators/no-tests/_description_notebook.py create mode 100644 dp_wizard/utils/code_generators/no-tests/_description_script.py diff --git a/dp_wizard/utils/code_generators/abstract_generator.py b/dp_wizard/utils/code_generators/abstract_generator.py index 16acd829..ff2ec1c0 100644 --- a/dp_wizard/utils/code_generators/abstract_generator.py +++ b/dp_wizard/utils/code_generators/abstract_generator.py @@ -96,7 +96,6 @@ def template(): ) .fill_code_blocks( IMPORTS_BLOCK=Template(template).finish(), - UTILS_BLOCK=(package_root / "utils/shared.py").read_text(), **self._make_extra_blocks(), ) .fill_comment_blocks( @@ -324,6 +323,27 @@ def _make_partial_synth_context(self): ) ) + def _make_partial_description_context(self): + privacy_unit_block = make_privacy_unit_block( + contributions=self.analysis_plan.contributions, + contributions_entity=self.analysis_plan.contributions_entity, + ) + privacy_loss_block = make_privacy_loss_block( + pure=False, + epsilon=self.analysis_plan.epsilon, + max_rows=self.analysis_plan.max_rows, + ) + return ( + Template("description_context", template_root) + .fill_expressions( + OPENDP_V_VERSION=f"v{opendp_version}", + ) + .fill_code_blocks( + PRIVACY_UNIT_BLOCK=privacy_unit_block, + PRIVACY_LOSS_BLOCK=privacy_loss_block, + ) + ) + def _make_synth_query(self): def template(synth_context, COLUMNS, CUTS): synth_query = ( diff --git a/dp_wizard/utils/code_generators/no-tests/_description_notebook.py b/dp_wizard/utils/code_generators/no-tests/_description_notebook.py new file mode 100644 index 00000000..00a85a29 --- /dev/null +++ b/dp_wizard/utils/code_generators/no-tests/_description_notebook.py @@ -0,0 +1,54 @@ +# # TITLE +# +# CUSTOM_NOTE +# +# Jump ahead: +# - [Analysis](#Analysis) +# - [Results](#Results) +# +# ## Prerequisites +# +# First install and import the required dependencies: +# WINDOWS_COMMENT_BLOCK + +# + +# %pip install DEPENDENCIES +# - + +# + +IMPORTS_BLOCK +# - + +# ## Analysis +# +# For each column numeric column we'll create a Polars expression +# for a histogram that spans orders of magnitude. + +DESCRIPTION_COLUMNS_BLOCK + +# ### Context +# +# Next, we'll define our Context. This is where we set the privacy budget, +# and set the weight for each query under that overall budget. + +# + +DESCRIPTION_CONTEXT_BLOCK +# - + +# ENCODING_COMMENT_BLOCK +# +# ## Results +# +# Finally, we run the queries. + +DESCRIPTION_QUERIES_BLOCK + +# If we try to run more queries at this point, it will error. Once the privacy budget +# is consumed, the library prevents you from running any more queries. + +# # Coda +# The code below produces a summary report. + +# + +REPORTS_BLOCK +# - diff --git a/dp_wizard/utils/code_generators/no-tests/_description_script.py b/dp_wizard/utils/code_generators/no-tests/_description_script.py new file mode 100644 index 00000000..784128a8 --- /dev/null +++ b/dp_wizard/utils/code_generators/no-tests/_description_script.py @@ -0,0 +1,31 @@ +# TITLE +# +# CUSTOM_NOTE + +# Install the following dependencies, if you haven't already: +# WINDOWS_COMMENT_BLOCK +# +# $ pip install DEPENDENCIES + +from argparse import ArgumentParser + +IMPORTS_BLOCK + +DESCRIPTION_COLUMNS_BLOCK + + +def get_stats_context_contributions(csv_path): + DESCRIPTION_CONTEXT_BLOCK + # ENCODING_COMMENT_BLOCK + return stats_context, contributions + + +if __name__ == "__main__": + parser = ArgumentParser(description="Describes the columns of a csv") + parser.add_argument( + "--csv", required=True, help="Path to csv containing private data" + ) + args = parser.parse_args() + stats_context, contributions = get_stats_context_contributions(csv_path=args.csv) + + DESCRIPTION_QUERIES_BLOCK diff --git a/dp_wizard/utils/code_generators/no-tests/_stats_notebook.py b/dp_wizard/utils/code_generators/no-tests/_stats_notebook.py index 144af0b6..d5669044 100644 --- a/dp_wizard/utils/code_generators/no-tests/_stats_notebook.py +++ b/dp_wizard/utils/code_generators/no-tests/_stats_notebook.py @@ -30,7 +30,7 @@ # Based on the input you provided, for each column we'll create a Polars expression # that describes how we want to summarize that column. -COLUMNS_BLOCK +STATS_COLUMNS_BLOCK # ### Context # @@ -56,5 +56,5 @@ # The code below produces a summary report. # + -STATS_REPORTS_BLOCK +REPORTS_BLOCK # - diff --git a/dp_wizard/utils/code_generators/no-tests/_stats_script.py b/dp_wizard/utils/code_generators/no-tests/_stats_script.py index 4a910972..07a4bd97 100644 --- a/dp_wizard/utils/code_generators/no-tests/_stats_script.py +++ b/dp_wizard/utils/code_generators/no-tests/_stats_script.py @@ -13,7 +13,7 @@ UTILS_BLOCK -COLUMNS_BLOCK +STATS_COLUMNS_BLOCK def get_stats_context_contributions(csv_path): diff --git a/dp_wizard/utils/code_generators/no-tests/_synth_notebook.py b/dp_wizard/utils/code_generators/no-tests/_synth_notebook.py index 73b06a55..64355aa8 100644 --- a/dp_wizard/utils/code_generators/no-tests/_synth_notebook.py +++ b/dp_wizard/utils/code_generators/no-tests/_synth_notebook.py @@ -58,5 +58,5 @@ # The code below produces a summary report. # + -SYNTH_REPORTS_BLOCK +REPORTS_BLOCK # - diff --git a/dp_wizard/utils/code_generators/notebook_generator.py b/dp_wizard/utils/code_generators/notebook_generator.py index 5ee36b56..83140edf 100644 --- a/dp_wizard/utils/code_generators/notebook_generator.py +++ b/dp_wizard/utils/code_generators/notebook_generator.py @@ -102,21 +102,37 @@ def template(synthetic_data): return reports_block def _make_extra_blocks(self): + report_blocks = { + "REPORTS_BLOCK": self._make_reports_block(), + } + utils_blocks = { + "UTILS_BLOCK": (package_root / "utils/shared.py").read_text(), + } match self.analysis_plan.product: case Product.SYNTHETIC_DATA: - return { - "SYNTH_CONTEXT_BLOCK": self._make_synth_context(), - "SYNTH_QUERY_BLOCK": self._make_synth_query(), - "SYNTH_REPORTS_BLOCK": self._make_reports_block(), - } + return ( + report_blocks + | utils_blocks + | { + "SYNTH_CONTEXT_BLOCK": self._make_synth_context(), + "SYNTH_QUERY_BLOCK": self._make_synth_query(), + } + ) case Product.STATISTICS: - return { - "COLUMNS_BLOCK": self._make_columns(), - "STATS_CONTEXT_BLOCK": self._make_stats_context(), - "STATS_QUERIES_BLOCK": self._make_stats_queries(), - "STATS_REPORTS_BLOCK": self._make_reports_block(), - } + return ( + report_blocks + | utils_blocks + | { + "STATS_COLUMNS_BLOCK": self._make_columns(), + "STATS_CONTEXT_BLOCK": self._make_stats_context(), + "STATS_QUERIES_BLOCK": self._make_stats_queries(), + } + ) case Product.CSV_DESCRIPTION: - return {} # TODO + # Doesn't need the shared utils + return report_blocks | { + "DESCRIPTION_CONTEXT_BLOCK": self._make_stats_context(), + "DESCRIPTION_QUERIES_BLOCK": self._make_stats_queries(), + } case _: # pragma: no cover raise ValueError(self.analysis_plan.product) diff --git a/dp_wizard/utils/code_generators/script_generator.py b/dp_wizard/utils/code_generators/script_generator.py index be1b0cb4..92c680bc 100644 --- a/dp_wizard/utils/code_generators/script_generator.py +++ b/dp_wizard/utils/code_generators/script_generator.py @@ -21,17 +21,17 @@ def _make_columns(self): ) def _make_stats_context(self): - return ( - self._make_partial_stats_context() - .fill_expressions(CSV_PATH="csv_path") - .fill_code_blocks(OPTIONAL_CSV_BLOCK="") - .finish() - ) + return self._fill_partial_context(self._make_partial_stats_context()) def _make_synth_context(self): + return self._fill_partial_context(self._make_partial_synth_context()) + + def _make_description_context(self): + return self._fill_partial_context(self._make_partial_description_context()) + + def _fill_partial_context(self, partial_context): return ( - self._make_partial_synth_context() - .fill_expressions(CSV_PATH="csv_path") + partial_context.fill_expressions(CSV_PATH="csv_path") .fill_code_blocks(OPTIONAL_CSV_BLOCK="") .finish() ) @@ -50,11 +50,11 @@ def _make_extra_blocks(self): } case Product.STATISTICS: return { - "COLUMNS_BLOCK": self._make_columns(), + "STATS_COLUMNS_BLOCK": self._make_columns(), "STATS_CONTEXT_BLOCK": self._make_stats_context(), "STATS_QUERIES_BLOCK": self._make_stats_queries(), } case Product.CSV_DESCRIPTION: - return {} # TODO + return {} case _: # pragma: no cover raise ValueError(self.analysis_plan.product)