From 7273a2d1602a1edec8763853ead4f0b308d0c5cd Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 5 Mar 2026 14:33:17 -0500 Subject: [PATCH 1/3] keep defaults in one place --- dp_wizard/shiny/panels/analysis_panel/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index 63f965f5..b55ec38b 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -322,6 +322,7 @@ def simulation_card_ui(): responsive=False, ), ) + default_choices = ["100", "1000", "10000"] if public_path(): row_count_str = str(get_csv_row_count(Path(public_path()))) return [ @@ -339,7 +340,7 @@ def simulation_card_ui(): ui.input_select( "row_count", "Estimated Rows", - choices=[row_count_str, "100", "1000", "10000"], + choices=[row_count_str] + default_choices, selected=row_count_str, ), help, @@ -356,8 +357,8 @@ def simulation_card_ui(): ui.input_select( "row_count", "Estimated Rows", - choices=["100", "1000", "10000"], - selected="100", + choices=default_choices, + selected=default_choices[0], ), help, ] From 686dd31e8ad0831dbf11ea6af71e6a7e870859c9 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 5 Mar 2026 14:50:57 -0500 Subject: [PATCH 2/3] factor ui out of if-then --- .../shiny/panels/analysis_panel/__init__.py | 63 ++++++++----------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py index b55ec38b..868d5372 100644 --- a/dp_wizard/shiny/panels/analysis_panel/__init__.py +++ b/dp_wizard/shiny/panels/analysis_panel/__init__.py @@ -322,46 +322,35 @@ def simulation_card_ui(): responsive=False, ), ) - default_choices = ["100", "1000", "10000"] + choices = ["100", "1000", "10000"] if public_path(): row_count_str = str(get_csv_row_count(Path(public_path()))) - return [ - ui.markdown( - f""" - Because you've provided public data, - it *will be read* to generate previews. - - The confidence interval depends on the number of rows. - Your public data has {row_count_str} rows, - but if you believe the private data will be - much larger or smaller, please update. - """ - ), - ui.input_select( - "row_count", - "Estimated Rows", - choices=[row_count_str] + default_choices, - selected=row_count_str, - ), - help, - ] + choices.insert(0, row_count_str) + message = f""" + Because you've provided public data, + it *will be read* to generate previews. + + The confidence interval depends on the number of rows. + Your public data has {row_count_str} rows, + but if you believe the private data will be + much larger or smaller, please update. + """ else: - return [ - ui.markdown( - """ - What is the approximate number of rows in the dataset? - This number is only used for the simulation - and not the final calculation. - """ - ), - ui.input_select( - "row_count", - "Estimated Rows", - choices=default_choices, - selected=default_choices[0], - ), - help, - ] + message = """ + What is the approximate number of rows in the dataset? + This number is only used for the simulation + and not the final calculation. + """ + return [ + ui.markdown(message), + ui.input_select( + "row_count", + "Estimated Rows", + choices=choices, + selected=choices[0], + ), + help, + ] @render.ui def columns_ui(): From 2e20116b92ca0ce5d722416ec5133d5b2ad59f4d Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 5 Mar 2026 15:35:49 -0500 Subject: [PATCH 3/3] sample from public data --- dp_wizard/shiny/panels/analysis_panel/column_module.py | 5 ++++- dp_wizard/utils/dp_helper.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dp_wizard/shiny/panels/analysis_panel/column_module.py b/dp_wizard/shiny/panels/analysis_panel/column_module.py index 0824858a..7fbb2cee 100644 --- a/dp_wizard/shiny/panels/analysis_panel/column_module.py +++ b/dp_wizard/shiny/panels/analysis_panel/column_module.py @@ -214,6 +214,9 @@ def accuracy_histogram(): # so not worth optimizing. lf = ( pl.scan_csv(public_path, ignore_errors=True) + .collect() + .sample(n=row_count, with_replacement=True) + .lazy() if public_path else pl.LazyFrame( mock_data({name: ColumnDef(lower_x, upper_x)}, row_count=row_count) @@ -222,7 +225,7 @@ def accuracy_histogram(): return make_accuracy_histogram( lf=lf, column_name=name, - row_count=row_count, + max_length=row_count, lower_bound=lower_x, upper_bound=upper_x, bin_count=bin_count, diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py index f46daddb..a41bcdf3 100644 --- a/dp_wizard/utils/dp_helper.py +++ b/dp_wizard/utils/dp_helper.py @@ -12,7 +12,7 @@ def make_accuracy_histogram( lf: pl.LazyFrame, column_name: str, - row_count: int, + max_length: int, lower_bound: float, upper_bound: float, bin_count: int, @@ -33,7 +33,7 @@ def make_accuracy_histogram( >>> accuracy, histogram = make_accuracy_histogram( ... lf=pl.LazyFrame(df), ... column_name=column_name, - ... row_count=100, + ... max_length=100, ... lower_bound=0, upper_bound=10, ... bin_count=5, ... contributions=1, @@ -76,7 +76,7 @@ def make_accuracy_histogram( margins=[ dp.polars.Margin( # type: ignore by=["bin"], - max_length=row_count, + max_length=max_length, invariant="keys", ), ],