opendp · mccalluc · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 9, 2026
diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py
@@ -322,45 +322,35 @@ def simulation_card_ui():
                 responsive=False,
             ),
         )
+        choices = ["100", "1000", "10000"]
         if public_path():
             row_count_str = str(get_csv_row_count(Path(public_path())))
-            return [
-                ui.markdown(
-                    f"""
-                    Because you've provided public data,
-                    it *will be read* to generate previews.
-
-                    The confidence interval depends on the number of rows.
-                    Your public data has {row_count_str} rows,
-                    but if you believe the private data will be
-                    much larger or smaller, please update.
-                    """
-                ),
-                ui.input_select(
-                    "row_count",
-                    "Estimated Rows",
-                    choices=[row_count_str, "100", "1000", "10000"],
-                    selected=row_count_str,
-                ),
-                help,
-            ]
+            choices.insert(0, row_count_str)
+            message = f"""
+                Because you've provided public data,
+                it *will be read* to generate previews.
+
+                The confidence interval depends on the number of rows.
+                Your public data has {row_count_str} rows,
+                but if you believe the private data will be
+                much larger or smaller, please update.
+            """
         else:
-            return [
-                ui.markdown(
-                    """
-                    What is the approximate number of rows in the dataset?
-                    This number is only used for the simulation
-                    and not the final calculation.
-                    """
-                ),
-                ui.input_select(
-                    "row_count",
-                    "Estimated Rows",
-                    choices=["100", "1000", "10000"],
-                    selected="100",
-                ),
-                help,
-            ]
+            message = """
+                What is the approximate number of rows in the dataset?
+                This number is only used for the simulation
+                and not the final calculation.
+            """
+        return [
+            ui.markdown(message),
+            ui.input_select(
+                "row_count",
+                "Estimated Rows",
+                choices=choices,
+                selected=choices[0],
+            ),
+            help,
+        ]
 
     @render.ui
     def columns_ui():

diff --git a/dp_wizard/shiny/panels/analysis_panel/column_module.py b/dp_wizard/shiny/panels/analysis_panel/column_module.py
@@ -213,6 +213,9 @@ def accuracy_histogram():
         # so not worth optimizing.
         lf = (
             pl.scan_csv(public_path, ignore_errors=True)
+            .collect()
+            .sample(n=row_count, with_replacement=True)
+            .lazy()
             if public_path
             else pl.LazyFrame(
                 mock_data({name: ColumnDef(lower_x, upper_x)}, row_count=row_count)
@@ -221,7 +224,7 @@ def accuracy_histogram():
         return make_accuracy_histogram(
             lf=lf,
             column_name=name,
-            row_count=row_count,
+            max_length=row_count,
             lower_bound=lower_x,
             upper_bound=upper_x,
             bin_count=bin_count,

diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py
@@ -12,7 +12,7 @@
 def make_accuracy_histogram(
     lf: pl.LazyFrame,
     column_name: str,
-    row_count: int,
+    max_length: int,
     lower_bound: float,
     upper_bound: float,
     bin_count: int,
@@ -33,7 +33,7 @@ def make_accuracy_histogram(
     >>> accuracy, histogram = make_accuracy_histogram(
     ...     lf=pl.LazyFrame(df),
     ...     column_name=column_name,
-    ...     row_count=100,
+    ...     max_length=100,
     ...     lower_bound=0, upper_bound=10,
     ...     bin_count=5,
     ...     contributions=1,
@@ -76,7 +76,7 @@ def make_accuracy_histogram(
         margins=[
             dp.polars.Margin(  # type: ignore
                 by=["bin"],
-                max_length=row_count,
+                max_length=max_length,
                 # Range bins names are not private information: This is safe.
                 invariant="keys",
             ),