opendp · mccalluc · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/dp_wizard/shiny/components/summaries.py b/dp_wizard/shiny/components/summaries.py
@@ -8,7 +8,7 @@
     product_icon,
     unit_of_privacy_icon,
 )
-from dp_wizard.types import AppState
+from dp_wizard.types import AppState, Product
 
 _css = "display: block; padding: 0 1em 1em 1em;"
 
@@ -47,10 +47,16 @@ def analysis_summary(state: AppState):  # pragma: no cover
     budget = state.epsilon()
 
     return tags.small(
-        columns_icon,
-        f"Columns: {columns}; ",
-        groups_icon,
-        f"Groups: {groups}; ",
+        (
+            []
+            if state.product() == Product.CSV_DESCRIPTION
+            else [
+                columns_icon,
+                f"Columns: {columns}; ",
+                groups_icon,
+                f"Groups: {groups}; ",
+            ]
+        ),
         budget_icon,
         f"Privacy Budget: {budget} epsilon.",
         style=_css,

diff --git a/dp_wizard/shiny/panels/analysis_panel/__init__.py b/dp_wizard/shiny/panels/analysis_panel/__init__.py
@@ -21,7 +21,7 @@
 )
 from dp_wizard.shiny.components.summaries import dataset_summary
 from dp_wizard.shiny.panels.analysis_panel.column_module import column_server, column_ui
-from dp_wizard.types import AppState
+from dp_wizard.types import AppState, Product
 from dp_wizard.utils.code_generators import make_privacy_loss_block
 from dp_wizard.utils.csv_helper import (
     get_csv_row_count,
@@ -36,64 +36,7 @@ def analysis_ui():
         ui.output_ui("analysis_requirements_warning_ui"),
         ui.output_ui("analysis_release_warning_ui"),
         ui.output_ui("previous_summary_ui"),
-        ui.layout_columns(
-            ui.card(
-                ui.card_header(columns_icon, "Columns"),
-                ui.markdown("Select numeric columns to calculate statistics on."),
-                ui.input_selectize(
-                    "columns_selectize",
-                    "Columns",
-                    [],
-                    multiple=True,
-                ),
-                ui.output_ui("columns_selectize_tutorial_ui"),
-            ),
-            ui.card(
-                ui.card_header(groups_icon, "Grouping"),
-                ui.markdown(
-                    """
-                    Select columns to group by, or leave empty
-                    to calculate statistics across the entire dataset.
-
-                    Groups aren't applied to the previews on this page
-                    but will be used in the final release.
-                    """
-                ),
-                ui.input_selectize(
-                    "groups_selectize",
-                    "Group by",
-                    [],
-                    multiple=True,
-                ),
-                ui.output_ui("groups_selectize_tutorial_ui"),
-            ),
-            ui.card(
-                ui.card_header(budget_icon, "Privacy Budget"),
-                ui.markdown(
-                    f"""
-                    What is your privacy budget, or epsilon, for this release?
-                    Many factors including the sensitivity of your data,
-                    the frequency of DP releases,
-                    and the regulatory landscape can be considered.
-                    Consider how your budget compares to that of
-                    <a href="{registry_url}"
-                       target="_blank">other projects</a>.
-                    """
-                ),
-                log_slider("log_epsilon_slider", 0.1, 10.0),
-                ui.output_ui("epsilon_ui"),
-                ui.output_ui("privacy_loss_python_ui"),
-            ),
-            ui.card(
-                ui.card_header(simulation_icon, "Simulation"),
-                ui.output_ui("simulation_card_ui"),
-            ),
-            col_widths={
-                "sm": [12, 12, 12, 12],  # 4 rows
-                "md": [6, 6, 6, 6],  # 2 rows
-                "xxl": [3, 3, 3, 3],  # 1 row
-            },
-        ),
+        ui.output_ui("top_cards_ui"),
         ui.output_ui("columns_ui"),
         ui.output_ui("download_results_button_ui"),
         value="analysis_panel",
@@ -152,7 +95,7 @@ def analysis_server(
     # contributions_entity = state.contributions_entity
     max_rows = state.max_rows
     # initial_product = state.initial_product
-    # product = state.product
+    product = state.product
 
     # Analysis choices:
     all_column_names = state.all_column_names
@@ -174,9 +117,13 @@ def analysis_server(
 
     @reactive.calc
     def button_enabled():
+        # TODO: Get this in sync with results panel warning:
+        # https://github.com/opendp/dp-wizard/issues/562
-        at_least_one_column = bool(weights())
-        no_errors = not any(analysis_errors().values())
-        return at_least_one_column and no_errors
+        has_columns = bool(weights())
+        has_errors = any(analysis_errors().values())
+        ready = has_columns and not has_errors
+        # For the CSV description product, enable regardless of columns and errors.
+        return ready or product() == Product.CSV_DESCRIPTION
 
     @reactive.effect
     def _update_columns():
@@ -239,6 +186,89 @@ def analysis_release_warning_ui():
     def previous_summary_ui():
         return dataset_summary(state)
 
+    @render.ui
+    def top_cards_ui():
+        """
+        Render the cards at the top of the analysis panel.
+
+        For the CSV description product only the privacy budget card
+        is shown; for every other product the columns, grouping,
+        privacy budget, and simulation cards are all shown.
+        """
+
+        # The privacy budget card appears in both layouts, so it is built
+        # by a helper; the other cards are only built when they are shown.
+        def make_budget_card():
+            return ui.card(
+                ui.card_header(budget_icon, "Privacy Budget"),
+                ui.markdown(
+                    f"""
+                    What is your privacy budget, or epsilon, for this release?
+                    Many factors including the sensitivity of your data,
+                    the frequency of DP releases,
+                    and the regulatory landscape can be considered.
+                    Consider how your budget compares to that of
+                    <a href="{registry_url}"
+                       target="_blank">other projects</a>.
+                    """
+                ),
+                log_slider("log_epsilon_slider", 0.1, 10.0),
+                ui.output_ui("epsilon_ui"),
+                ui.output_ui("privacy_loss_python_ui"),
+            )
+
+        # CSV description: show the privacy budget card on its own,
+        # full width at "md" and half width from "lg" up.
+        if product() == Product.CSV_DESCRIPTION:
+            return ui.layout_columns(
+                make_budget_card(),
+                col_widths={"md": [12], "lg": [6]},
+            )
+
+        # Every other product: show all four cards.
+        return ui.layout_columns(
+            ui.card(
+                ui.card_header(columns_icon, "Columns"),
+                ui.markdown("Select numeric columns to calculate statistics on."),
+                ui.input_selectize(
+                    "columns_selectize",
+                    "Columns",
+                    [],
+                    multiple=True,
+                ),
+                ui.output_ui("columns_selectize_tutorial_ui"),
+            ),
+            ui.card(
+                ui.card_header(groups_icon, "Grouping"),
+                ui.markdown(
+                    """
+                    Select columns to group by, or leave empty
+                    to calculate statistics across the entire dataset.
+
+                    Groups aren't applied to the previews on this page
+                    but will be used in the final release.
+                    """
+                ),
+                ui.input_selectize(
+                    "groups_selectize",
+                    "Group by",
+                    [],
+                    multiple=True,
+                ),
+                ui.output_ui("groups_selectize_tutorial_ui"),
+            ),
+            make_budget_card(),
+            ui.card(
+                ui.card_header(simulation_icon, "Simulation"),
+                ui.output_ui("simulation_card_ui"),
+            ),
+            col_widths={
+                "sm": [12, 12, 12, 12],  # 4 rows
+                "md": [6, 6, 6, 6],  # 2 rows
+                "xxl": [3, 3, 3, 3],  # 1 row
+            },
+        )
+
     @reactive.effect
     @reactive.event(input.columns_selectize)
     def _on_columns_change():

diff --git a/dp_wizard/shiny/panels/dataset_panel/__init__.py b/dp_wizard/shiny/panels/dataset_panel/__init__.py
@@ -585,17 +585,20 @@ def product_ui():
             ),
             tutorial_box(
                 is_tutorial_mode(),
-                """
+                f"""
                 Although the underlying OpenDP library is very flexible,
-                DP Wizard offers only a few analysis options:
+                DP Wizard offers a few analysis options to help you get started:
 
-                - The **DP Statistics** option supports
+                - The **{Product.STATISTICS}** option supports
                   grouping, histograms, mean, median, and count.
-                - With **DP Synthetic Data**, your privacy budget is used
+                - With **{Product.SYNTHETIC_DATA}**, your privacy budget is used
                   to infer the distributions of values within the
                   selected columns, and the correlations between columns.
                   This is less accurate than calculating the desired
                   statistics directly, but can be easier to work with downstream.
+                - The **{Product.CSV_DESCRIPTION}** summarizes the contents of CSVs
+                  with a large number of columns, without revealing details
+                  from individual rows.
                 """,
                 responsive=False,
             ),

diff --git a/dp_wizard/shiny/panels/results_panel/__init__.py b/dp_wizard/shiny/panels/results_panel/__init__.py
@@ -16,7 +16,7 @@
     tutorial_box,
 )
 from dp_wizard.shiny.components.summaries import analysis_summary, dataset_summary
-from dp_wizard.types import AppState
+from dp_wizard.types import AppState, Product
 from dp_wizard.utils.code_generators import AnalysisPlan, AnalysisPlanColumn
 from dp_wizard.utils.code_generators.notebook_generator import (
     PLACEHOLDER_CSV_NAME,
@@ -135,7 +135,9 @@ def results_server(
     @render.ui
     def results_requirements_warning_ui():
         return hide_if(
-            bool(weights()),
+            # TODO: Get this in sync with analysis_panel validation
+            # https://github.com/opendp/dp-wizard/issues/562
+            bool(weights()) or product() == Product.CSV_DESCRIPTION,
             info_md_box(
                 """
                 Please define your analysis on the previous tab
@@ -196,7 +198,7 @@ def clean_download_stem() -> str:
     def download_results_ui():
         if in_cloud:
             return None
-        disabled = not weights()
+        disabled = not (weights() or product() == Product.CSV_DESCRIPTION)
         return [
             ui.h3("Download Results"),
             tutorial_box(

diff --git a/dp_wizard/types.py b/dp_wizard/types.py
@@ -8,19 +8,25 @@
 class Product(Enum):
+    """The kinds of analysis product DP Wizard offers."""
+
     STATISTICS = auto()
     SYNTHETIC_DATA = auto()
+    CSV_DESCRIPTION = auto()
 
     @classmethod
     def to_dict(cls) -> dict[str, str]:
         """
+        Map each member's value, as a string, to its display name.
+
         >>> Product.to_dict()
-        {'1': 'DP Statistics', '2': 'DP Synthetic Data'}
+        {'1': 'DP Statistics', '2': 'DP Synthetic Data', '3': 'DP CSV Description'}
         """
         return {
             str(member.value): str(member) for (name, member) in cls.__members__.items()
         }
 
     def __str__(self) -> str:
-        return "DP " + self.name.replace("_", " ").title()
+        """Return the display name, e.g. ``DP CSV Description``."""
+        return "DP " + self.name.replace("_", " ").title().replace("Csv", "CSV")
 
 
 class AnalysisName(str):

diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py
@@ -34,7 +34,7 @@ class AnalysisPlan(NamedTuple):
     >>> print(plan.to_stem())
     dp_statistics_for_data_col_grouped_by_grouping_col
     >>> print(plan.to_note())
-    This demonstrates how to calculate ...
+    This demonstrates how to create ...
     Generated by DP Wizard ...
     """
 
@@ -48,6 +48,9 @@ class AnalysisPlan(NamedTuple):
     columns: dict[ColumnName, list[AnalysisPlanColumn]]
 
     def __str__(self) -> str:
+        if self.product == Product.CSV_DESCRIPTION:
+            return str(self.product)
+
         def md_list(names) -> str:
             return ", ".join(f"`{name}`" for name in names)
 
@@ -62,7 +65,7 @@ def to_stem(self) -> str:
     def to_note(self) -> str:
         now = datetime.now().strftime("%b %d, %Y at %I:%M%p")
         return f"""
-This demonstrates how to calculate {self} using OpenDP (https://docs.opendp.org).
+This demonstrates how to create {self} using OpenDP (https://docs.opendp.org).
 Generated by DP Wizard v{__version__} (https://github.com/opendp/dp-wizard) on {now}.
         """.strip()
 

diff --git a/dp_wizard/utils/code_generators/abstract_generator.py b/dp_wizard/utils/code_generators/abstract_generator.py
@@ -28,12 +28,14 @@ def __init__(self, analysis_plan: AnalysisPlan, note: str):
         self.analysis_plan = analysis_plan
         self.note = note
 
-    def _get_synth_or_stats(self) -> str:
+    def _get_product(self) -> str:
         match self.analysis_plan.product:
             case Product.STATISTICS:
                 return "stats"
             case Product.SYNTHETIC_DATA:
                 return "synth"
+            case Product.CSV_DESCRIPTION:
+                return "codebook"
             case _:  # pragma: no cover
                 raise ValueError(self.analysis_plan.product)
 
@@ -46,14 +48,16 @@ def _get_extra(self) -> str:
                 return "polars"
             case Product.SYNTHETIC_DATA:
                 return "mbi"
+            case Product.CSV_DESCRIPTION:
+                return "polars"
             case _:  # pragma: no cover
                 raise ValueError(self.analysis_plan.product)
 
     @abstractmethod
     def _get_notebook_or_script(self) -> str: ...  # pragma: no cover
 
     def _get_root_template(self) -> str:
-        adj = self._get_synth_or_stats()
+        adj = self._get_product()
         noun = self._get_notebook_or_script()
         return f"{adj}_{noun}"
 

diff --git a/dp_wizard/utils/code_generators/notebook_generator.py b/dp_wizard/utils/code_generators/notebook_generator.py
@@ -78,11 +78,13 @@ def template(synthetic_data):
                     )
                     + "}"
                 )
+            case Product.CSV_DESCRIPTION:
+                outputs_expression = "TODO"
             case _:  # pragma: no cover
                 raise ValueError(self.analysis_plan.product)
         tmp_path = package_root / "tmp"
         reports_block = (
-            Template(f"{self._get_synth_or_stats()}_reports", root)
+            Template(f"{self._get_product()}_reports", root)
             .fill_expressions(
                 OUTPUTS=outputs_expression,
                 COLUMNS={
@@ -114,5 +116,7 @@ def _make_extra_blocks(self):
                     "STATS_QUERIES_BLOCK": self._make_stats_queries(),
                     "STATS_REPORTS_BLOCK": self._make_reports_block(),
                 }
+            case Product.CSV_DESCRIPTION:
+                return {}  # TODO
             case _:  # pragma: no cover
                 raise ValueError(self.analysis_plan.product)
diff --git a/dp_wizard/utils/code_generators/script_generator.py b/dp_wizard/utils/code_generators/script_generator.py
@@ -54,5 +54,7 @@ def _make_extra_blocks(self):
                     "STATS_CONTEXT_BLOCK": self._make_stats_context(),
                     "STATS_QUERIES_BLOCK": self._make_stats_queries(),
                 }
+            case Product.CSV_DESCRIPTION:
+                return {}  # TODO
             case _:  # pragma: no cover
                 raise ValueError(self.analysis_plan.product)
diff --git a/tests/utils/test_code_generators.py b/tests/utils/test_code_generators.py
@@ -237,6 +237,8 @@ def test_make_notebook(plan):
             context_global = "synth_context"
         case Product.STATISTICS:
             context_global = "stats_context"
+        case Product.CSV_DESCRIPTION:
+            context_global = "codebook_context"
         case _:  # pragma: no cover
             raise ValueError(plan.product)
     assert isinstance(globals[context_global], dp.Context)