Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions dp_wizard/shiny/components/summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
product_icon,
unit_of_privacy_icon,
)
from dp_wizard.types import AppState
from dp_wizard.types import AppState, Product

_css = "display: block; padding: 0 1em 1em 1em;"

Expand Down Expand Up @@ -47,10 +47,16 @@ def analysis_summary(state: AppState): # pragma: no cover
budget = state.epsilon()

return tags.small(
columns_icon,
f"Columns: {columns}; ",
groups_icon,
f"Groups: {groups}; ",
(
[]
if state.product() == Product.CSV_DESCRIPTION
else [
columns_icon,
f"Columns: {columns}; ",
groups_icon,
f"Groups: {groups}; ",
]
),
budget_icon,
f"Privacy Budget: {budget} epsilon.",
style=_css,
Expand Down
152 changes: 91 additions & 61 deletions dp_wizard/shiny/panels/analysis_panel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from dp_wizard.shiny.components.summaries import dataset_summary
from dp_wizard.shiny.panels.analysis_panel.column_module import column_server, column_ui
from dp_wizard.types import AppState
from dp_wizard.types import AppState, Product
from dp_wizard.utils.code_generators import make_privacy_loss_block
from dp_wizard.utils.csv_helper import (
get_csv_row_count,
Expand All @@ -36,64 +36,7 @@ def analysis_ui():
ui.output_ui("analysis_requirements_warning_ui"),
ui.output_ui("analysis_release_warning_ui"),
ui.output_ui("previous_summary_ui"),
ui.layout_columns(
ui.card(
ui.card_header(columns_icon, "Columns"),
ui.markdown("Select numeric columns to calculate statistics on."),
ui.input_selectize(
"columns_selectize",
"Columns",
[],
multiple=True,
),
ui.output_ui("columns_selectize_tutorial_ui"),
),
ui.card(
ui.card_header(groups_icon, "Grouping"),
ui.markdown(
"""
Select columns to group by, or leave empty
to calculate statistics across the entire dataset.

Groups aren't applied to the previews on this page
but will be used in the final release.
"""
),
ui.input_selectize(
"groups_selectize",
"Group by",
[],
multiple=True,
),
ui.output_ui("groups_selectize_tutorial_ui"),
),
ui.card(
ui.card_header(budget_icon, "Privacy Budget"),
ui.markdown(
f"""
What is your privacy budget, or epsilon, for this release?
Many factors including the sensitivity of your data,
the frequency of DP releases,
and the regulatory landscape can be considered.
Consider how your budget compares to that of
<a href="{registry_url}"
target="_blank">other projects</a>.
"""
),
log_slider("log_epsilon_slider", 0.1, 10.0),
ui.output_ui("epsilon_ui"),
ui.output_ui("privacy_loss_python_ui"),
),
ui.card(
ui.card_header(simulation_icon, "Simulation"),
ui.output_ui("simulation_card_ui"),
),
col_widths={
"sm": [12, 12, 12, 12], # 4 rows
"md": [6, 6, 6, 6], # 2 rows
"xxl": [3, 3, 3, 3], # 1 row
},
),
ui.output_ui("top_cards_ui"),
ui.output_ui("columns_ui"),
ui.output_ui("download_results_button_ui"),
value="analysis_panel",
Expand Down Expand Up @@ -152,7 +95,7 @@ def analysis_server(
# contributions_entity = state.contributions_entity
max_rows = state.max_rows
# initial_product = state.initial_product
# product = state.product
product = state.product

# Analysis choices:
all_column_names = state.all_column_names
Expand All @@ -174,9 +117,13 @@ def analysis_server(

@reactive.calc
def button_enabled():
    """
    Reactive flag saying whether the button may be enabled.

    True when at least one column has a weight and none of the
    analysis error collections is non-empty, or unconditionally when
    the selected product is ``Product.CSV_DESCRIPTION``.
    """
    # TODO: Get this in sync with results panel warning:
    # https://github.com/opendp/dp-wizard/issues/562
    at_least_one_column = bool(weights())
    no_errors = not any(analysis_errors().values())
    # Single return: the earlier, narrower `return` would have made the
    # CSV_DESCRIPTION bypass unreachable.
    return (
        at_least_one_column and no_errors
    ) or product() == Product.CSV_DESCRIPTION

@reactive.effect
def _update_columns():
Expand Down Expand Up @@ -239,6 +186,89 @@ def analysis_release_warning_ui():
def previous_summary_ui():
return dataset_summary(state)

@render.ui
def top_cards_ui():
    """
    Render the row of cards at the top of the analysis panel.

    For ``Product.CSV_DESCRIPTION`` only the privacy budget card is
    returned; for every other product the columns, grouping, privacy
    budget, and simulation cards are laid out together.
    """
    # NOTE: each card is bound to the bare ui.card(...) call. Wrapping the
    # call in parentheses *with a trailing comma* would silently create a
    # one-element tuple instead of a tag.
    budget_card = ui.card(
        ui.card_header(budget_icon, "Privacy Budget"),
        ui.markdown(
            f"""
What is your privacy budget, or epsilon, for this release?
Many factors including the sensitivity of your data,
the frequency of DP releases,
and the regulatory landscape can be considered.
Consider how your budget compares to that of
<a href="{registry_url}"
target="_blank">other projects</a>.
"""
        ),
        log_slider("log_epsilon_slider", 0.1, 10.0),
        ui.output_ui("epsilon_ui"),
        ui.output_ui("privacy_loss_python_ui"),
    )

    # Return early so the column/grouping/simulation cards are only built
    # when they are actually displayed.
    if product() == Product.CSV_DESCRIPTION:
        return ui.layout_columns(
            budget_card,
            col_widths={"md": [12], "lg": [6]},
        )

    columns_card = ui.card(
        ui.card_header(columns_icon, "Columns"),
        ui.markdown("Select numeric columns to calculate statistics on."),
        ui.input_selectize(
            "columns_selectize",
            "Columns",
            [],
            multiple=True,
        ),
        ui.output_ui("columns_selectize_tutorial_ui"),
    )
    grouping_card = ui.card(
        ui.card_header(groups_icon, "Grouping"),
        ui.markdown(
            """
Select columns to group by, or leave empty
to calculate statistics across the entire dataset.

Groups aren't applied to the previews on this page
but will be used in the final release.
"""
        ),
        ui.input_selectize(
            "groups_selectize",
            "Group by",
            [],
            multiple=True,
        ),
        ui.output_ui("groups_selectize_tutorial_ui"),
    )
    simulation_card = ui.card(
        ui.card_header(simulation_icon, "Simulation"),
        ui.output_ui("simulation_card_ui"),
    )

    return ui.layout_columns(
        columns_card,
        grouping_card,
        budget_card,
        simulation_card,
        col_widths={
            "sm": [12, 12, 12, 12],  # 4 rows
            "md": [6, 6, 6, 6],  # 2 rows
            "xxl": [3, 3, 3, 3],  # 1 row
        },
    )

@reactive.effect
@reactive.event(input.columns_selectize)
def _on_columns_change():
Expand Down
11 changes: 7 additions & 4 deletions dp_wizard/shiny/panels/dataset_panel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,17 +585,20 @@ def product_ui():
),
tutorial_box(
is_tutorial_mode(),
"""
f"""
Although the underlying OpenDP library is very flexible,
DP Wizard offers only a few analysis options:
DP Wizard offers a few analysis options to help you get started:

- The **DP Statistics** option supports
- The **{Product.STATISTICS}** option supports
grouping, histograms, mean, median, and count.
- With **DP Synthetic Data**, your privacy budget is used
- With **{Product.SYNTHETIC_DATA}**, your privacy budget is used
to infer the distributions of values within the
selected columns, and the correlations between columns.
This is less accurate than calculating the desired
statistics directly, but can be easier to work with downstream.
- The **{Product.CSV_DESCRIPTION}** summarizes the contents of CSVs
with a large number of columns, without revealing details
from individual rows.
""",
responsive=False,
),
Expand Down
8 changes: 5 additions & 3 deletions dp_wizard/shiny/panels/results_panel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
tutorial_box,
)
from dp_wizard.shiny.components.summaries import analysis_summary, dataset_summary
from dp_wizard.types import AppState
from dp_wizard.types import AppState, Product
from dp_wizard.utils.code_generators import AnalysisPlan, AnalysisPlanColumn
from dp_wizard.utils.code_generators.notebook_generator import (
PLACEHOLDER_CSV_NAME,
Expand Down Expand Up @@ -135,7 +135,9 @@ def results_server(
@render.ui
def results_requirements_warning_ui():
return hide_if(
bool(weights()),
# TODO: Get this in sync with analysis_panel validation
# https://github.com/opendp/dp-wizard/issues/562
bool(weights()) or product() == Product.CSV_DESCRIPTION,
info_md_box(
"""
Please define your analysis on the previous tab
Expand Down Expand Up @@ -196,7 +198,7 @@ def clean_download_stem() -> str:
def download_results_ui():
if in_cloud:
return None
disabled = not weights()
disabled = not (weights() or product() == Product.CSV_DESCRIPTION)
return [
ui.h3("Download Results"),
tutorial_box(
Expand Down
5 changes: 3 additions & 2 deletions dp_wizard/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
class Product(Enum):
    """
    The analysis products that can be selected.

    ``str(member)`` is the human-readable label; ``to_dict()`` maps each
    member's numeric value (as a string) to that label.
    """

    STATISTICS = auto()
    SYNTHETIC_DATA = auto()
    CSV_DESCRIPTION = auto()

    @classmethod
    def to_dict(cls) -> dict[str, str]:
        """
        Map each member's value, stringified, to its display label.

        >>> Product.to_dict()
        {'1': 'DP Statistics', '2': 'DP Synthetic Data', '3': 'DP CSV Description'}
        """
        return {str(member.value): str(member) for member in cls}

    def __str__(self) -> str:
        """
        Return the display label, e.g. "DP Synthetic Data".

        >>> str(Product.CSV_DESCRIPTION)
        'DP CSV Description'
        """
        # str.title() turns the acronym into "Csv"; restore its capitals.
        return "DP " + self.name.replace("_", " ").title().replace("Csv", "CSV")


class AnalysisName(str):
Expand Down
7 changes: 5 additions & 2 deletions dp_wizard/utils/code_generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class AnalysisPlan(NamedTuple):
>>> print(plan.to_stem())
dp_statistics_for_data_col_grouped_by_grouping_col
>>> print(plan.to_note())
This demonstrates how to calculate ...
This demonstrates how to create ...
Generated by DP Wizard ...
"""

Expand All @@ -48,6 +48,9 @@ class AnalysisPlan(NamedTuple):
columns: dict[ColumnName, list[AnalysisPlanColumn]]

def __str__(self) -> str:
if self.product == Product.CSV_DESCRIPTION:
return str(self.product)

def md_list(names) -> str:
return ", ".join(f"`{name}`" for name in names)

Expand All @@ -62,7 +65,7 @@ def to_stem(self) -> str:
def to_note(self) -> str:
now = datetime.now().strftime("%b %d, %Y at %I:%M%p")
return f"""
This demonstrates how to calculate {self} using OpenDP (https://docs.opendp.org).
This demonstrates how to create {self} using OpenDP (https://docs.opendp.org).
Generated by DP Wizard v{__version__} (https://github.com/opendp/dp-wizard) on {now}.
""".strip()

Expand Down
8 changes: 6 additions & 2 deletions dp_wizard/utils/code_generators/abstract_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ def __init__(self, analysis_plan: AnalysisPlan, note: str):
self.analysis_plan = analysis_plan
self.note = note

def _get_synth_or_stats(self) -> str:
def _get_product(self) -> str:
match self.analysis_plan.product:
case Product.STATISTICS:
return "stats"
case Product.SYNTHETIC_DATA:
return "synth"
case Product.CSV_DESCRIPTION:
return "codebook"
case _: # pragma: no cover
raise ValueError(self.analysis_plan.product)

Expand All @@ -46,14 +48,16 @@ def _get_extra(self) -> str:
return "polars"
case Product.SYNTHETIC_DATA:
return "mbi"
case Product.CSV_DESCRIPTION:
return "polars"
case _: # pragma: no cover
raise ValueError(self.analysis_plan.product)

# Subclass hook supplying the "noun" half of the root template name;
# _get_root_template joins it to the product prefix as f"{adj}_{noun}".
@abstractmethod
def _get_notebook_or_script(self) -> str: ... # pragma: no cover

def _get_root_template(self) -> str:
adj = self._get_synth_or_stats()
adj = self._get_product()
noun = self._get_notebook_or_script()
return f"{adj}_{noun}"

Expand Down
6 changes: 5 additions & 1 deletion dp_wizard/utils/code_generators/notebook_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,13 @@ def template(synthetic_data):
)
+ "}"
)
case Product.CSV_DESCRIPTION:
outputs_expression = "TODO"
case _: # pragma: no cover
raise ValueError(self.analysis_plan.product)
tmp_path = package_root / "tmp"
reports_block = (
Template(f"{self._get_synth_or_stats()}_reports", root)
Template(f"{self._get_product()}_reports", root)
.fill_expressions(
OUTPUTS=outputs_expression,
COLUMNS={
Expand Down Expand Up @@ -114,5 +116,7 @@ def _make_extra_blocks(self):
"STATS_QUERIES_BLOCK": self._make_stats_queries(),
"STATS_REPORTS_BLOCK": self._make_reports_block(),
}
case Product.CSV_DESCRIPTION:
return {} # TODO
case _: # pragma: no cover
raise ValueError(self.analysis_plan.product)
2 changes: 2 additions & 0 deletions dp_wizard/utils/code_generators/script_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,7 @@ def _make_extra_blocks(self):
"STATS_CONTEXT_BLOCK": self._make_stats_context(),
"STATS_QUERIES_BLOCK": self._make_stats_queries(),
}
case Product.CSV_DESCRIPTION:
return {} # TODO
case _: # pragma: no cover
raise ValueError(self.analysis_plan.product)
2 changes: 2 additions & 0 deletions tests/utils/test_code_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ def test_make_notebook(plan):
context_global = "synth_context"
case Product.STATISTICS:
context_global = "stats_context"
case Product.CSV_DESCRIPTION:
context_global = "codebook_context"
case _: # pragma: no cover
raise ValueError(plan.product)
assert isinstance(globals[context_global], dp.Context)
Expand Down
Loading