Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,8 @@ <h1>Results</h1>
&gt;&gt;&gt; grade_query = (
... stats_context.query().group_by(groups).agg(pl.len().dp.noise().alias(&quot;count&quot;))
... )
&gt;&gt;&gt; grade_accuracy = grade_query.summarize(alpha=1 - confidence)[&quot;accuracy&quot;].item()
&gt;&gt;&gt; summary = grade_query.summarize(alpha=1 - confidence)
&gt;&gt;&gt; grade_accuracy = summary[&quot;accuracy&quot;].item()
&gt;&gt;&gt; grade_stats = grade_query.release().collect()
</code></pre>
<p>If we try to run more queries at this point, it will error. Once the
Expand Down
3 changes: 2 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,8 @@ Query for grade:
>>> grade_query = (
... stats_context.query().group_by(groups).agg(pl.len().dp.noise().alias("count"))
... )
>>> grade_accuracy = grade_query.summarize(alpha=1 - confidence)["accuracy"].item()
>>> summary = grade_query.summarize(alpha=1 - confidence)
>>> grade_accuracy = summary["accuracy"].item()
>>> grade_stats = grade_query.release().collect()

```
Expand Down
Binary file modified docs/screenshots/download-results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/screenshots/select-dataset.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 17 additions & 3 deletions dp_wizard/utils/code_generators/analyses/histogram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,23 @@ def template(BIN_NAME, GROUP_NAMES, stats_context, confidence):
.agg(pl.len().dp.noise().alias("count")) # type: ignore
.WITH_KEYS
)
ACCURACY_NAME = QUERY_NAME.summarize(alpha=1 - confidence)[ # noqa: F841
"accuracy"
].item()

# + [markdown] tags=["tutorial"]
# We can summarize the statistic to get the accuracy.
# More on [`summarize()` in the OpenDP
# docs](https://docs.opendp.org/en/OPENDP_V_VERSION/api/python/opendp.extras.polars.html#opendp.extras.polars.LazyFrameQuery.summarize).
# -

# + tags=["tutorial"]
summary = QUERY_NAME.summarize(alpha=1 - confidence)
summary
# -

# + [markdown] tags=["tutorial"]
# Proceeding to the DP release:
# -

ACCURACY_NAME = summary["accuracy"].item() # noqa: F841
STATS_NAME = QUERY_NAME.release().collect()
STATS_NAME # type: ignore

Expand Down
18 changes: 17 additions & 1 deletion dp_wizard/utils/code_generators/analyses/mean/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,29 @@


def make_query(code_gen, identifier, accuracy_name, stats_name):
def template(GROUP_NAMES, stats_context, EXPR_NAME):
def template(GROUP_NAMES, stats_context, EXPR_NAME, confidence):
groups = GROUP_NAMES
QUERY_NAME = (
stats_context.query().group_by(groups).agg(EXPR_NAME).WITH_KEYS
if groups
else stats_context.query().select(EXPR_NAME)
)

# + [markdown] tags=["tutorial"]
# If we summarize the statistic, we see that a mean is composed
# of a sum and a length, each with their own accuracy.
# More on [`summarize()` in the OpenDP
# docs](https://docs.opendp.org/en/OPENDP_V_VERSION/api/python/opendp.extras.polars.html#opendp.extras.polars.LazyFrameQuery.summarize).
# -

# + tags=["tutorial"]
QUERY_NAME.summarize(alpha=1 - confidence)
# -

# + [markdown] tags=["tutorial"]
# Proceeding to the DP release:
# -

STATS_NAME = QUERY_NAME.release().collect()
STATS_NAME # type: ignore

Expand Down
18 changes: 17 additions & 1 deletion dp_wizard/utils/code_generators/analyses/median/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,29 @@


def make_query(code_gen, identifier, accuracy_name, stats_name):
def template(GROUP_NAMES, stats_context, EXPR_NAME):
def template(GROUP_NAMES, stats_context, EXPR_NAME, confidence):
groups = GROUP_NAMES
QUERY_NAME = (
stats_context.query().group_by(groups).agg(EXPR_NAME).WITH_KEYS
if groups
else stats_context.query().select(EXPR_NAME)
)

# + [markdown] tags=["tutorial"]
# Because the median is based on selection from candidate values,
# it does not have an accuracy, unlike histogram and mean.
# More on [`summarize()` in the OpenDP
# docs](https://docs.opendp.org/en/OPENDP_V_VERSION/api/python/opendp.extras.polars.html#opendp.extras.polars.LazyFrameQuery.summarize).
# -

# + tags=["tutorial"]
QUERY_NAME.summarize(alpha=1 - confidence)
# -

# + [markdown] tags=["tutorial"]
# Proceeding to the DP release:
# -

STATS_NAME = QUERY_NAME.release().collect()
STATS_NAME # type: ignore

Expand Down
4 changes: 3 additions & 1 deletion dp_wizard/utils/code_generators/script_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ def _get_notebook_or_script(self):

def _clean_up_py(self, py: str):
# The output is passed through black, so we don't need to overdo this regex.
py = re.sub(r"# [+-]", "", py)
# Strip jupytext light annotations.
py = re.sub(r"# \+.*", "", py)
py = re.sub(r"# -$", "", py, flags=re.MULTILINE)
return py

def _make_columns(self):
Expand Down
17 changes: 9 additions & 8 deletions tests/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ def test_doc_examples_up_to_date():
)
expected_code = NotebookGenerator(plan, "Note goes here!").make_py(reformat=True)

if any(
# csv_path is expanded to an absolute path, so ignore it:
line not in expected_code and csv_path not in line
unexpected_lines = [
line
for line in doc_code.splitlines()
):
# It's fine for the docs to be a subset of the generated code,
# but if a line is missing, the "pytest -vv" diff
# will give us context to fix it.
assert expected_code == doc_code # pragma: no cover
# csv_path is absolute and it will have local information
# that shouldn't be checked in.
if line not in expected_code and csv_path not in line
]
assert (
not unexpected_lines
), f"These lines are missing from {index_md}:\n" + "\n".join(unexpected_lines)
1 change: 1 addition & 0 deletions tests/utils/test_code_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def id_for_plan(plan: AnalysisPlan):
expected_urls = [
"https://docs.opendp.org/",
"https://github.com/opendp/dp-wizard",
"https://docs.opendp.org/en/v0.14.1/api/python/opendp.extras.polars.html#opendp.extras.polars.LazyFrameQuery.summarize",
"https://docs.opendp.org/en/v0.14.1/api/python/opendp.extras.mbi.html#opendp.extras.mbi.ContingencyTable.synthesize",
"https://docs.opendp.org/en/v0.14.1/api/python/opendp.extras.mbi.html#opendp.extras.mbi.ContingencyTable.project_melted",
]
Expand Down
Loading