From 472a71e21c9312358fc3467e93683cbefe0c67e6 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Wed, 18 Feb 2026 12:04:57 -0500 Subject: [PATCH 1/3] add marimo artifacts to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 3faba4e613..308df4a122 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,8 @@ package-lock.yml # uv uv.lock + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ From 205dc43f69e0d458ae8088ca13091d3551723c4a Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Fri, 20 Feb 2026 14:19:10 -0500 Subject: [PATCH 2/3] refresh distribution notebook --- .../distribution/bytes_socrata_versions.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/notebooks/marimo/lifecycle/distribution/bytes_socrata_versions.py b/notebooks/marimo/lifecycle/distribution/bytes_socrata_versions.py index e54c133b79..5dced1dd11 100644 --- a/notebooks/marimo/lifecycle/distribution/bytes_socrata_versions.py +++ b/notebooks/marimo/lifecycle/distribution/bytes_socrata_versions.py @@ -1,6 +1,6 @@ import marimo -__generated_with = "0.18.3" +__generated_with = "0.19.7" app = marimo.App(width="full") @@ -14,25 +14,41 @@ def _(mo): return -@app.cell +@app.cell(hide_code=True) def _(): + import marimo as mo from dcpy.lifecycle.scripts import version_compare + return mo, version_compare + +@app.cell(hide_code=True) +def _(version_compare): versions = version_compare.run() return (versions,) +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Helpful links + - Github action to distribute data: https://github.com/NYCPlanning/data-engineering/actions/workflows/distribute_socrata_from_bytes.yml + - OpenData page to sign in and publish revisions: https://opendata.cityofnewyork.us/ + - Product Metadata repo: https://github.com/NYCPlanning/product-metadata + """) + return + + @app.cell def _(versions): - versions + versions.reset_index() return @app.cell def _(): - import marimo as mo - - return (mo,) + # all_open_data_keys = version_compare.get_all_open_data_keys() + # all_open_data_keys + return if __name__ == "__main__": From 85b8802c2005cee231231a7cebd7503470021996 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Fri, 20 Feb 2026 14:19:25 -0500 Subject: [PATCH 3/3] try adding links to open data pages --- dcpy/lifecycle/scripts/version_compare.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dcpy/lifecycle/scripts/version_compare.py b/dcpy/lifecycle/scripts/version_compare.py index 26ec439ba6..036f07f231 100644 --- a/dcpy/lifecycle/scripts/version_compare.py +++ b/dcpy/lifecycle/scripts/version_compare.py @@ -113,6 +113,8 @@ def __eq__(self, other): def __hash__(self): return hash(self.original) +def open_data_page_url(four_four: str) -> str: + return f"https://data.cityofnewyork.us/d/{four_four}" def sort_by_outdated_products(df): """ @@ -161,7 +163,7 @@ def sort_by_outdated_products(df): return df_sorted.set_index(["product", "dataset"]) -def get_all_open_data_keys(): +def get_all_open_data_keys() -> list[str]: """retrieve all product.dataset.destination_ids""" return product_metadata.load(version="dummy").query_product_dataset_destinations( destination_filter={"types": {"open_data"}}, @@ -198,6 +200,11 @@ def make_comparison_dataframe(bytes_versions, open_data_versions): product, dataset, destination_id = key.split(".") bytes_version = bytes_versions.get(f"{product}.{dataset}") open_data_vers = open_data_versions.get(key, []) + open_data_con = connectors["open_data"] + # socrata_dest = soc_pub.SocrataDestination(metadata, dataset_destination_id) + four_four = "idk_yet" + open_data_url = open_data_page_url(four_four) + # Determine if versions are up to date using fuzzy comparison up_to_date = False @@ -216,6 +223,7 @@ def make_comparison_dataframe(bytes_versions, open_data_versions): "bytes_version": bytes_version, "open_data_versions": open_data_vers, "up_to_date": up_to_date, + "open_data_url": open_data_url, } ) df = pd.DataFrame(rows).set_index(["product", "dataset"]).sort_index()