From c4041772c1305f642e70507e717eb14f629b270f Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 13:32:00 -0500 Subject: [PATCH 01/26] rename bash/bin to remove dcp_ prefix all our scripts are expecting the command with the prefix, so... --- bash/bin/{dcp_echo_build_engine => echo_build_engine} | 0 bash/bin/{dcp_run_sql_command => run_sql_command} | 0 bash/bin/{dcp_run_sql_file => run_sql_file} | 0 bash/bin/{dcp_shp_export => shp_export} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename bash/bin/{dcp_echo_build_engine => echo_build_engine} (100%) rename bash/bin/{dcp_run_sql_command => run_sql_command} (100%) rename bash/bin/{dcp_run_sql_file => run_sql_file} (100%) rename bash/bin/{dcp_shp_export => shp_export} (100%) diff --git a/bash/bin/dcp_echo_build_engine b/bash/bin/echo_build_engine similarity index 100% rename from bash/bin/dcp_echo_build_engine rename to bash/bin/echo_build_engine diff --git a/bash/bin/dcp_run_sql_command b/bash/bin/run_sql_command similarity index 100% rename from bash/bin/dcp_run_sql_command rename to bash/bin/run_sql_command diff --git a/bash/bin/dcp_run_sql_file b/bash/bin/run_sql_file similarity index 100% rename from bash/bin/dcp_run_sql_file rename to bash/bin/run_sql_file diff --git a/bash/bin/dcp_shp_export b/bash/bin/shp_export similarity index 100% rename from bash/bin/dcp_shp_export rename to bash/bin/shp_export From 280b6990e1985cffc00a635c4f62bbc4f99ee8e4 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 13:41:20 -0500 Subject: [PATCH 02/26] add .envrc files to all products --- products/cbbr/.envrc | 11 +++++++++++ products/cdbg/.envrc | 11 +++++++++++ products/ceqr/.envrc | 11 +++++++++++ products/checkbook/.envrc | 11 +++++++++++ products/colp/.envrc | 11 +++++++++++ products/cpdb/.envrc | 11 +++++++++++ products/cscl/.envrc | 11 +++++++++++ products/developments/.envrc | 11 +++++++++++ products/edde/.envrc | 7 +++++++ products/facilities/.envrc | 4 +++- products/factfinder/.envrc | 
11 +++++++++++ products/green_fast_track/.envrc | 11 +++++++++++ products/knownprojects/.envrc | 7 +++++++ products/pluto/.envrc | 4 +++- products/template/.envrc | 11 +++++++++++ products/zap-opendata/.envrc | 7 +++++++ products/zoningtaxlots/.envrc | 11 +++++++++++ 17 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 products/cbbr/.envrc create mode 100644 products/cdbg/.envrc create mode 100644 products/ceqr/.envrc create mode 100644 products/checkbook/.envrc create mode 100644 products/colp/.envrc create mode 100644 products/cpdb/.envrc create mode 100644 products/cscl/.envrc create mode 100644 products/developments/.envrc create mode 100644 products/edde/.envrc create mode 100644 products/factfinder/.envrc create mode 100644 products/green_fast_track/.envrc create mode 100644 products/knownprojects/.envrc create mode 100644 products/template/.envrc create mode 100644 products/zap-opendata/.envrc create mode 100644 products/zoningtaxlots/.envrc diff --git a/products/cbbr/.envrc b/products/cbbr/.envrc new file mode 100644 index 0000000000..47aed63af1 --- /dev/null +++ b/products/cbbr/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-cbbr + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/cdbg/.envrc b/products/cdbg/.envrc new file mode 100644 index 0000000000..5193568813 --- /dev/null +++ b/products/cdbg/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-cdbg + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/ceqr/.envrc b/products/ceqr/.envrc new file mode 100644 index 0000000000..f750b34516 --- /dev/null +++ b/products/ceqr/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-ceqr + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export 
BUILD_ENGINE=$(echo_build_engine) diff --git a/products/checkbook/.envrc b/products/checkbook/.envrc new file mode 100644 index 0000000000..f8fee5932c --- /dev/null +++ b/products/checkbook/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-checkbook + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/colp/.envrc b/products/colp/.envrc new file mode 100644 index 0000000000..7077c247d5 --- /dev/null +++ b/products/colp/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-colp + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/cpdb/.envrc b/products/cpdb/.envrc new file mode 100644 index 0000000000..b944d4ebbd --- /dev/null +++ b/products/cpdb/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-cpdb + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/cscl/.envrc b/products/cscl/.envrc new file mode 100644 index 0000000000..947843795b --- /dev/null +++ b/products/cscl/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-cscl + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/developments/.envrc b/products/developments/.envrc new file mode 100644 index 0000000000..d2f24be257 --- /dev/null +++ b/products/developments/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-devdb + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/edde/.envrc b/products/edde/.envrc new file mode 100644 index 0000000000..761a7bd273 --- /dev/null +++ b/products/edde/.envrc @@ -0,0 +1,7 @@ +source_up + +# Load local 
.env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi diff --git a/products/facilities/.envrc b/products/facilities/.envrc index 9da9f2ce84..0b481e6bca 100644 --- a/products/facilities/.envrc +++ b/products/facilities/.envrc @@ -1,9 +1,11 @@ source_up +export BUILD_ENGINE_DB=db-facilities + # Load local .env if it exists if [ -f .env ]; then dotenv .env watch_file .env fi -export BUILD_ENGINE=$(dcp_echo_build_engine) +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/factfinder/.envrc b/products/factfinder/.envrc new file mode 100644 index 0000000000..20a651c173 --- /dev/null +++ b/products/factfinder/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-factfinder + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/green_fast_track/.envrc b/products/green_fast_track/.envrc new file mode 100644 index 0000000000..84cae2bfb2 --- /dev/null +++ b/products/green_fast_track/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-green_fast_track + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/knownprojects/.envrc b/products/knownprojects/.envrc new file mode 100644 index 0000000000..761a7bd273 --- /dev/null +++ b/products/knownprojects/.envrc @@ -0,0 +1,7 @@ +source_up + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi diff --git a/products/pluto/.envrc b/products/pluto/.envrc index 9da9f2ce84..21e47fd363 100644 --- a/products/pluto/.envrc +++ b/products/pluto/.envrc @@ -1,9 +1,11 @@ source_up +export BUILD_ENGINE_DB=db-pluto + # Load local .env if it exists if [ -f .env ]; then dotenv .env watch_file .env fi -export BUILD_ENGINE=$(dcp_echo_build_engine) +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/template/.envrc b/products/template/.envrc new 
file mode 100644 index 0000000000..615a19ccda --- /dev/null +++ b/products/template/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-template + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) diff --git a/products/zap-opendata/.envrc b/products/zap-opendata/.envrc new file mode 100644 index 0000000000..761a7bd273 --- /dev/null +++ b/products/zap-opendata/.envrc @@ -0,0 +1,7 @@ +source_up + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi diff --git a/products/zoningtaxlots/.envrc b/products/zoningtaxlots/.envrc new file mode 100644 index 0000000000..abc573db39 --- /dev/null +++ b/products/zoningtaxlots/.envrc @@ -0,0 +1,11 @@ +source_up + +export BUILD_ENGINE_DB=db-ztl + +# Load local .env if it exists +if [ -f .env ]; then + dotenv .env + watch_file .env +fi + +export BUILD_ENGINE=$(echo_build_engine) From 1354319b80e5a2660e4f0af3441d61dac21763ec Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 14:01:18 -0500 Subject: [PATCH 03/26] allow override dotfile path I keep mine outside the data engineering dir --- .envrc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.envrc b/.envrc index b67a5a6bb7..282f283a64 100644 --- a/.envrc +++ b/.envrc @@ -1,8 +1,13 @@ #!/usr/bin/env bash # Load all variables from .env file -dotenv .env -watch_file .env +if [ -n "$DATA_ENGINEERING_DOT_FILE_PATH" ] && [ -f "$DATA_ENGINEERING_DOT_FILE_PATH" ]; then + dotenv "$DATA_ENGINEERING_DOT_FILE_PATH" + watch_file "$DATA_ENGINEERING_DOT_FILE_PATH" +elif [ -f .env ]; then + dotenv .env + watch_file .env +fi export PROJECT_ROOT_PATH="$PWD" From ea3e1242516f5e12b8b004392757dcced0566f7a Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 17:36:02 -0500 Subject: [PATCH 04/26] add a command for load_direnv. 
See comment for rationale --- bash/bin/load_direnv.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 bash/bin/load_direnv.sh diff --git a/bash/bin/load_direnv.sh b/bash/bin/load_direnv.sh new file mode 100755 index 0000000000..62dc61f5f3 --- /dev/null +++ b/bash/bin/load_direnv.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# Load direnv environment variables for the current directory into the calling shell. +# Source this file (`. load_direnv.sh`) from processes that use non-interactive shells; running it as a child process cannot modify the caller's environment. +eval "$(direnv export bash)" From a085a300049438f8e0ff118d70d4a5df11be1c15 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 12 Feb 2026 17:43:07 -0500 Subject: [PATCH 05/26] Add staging models for all PLUTO inputs --- products/pluto/models/_sources.yml | 35 ++++++++++++++ .../models/staging/stg__dcp_cb2010_wi.sql | 13 +++++ .../models/staging/stg__dcp_cb2020_wi.sql | 13 +++++ .../staging/stg__dcp_cdboundaries_wi.sql | 13 +++++ .../pluto/models/staging/stg__dcp_colp.sql | 13 +++++ .../staging/stg__dcp_commercialoverlay.sql | 13 +++++ .../staging/stg__dcp_councildistricts_wi.sql | 13 +++++ .../models/staging/stg__dcp_ct2010_wi.sql | 13 +++++ .../models/staging/stg__dcp_ct2020_wi.sql | 13 +++++ .../models/staging/stg__dcp_edesignation.sql | 13 +++++ .../models/staging/stg__dcp_firecompanies.sql | 13 +++++ ...dcp_gis_mandatory_inclusionary_housing.sql | 13 +++++ .../models/staging/stg__dcp_healthareas.sql | 13 +++++ .../models/staging/stg__dcp_healthcenters.sql | 13 +++++ .../models/staging/stg__dcp_limitedheight.sql | 13 +++++ .../staging/stg__dcp_policeprecincts.sql | 13 +++++ .../staging/stg__dcp_school_districts.sql | 13 +++++ .../staging/stg__dcp_specialpurpose.sql | 13 +++++ .../stg__dcp_specialpurposesubdistricts.sql | 13 +++++ .../models/staging/stg__dcp_transit_zones.sql | 13 +++++ .../staging/stg__dcp_zoningdistricts.sql | 13 +++++ .../staging/stg__dcp_zoningmapamendments.sql | 13 +++++ .../staging/stg__dcp_zoningmapindex.sql | 13 +++++ .../pluto/models/staging/stg__dof_condo.sql | 13 +++++
.../pluto/models/staging/stg__dof_dtm.sql | 13 +++++ .../models/staging/stg__dof_shoreline.sql | 13 +++++ .../staging/stg__doitt_zipcodeboundaries.sql | 13 +++++ .../models/staging/stg__dpr_greenthumb.sql | 13 +++++ .../models/staging/stg__dsny_frequencies.sql | 13 +++++ .../staging/stg__fema_firms2007_100yr.sql | 13 +++++ .../staging/stg__fema_pfirms2015_100yr.sql | 13 +++++ .../staging/stg__lpc_historic_districts.sql | 13 +++++ .../models/staging/stg__lpc_landmarks.sql | 13 +++++ .../staging/stg__pluto_input_cama_dof.sql | 2 + .../staging/stg__pluto_input_geocodes.sql | 2 + .../staging/stg__pluto_input_numbldgs.sql | 2 + .../pluto/pluto_build/sql/edesignation.sql | 2 +- products/pluto/pluto_build/sql/lpc.sql | 4 +- .../pluto/pluto_build/sql/preprocessing.sql | 47 ++++++++----------- 39 files changed, 479 insertions(+), 31 deletions(-) create mode 100644 products/pluto/models/staging/stg__dcp_cb2010_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_cb2020_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_cdboundaries_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_colp.sql create mode 100644 products/pluto/models/staging/stg__dcp_commercialoverlay.sql create mode 100644 products/pluto/models/staging/stg__dcp_councildistricts_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_ct2010_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_ct2020_wi.sql create mode 100644 products/pluto/models/staging/stg__dcp_edesignation.sql create mode 100644 products/pluto/models/staging/stg__dcp_firecompanies.sql create mode 100644 products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql create mode 100644 products/pluto/models/staging/stg__dcp_healthareas.sql create mode 100644 products/pluto/models/staging/stg__dcp_healthcenters.sql create mode 100644 products/pluto/models/staging/stg__dcp_limitedheight.sql create mode 100644 products/pluto/models/staging/stg__dcp_policeprecincts.sql 
create mode 100644 products/pluto/models/staging/stg__dcp_school_districts.sql create mode 100644 products/pluto/models/staging/stg__dcp_specialpurpose.sql create mode 100644 products/pluto/models/staging/stg__dcp_specialpurposesubdistricts.sql create mode 100644 products/pluto/models/staging/stg__dcp_transit_zones.sql create mode 100644 products/pluto/models/staging/stg__dcp_zoningdistricts.sql create mode 100644 products/pluto/models/staging/stg__dcp_zoningmapamendments.sql create mode 100644 products/pluto/models/staging/stg__dcp_zoningmapindex.sql create mode 100644 products/pluto/models/staging/stg__dof_condo.sql create mode 100644 products/pluto/models/staging/stg__dof_dtm.sql create mode 100644 products/pluto/models/staging/stg__dof_shoreline.sql create mode 100644 products/pluto/models/staging/stg__doitt_zipcodeboundaries.sql create mode 100644 products/pluto/models/staging/stg__dpr_greenthumb.sql create mode 100644 products/pluto/models/staging/stg__dsny_frequencies.sql create mode 100644 products/pluto/models/staging/stg__fema_firms2007_100yr.sql create mode 100644 products/pluto/models/staging/stg__fema_pfirms2015_100yr.sql create mode 100644 products/pluto/models/staging/stg__lpc_historic_districts.sql create mode 100644 products/pluto/models/staging/stg__lpc_landmarks.sql create mode 100644 products/pluto/models/staging/stg__pluto_input_cama_dof.sql create mode 100644 products/pluto/models/staging/stg__pluto_input_geocodes.sql create mode 100644 products/pluto/models/staging/stg__pluto_input_numbldgs.sql diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index 385c929c97..a3fc3670f7 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -5,6 +5,41 @@ sources: schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tables: - name: pluto_input_research + - name: pluto_input_cama_dof + - name: pluto_input_geocodes + - name: pluto_input_numbldgs + - name: lpc_landmarks + - name: 
lpc_historic_districts + - name: dcp_edesignation + - name: dcp_councildistricts_wi + - name: dcp_cb2010_wi + - name: dcp_cb2020_wi + - name: dcp_ct2010_wi + - name: dcp_ct2020_wi + - name: dcp_cdboundaries_wi + - name: doitt_zipcodeboundaries + - name: dcp_school_districts + - name: dcp_zoningdistricts + - name: dcp_commercialoverlay + - name: dcp_limitedheight + - name: dcp_specialpurpose + - name: dcp_specialpurposesubdistricts + - name: dcp_zoningmapamendments + - name: dcp_zoningmapindex + - name: dcp_firecompanies + - name: dcp_policeprecincts + - name: dcp_healthareas + - name: dcp_healthcenters + - name: dsny_frequencies + - name: dcp_colp + - name: dpr_greenthumb + - name: dof_condo + - name: dof_dtm + - name: dof_shoreline + - name: fema_firms2007_100yr + - name: fema_pfirms2015_100yr + - name: dcp_transit_zones + - name: dcp_gis_mandatory_inclusionary_housing - name: dcp_developments description: >- Contains changes in units resulting from new buildings, major alterations, and diff --git a/products/pluto/models/staging/stg__dcp_cb2010_wi.sql b/products/pluto/models/staging/stg__dcp_cb2010_wi.sql new file mode 100644 index 0000000000..030628f560 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_cb2010_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_cb2010_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_cb2020_wi.sql b/products/pluto/models/staging/stg__dcp_cb2020_wi.sql new file mode 100644 index 0000000000..8b6cc1de6b --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_cb2020_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_cb2020_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_cdboundaries_wi.sql 
b/products/pluto/models/staging/stg__dcp_cdboundaries_wi.sql new file mode 100644 index 0000000000..3fba6a5225 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_cdboundaries_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_cdboundaries_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_colp.sql b/products/pluto/models/staging/stg__dcp_colp.sql new file mode 100644 index 0000000000..808a4da79c --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_colp.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_colp') }} diff --git a/products/pluto/models/staging/stg__dcp_commercialoverlay.sql b/products/pluto/models/staging/stg__dcp_commercialoverlay.sql new file mode 100644 index 0000000000..b2c5c134dd --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_commercialoverlay.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_commercialoverlay') }} diff --git a/products/pluto/models/staging/stg__dcp_councildistricts_wi.sql b/products/pluto/models/staging/stg__dcp_councildistricts_wi.sql new file mode 100644 index 0000000000..d856fcba7b --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_councildistricts_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_councildistricts_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_ct2010_wi.sql b/products/pluto/models/staging/stg__dcp_ct2010_wi.sql new file mode 100644 index 0000000000..1af30f8b5c --- 
/dev/null +++ b/products/pluto/models/staging/stg__dcp_ct2010_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_ct2010_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_ct2020_wi.sql b/products/pluto/models/staging/stg__dcp_ct2020_wi.sql new file mode 100644 index 0000000000..35a30b1bee --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_ct2020_wi.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_ct2020_wi') }} diff --git a/products/pluto/models/staging/stg__dcp_edesignation.sql b/products/pluto/models/staging/stg__dcp_edesignation.sql new file mode 100644 index 0000000000..f7b8ae2a73 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_edesignation.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_edesignation') }} diff --git a/products/pluto/models/staging/stg__dcp_firecompanies.sql b/products/pluto/models/staging/stg__dcp_firecompanies.sql new file mode 100644 index 0000000000..e57ddc613a --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_firecompanies.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_firecompanies') }} diff --git a/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql b/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql new file mode 100644 index 0000000000..20775470a0 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql @@ -0,0 
+1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_gis_mandatory_inclusionary_housing') }} diff --git a/products/pluto/models/staging/stg__dcp_healthareas.sql b/products/pluto/models/staging/stg__dcp_healthareas.sql new file mode 100644 index 0000000000..ec6c5dcae9 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_healthareas.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_healthareas') }} diff --git a/products/pluto/models/staging/stg__dcp_healthcenters.sql b/products/pluto/models/staging/stg__dcp_healthcenters.sql new file mode 100644 index 0000000000..b866c1111c --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_healthcenters.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_healthcenters') }} diff --git a/products/pluto/models/staging/stg__dcp_limitedheight.sql b/products/pluto/models/staging/stg__dcp_limitedheight.sql new file mode 100644 index 0000000000..0edf553043 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_limitedheight.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_limitedheight') }} diff --git a/products/pluto/models/staging/stg__dcp_policeprecincts.sql b/products/pluto/models/staging/stg__dcp_policeprecincts.sql new file mode 100644 index 0000000000..3c5c62663c --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_policeprecincts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 
'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_policeprecincts') }} diff --git a/products/pluto/models/staging/stg__dcp_school_districts.sql b/products/pluto/models/staging/stg__dcp_school_districts.sql new file mode 100644 index 0000000000..8045366392 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_school_districts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_school_districts') }} diff --git a/products/pluto/models/staging/stg__dcp_specialpurpose.sql b/products/pluto/models/staging/stg__dcp_specialpurpose.sql new file mode 100644 index 0000000000..1a7b78341d --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_specialpurpose.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_specialpurpose') }} diff --git a/products/pluto/models/staging/stg__dcp_specialpurposesubdistricts.sql b/products/pluto/models/staging/stg__dcp_specialpurposesubdistricts.sql new file mode 100644 index 0000000000..668eb32eba --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_specialpurposesubdistricts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_specialpurposesubdistricts') }} diff --git a/products/pluto/models/staging/stg__dcp_transit_zones.sql b/products/pluto/models/staging/stg__dcp_transit_zones.sql new file mode 100644 index 0000000000..b2c0c7d7d7 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_transit_zones.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + 
wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_transit_zones') }} diff --git a/products/pluto/models/staging/stg__dcp_zoningdistricts.sql b/products/pluto/models/staging/stg__dcp_zoningdistricts.sql new file mode 100644 index 0000000000..9295e5aba1 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_zoningdistricts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_zoningdistricts') }} diff --git a/products/pluto/models/staging/stg__dcp_zoningmapamendments.sql b/products/pluto/models/staging/stg__dcp_zoningmapamendments.sql new file mode 100644 index 0000000000..a654edc533 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_zoningmapamendments.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_zoningmapamendments') }} diff --git a/products/pluto/models/staging/stg__dcp_zoningmapindex.sql b/products/pluto/models/staging/stg__dcp_zoningmapindex.sql new file mode 100644 index 0000000000..7b7d3d3270 --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_zoningmapindex.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_zoningmapindex') }} diff --git a/products/pluto/models/staging/stg__dof_condo.sql b/products/pluto/models/staging/stg__dof_condo.sql new file mode 100644 index 0000000000..8fd8c178b3 --- /dev/null +++ b/products/pluto/models/staging/stg__dof_condo.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dof_condo') }} diff --git 
a/products/pluto/models/staging/stg__dof_dtm.sql b/products/pluto/models/staging/stg__dof_dtm.sql new file mode 100644 index 0000000000..ce2f972ebb --- /dev/null +++ b/products/pluto/models/staging/stg__dof_dtm.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dof_dtm') }} diff --git a/products/pluto/models/staging/stg__dof_shoreline.sql b/products/pluto/models/staging/stg__dof_shoreline.sql new file mode 100644 index 0000000000..1f6cf05e3b --- /dev/null +++ b/products/pluto/models/staging/stg__dof_shoreline.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dof_shoreline') }} diff --git a/products/pluto/models/staging/stg__doitt_zipcodeboundaries.sql b/products/pluto/models/staging/stg__doitt_zipcodeboundaries.sql new file mode 100644 index 0000000000..0d3b913c2c --- /dev/null +++ b/products/pluto/models/staging/stg__doitt_zipcodeboundaries.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'doitt_zipcodeboundaries') }} diff --git a/products/pluto/models/staging/stg__dpr_greenthumb.sql b/products/pluto/models/staging/stg__dpr_greenthumb.sql new file mode 100644 index 0000000000..08a3bdfd7c --- /dev/null +++ b/products/pluto/models/staging/stg__dpr_greenthumb.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dpr_greenthumb') }} diff --git a/products/pluto/models/staging/stg__dsny_frequencies.sql b/products/pluto/models/staging/stg__dsny_frequencies.sql new file mode 100644 index 
0000000000..a5a4d4823e --- /dev/null +++ b/products/pluto/models/staging/stg__dsny_frequencies.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dsny_frequencies') }} diff --git a/products/pluto/models/staging/stg__fema_firms2007_100yr.sql b/products/pluto/models/staging/stg__fema_firms2007_100yr.sql new file mode 100644 index 0000000000..2cffa8d4b4 --- /dev/null +++ b/products/pluto/models/staging/stg__fema_firms2007_100yr.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'fema_firms2007_100yr') }} diff --git a/products/pluto/models/staging/stg__fema_pfirms2015_100yr.sql b/products/pluto/models/staging/stg__fema_pfirms2015_100yr.sql new file mode 100644 index 0000000000..a2700fd70c --- /dev/null +++ b/products/pluto/models/staging/stg__fema_pfirms2015_100yr.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'fema_pfirms2015_100yr') }} diff --git a/products/pluto/models/staging/stg__lpc_historic_districts.sql b/products/pluto/models/staging/stg__lpc_historic_districts.sql new file mode 100644 index 0000000000..21070d9720 --- /dev/null +++ b/products/pluto/models/staging/stg__lpc_historic_districts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'lpc_historic_districts') }} diff --git a/products/pluto/models/staging/stg__lpc_landmarks.sql b/products/pluto/models/staging/stg__lpc_landmarks.sql new file mode 100644 index 0000000000..fd070d6330 --- /dev/null +++ 
b/products/pluto/models/staging/stg__lpc_landmarks.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'lpc_landmarks') }} diff --git a/products/pluto/models/staging/stg__pluto_input_cama_dof.sql b/products/pluto/models/staging/stg__pluto_input_cama_dof.sql new file mode 100644 index 0000000000..d048aec7d0 --- /dev/null +++ b/products/pluto/models/staging/stg__pluto_input_cama_dof.sql @@ -0,0 +1,2 @@ +SELECT * +FROM {{ source('recipe_sources', 'pluto_input_cama_dof') }} diff --git a/products/pluto/models/staging/stg__pluto_input_geocodes.sql b/products/pluto/models/staging/stg__pluto_input_geocodes.sql new file mode 100644 index 0000000000..95bec1b0e0 --- /dev/null +++ b/products/pluto/models/staging/stg__pluto_input_geocodes.sql @@ -0,0 +1,2 @@ +SELECT * +FROM {{ source('recipe_sources', 'pluto_input_geocodes') }} diff --git a/products/pluto/models/staging/stg__pluto_input_numbldgs.sql b/products/pluto/models/staging/stg__pluto_input_numbldgs.sql new file mode 100644 index 0000000000..5cbcd29dc7 --- /dev/null +++ b/products/pluto/models/staging/stg__pluto_input_numbldgs.sql @@ -0,0 +1,2 @@ +SELECT * +FROM {{ source('recipe_sources', 'pluto_input_numbldgs') }} diff --git a/products/pluto/pluto_build/sql/edesignation.sql b/products/pluto/pluto_build/sql/edesignation.sql index 6fe79226b5..0e45c6f547 100644 --- a/products/pluto/pluto_build/sql/edesignation.sql +++ b/products/pluto/pluto_build/sql/edesignation.sql @@ -13,7 +13,7 @@ WITH edesignation AS ( PARTITION BY bbl ORDER BY ceqr_num, ulurp_num, enumber ) AS row_number - FROM dcp_edesignation + FROM stg__dcp_edesignation ) AS x WHERE x.row_number = 1 ) diff --git a/products/pluto/pluto_build/sql/lpc.sql b/products/pluto/pluto_build/sql/lpc.sql index fafdca956f..da81acaf1e 100644 --- a/products/pluto/pluto_build/sql/lpc.sql +++ b/products/pluto/pluto_build/sql/lpc.sql @@ 
-39,7 +39,7 @@ WITH histdistricts AS ( PARTITION BY bbl ORDER BY hist_dist ) AS row_number - FROM lpc_historic_districts + FROM stg__lpc_historic_districts WHERE hist_dist != '0' AND hist_dist NOT LIKE 'Individual Landmark%' @@ -65,7 +65,7 @@ WITH landmarks AS ( SELECT DISTINCT bbl, lm_type - FROM lpc_landmarks + FROM stg__lpc_landmarks WHERE (lm_type = 'Interior Landmark' OR lm_type = 'Individual Landmark') AND status = 'DESIGNATED' diff --git a/products/pluto/pluto_build/sql/preprocessing.sql b/products/pluto/pluto_build/sql/preprocessing.sql index 5fb2a677cb..727959a2b9 100644 --- a/products/pluto/pluto_build/sql/preprocessing.sql +++ b/products/pluto/pluto_build/sql/preprocessing.sql @@ -1,29 +1,20 @@ -- change all wkb_geometry to geom -ALTER TABLE dcp_ct2010_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_cb2010_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_ct2020_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_cb2020_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_edesignation RENAME wkb_geometry TO geom; -ALTER TABLE lpc_historic_districts RENAME wkb_geometry TO geom; -ALTER TABLE lpc_landmarks RENAME wkb_geometry TO geom; -ALTER TABLE dcp_cdboundaries_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_school_districts RENAME wkb_geometry TO geom; -ALTER TABLE dcp_councildistricts_wi RENAME wkb_geometry TO geom; -ALTER TABLE dcp_firecompanies RENAME wkb_geometry TO geom; -ALTER TABLE dcp_policeprecincts RENAME wkb_geometry TO geom; -ALTER TABLE dcp_healthareas RENAME wkb_geometry TO geom; -ALTER TABLE dcp_healthcenters RENAME wkb_geometry TO geom; -ALTER TABLE dsny_frequencies RENAME wkb_geometry TO geom; -ALTER TABLE dpr_greenthumb RENAME wkb_geometry TO geom; -ALTER TABLE dof_dtm RENAME wkb_geometry TO geom; -ALTER TABLE dof_shoreline RENAME wkb_geometry TO geom; -ALTER TABLE dcp_commercialoverlay RENAME wkb_geometry TO geom; -ALTER TABLE dcp_limitedheight RENAME wkb_geometry TO geom; -ALTER TABLE dcp_zoningdistricts RENAME wkb_geometry TO geom; -ALTER 
TABLE dcp_specialpurpose RENAME wkb_geometry TO geom; -ALTER TABLE dcp_specialpurposesubdistricts RENAME wkb_geometry TO geom; -ALTER TABLE dcp_zoningmapamendments RENAME wkb_geometry TO geom; -ALTER TABLE dcp_zoningmapindex RENAME wkb_geometry TO geom; -ALTER TABLE fema_firms2007_100yr RENAME wkb_geometry TO geom; -ALTER TABLE fema_pfirms2015_100yr RENAME wkb_geometry TO geom; -ALTER TABLE doitt_zipcodeboundaries RENAME wkb_geometry TO geom; +-- Census/boundary datasets now handled by DBT staging models: +-- dcp_ct2010_wi, dcp_cb2010_wi, dcp_ct2020_wi, dcp_cb2020_wi, +-- dcp_cdboundaries_wi, dcp_school_districts, dcp_councildistricts_wi, doitt_zipcodeboundaries +-- lpc_landmarks, lpc_historic_districts, dcp_edesignation now handled by DBT staging models +-- Zoning datasets now handled by DBT staging models: +-- dcp_commercialoverlay, dcp_limitedheight, dcp_zoningdistricts, dcp_specialpurpose, +-- dcp_specialpurposesubdistricts, dcp_zoningmapamendments, dcp_zoningmapindex +-- Public services datasets now handled by DBT staging models: +-- dcp_firecompanies, dcp_policeprecincts, dcp_healthareas, dcp_healthcenters, +-- dsny_frequencies, dcp_colp, dpr_greenthumb +-- DOF/tax datasets now handled by DBT staging models: +-- dof_condo, dof_dtm, dof_shoreline +-- PLUTO input datasets now handled by DBT staging models: +-- pluto_input_cama_dof, pluto_input_geocodes, pluto_input_numbldgs +-- Flood/infrastructure datasets now handled by DBT staging models: +-- fema_firms2007_100yr, fema_pfirms2015_100yr, dcp_transit_zones, +-- dcp_gis_mandatory_inclusionary_housing + +-- All geometry column transformations now handled by DBT staging models! 
From 79f289573172671a3b7549b0155b6b4915b19898 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 12 Feb 2026 21:28:57 -0500 Subject: [PATCH 06/26] Fix missed source table references (n58.1.9) - Created 4 additional staging models: * stg__pluto_input_research.sql * stg__pluto_pts.sql * stg__dcp_mappluto.sql * stg__previous_pluto.sql - Fixed remaining non-stg__ references in 13 SQL files - All source tables now consistently use staging models - Total staging models: 40 (up from 36) --- products/pluto/models/_sources.yml | 2 ++ .../models/staging/stg__dcp_mappluto.sql | 13 ++++++++++ .../staging/stg__pluto_input_research.sql | 2 ++ .../pluto/models/staging/stg__pluto_pts.sql | 13 ++++++++++ .../models/staging/stg__previous_pluto.sql | 14 ++++++++++ products/pluto/pluto_build/sql/backfill.sql | 12 ++++----- products/pluto/pluto_build/sql/bldgclass.sql | 2 +- .../pluto/pluto_build/sql/corr_lotarea.sql | 18 ++++++------- .../pluto/pluto_build/sql/corr_template.sql | 10 +++---- products/pluto/pluto_build/sql/flood_flag.sql | 4 +-- products/pluto/pluto_build/sql/ownertype.sql | 2 +- .../pluto/pluto_build/sql/spatialjoins.sql | 26 +++++++++---------- .../sql/zoning_commercialoverlay.sql | 2 +- .../pluto_build/sql/zoning_limitedheight.sql | 2 +- .../sql/zoning_specialdistrict.sql | 2 +- .../pluto/pluto_build/sql/zoning_zonemap.sql | 2 +- 16 files changed, 85 insertions(+), 41 deletions(-) create mode 100644 products/pluto/models/staging/stg__dcp_mappluto.sql create mode 100644 products/pluto/models/staging/stg__pluto_input_research.sql create mode 100644 products/pluto/models/staging/stg__pluto_pts.sql create mode 100644 products/pluto/models/staging/stg__previous_pluto.sql diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index a3fc3670f7..696b2c8f52 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -8,6 +8,8 @@ sources: - name: pluto_input_cama_dof - name: pluto_input_geocodes - name: 
pluto_input_numbldgs + - name: pluto_pts + - name: dcp_mappluto - name: lpc_landmarks - name: lpc_historic_districts - name: dcp_edesignation diff --git a/products/pluto/models/staging/stg__dcp_mappluto.sql b/products/pluto/models/staging/stg__dcp_mappluto.sql new file mode 100644 index 0000000000..f8a5f87add --- /dev/null +++ b/products/pluto/models/staging/stg__dcp_mappluto.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'dcp_mappluto') }} diff --git a/products/pluto/models/staging/stg__pluto_input_research.sql b/products/pluto/models/staging/stg__pluto_input_research.sql new file mode 100644 index 0000000000..463810d23a --- /dev/null +++ b/products/pluto/models/staging/stg__pluto_input_research.sql @@ -0,0 +1,2 @@ +SELECT * +FROM {{ source('recipe_sources', 'pluto_input_research') }} diff --git a/products/pluto/models/staging/stg__pluto_pts.sql b/products/pluto/models/staging/stg__pluto_pts.sql new file mode 100644 index 0000000000..97ef76b7d5 --- /dev/null +++ b/products/pluto/models/staging/stg__pluto_pts.sql @@ -0,0 +1,13 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +SELECT + *, + wkb_geometry AS geom +FROM {{ source('recipe_sources', 'pluto_pts') }} diff --git a/products/pluto/models/staging/stg__previous_pluto.sql b/products/pluto/models/staging/stg__previous_pluto.sql new file mode 100644 index 0000000000..1eeeda3831 --- /dev/null +++ b/products/pluto/models/staging/stg__previous_pluto.sql @@ -0,0 +1,14 @@ +{{ + config( + materialized='table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'} + ] + ) +}} + +-- Previous version of PLUTO for change detection +SELECT + *, + wkb_geometry AS geom +FROM {{ source('build_sources', 'previous_pluto') }} diff --git a/products/pluto/pluto_build/sql/backfill.sql b/products/pluto/pluto_build/sql/backfill.sql 
index f1a5b80e23..0c73cca715 100644 --- a/products/pluto/pluto_build/sql/backfill.sql +++ b/products/pluto/pluto_build/sql/backfill.sql @@ -1,7 +1,7 @@ -- # Update lot area with lot area value from 18v2.1 UPDATE pluto a SET lotarea = b.lotarea -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND a.lotarea = '0' @@ -27,7 +27,7 @@ UPDATE pluto a SET lotfront = b.lotfront, lotdepth = b.lotdepth -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND a.lotfront::numeric = 0 @@ -40,7 +40,7 @@ UPDATE pluto a SET bldgfront = b.bldgfront, bldgdepth = b.bldgdepth -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND a.bldgfront::numeric = 0 @@ -58,7 +58,7 @@ WHERE lotarea != '0' AND lotarea IS NOT NULL; -- # Update irrlotcode from 18v2.1 UPDATE pluto a SET irrlotcode = b.irrlotcode -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND a.lotfront::numeric = b.lotfront::numeric @@ -81,7 +81,7 @@ WHERE yearalter2::numeric < 1600; -- # Take zipcode from 18v2.1 UPDATE pluto a SET zipcode = b.zipcode -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND LENGTH(b.zipcode::text) = 5 @@ -89,7 +89,7 @@ WHERE UPDATE pluto a SET zipcode = b.zipcode -FROM dcp_mappluto AS b +FROM stg__dcp_mappluto AS b WHERE a.bbl = b.bbl::bigint::text AND (a.zipcode::numeric != b.zipcode::numeric) diff --git a/products/pluto/pluto_build/sql/bldgclass.sql b/products/pluto/pluto_build/sql/bldgclass.sql index 8572b650d1..0c92a6090c 100644 --- a/products/pluto/pluto_build/sql/bldgclass.sql +++ b/products/pluto/pluto_build/sql/bldgclass.sql @@ -170,7 +170,7 @@ gardenlayper AS ( ) AS segzonegeom, ST_AREA(n.geom) AS allzonegeom FROM pluto AS p - INNER JOIN dpr_greenthumb AS n + INNER JOIN stg__dpr_greenthumb AS n ON ST_INTERSECTS(p.geom, n.geom) WHERE p.bldgclass LIKE 'V%' OR p.bldgclass IS NULL ), diff --git 
a/products/pluto/pluto_build/sql/corr_lotarea.sql b/products/pluto/pluto_build/sql/corr_lotarea.sql index dbed8185cb..965da07883 100644 --- a/products/pluto/pluto_build/sql/corr_lotarea.sql +++ b/products/pluto/pluto_build/sql/corr_lotarea.sql @@ -36,7 +36,7 @@ INSERT INTO pluto_changes_not_applied SELECT DISTINCT b.*, a.lotarea AS found_value -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'lotarea' @@ -44,7 +44,7 @@ WHERE INSERT INTO pluto_changes_applied SELECT DISTINCT b.* -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'lotarea' @@ -66,7 +66,7 @@ INSERT INTO pluto_changes_not_applied SELECT DISTINCT b.*, a.bldgarea AS found_value -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'bldgarea' @@ -74,7 +74,7 @@ WHERE INSERT INTO pluto_changes_applied SELECT DISTINCT b.* -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'bldgarea' @@ -85,7 +85,7 @@ UPDATE pluto a SET bldgarea = b.new_value, dcpedited = 't' -FROM pluto_input_research AS b +FROM stg__pluto_input_research AS b WHERE a.bbl = b.bbl AND b.field = 'bldgarea' @@ -108,7 +108,7 @@ INSERT INTO pluto_changes_not_applied SELECT DISTINCT b.*, a.lotfront AS found_value -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'lotfront' @@ -116,7 +116,7 @@ WHERE INSERT INTO pluto_changes_applied SELECT DISTINCT b.* -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'lotfront' @@ -139,7 +139,7 @@ INSERT INTO pluto_changes_not_applied SELECT DISTINCT b.*, a.lotdepth AS found_value -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto 
AS a WHERE b.bbl = a.bbl AND b.field = 'lotdepth' @@ -147,7 +147,7 @@ WHERE INSERT INTO pluto_changes_applied SELECT DISTINCT b.* -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = 'lotdepth' diff --git a/products/pluto/pluto_build/sql/corr_template.sql b/products/pluto/pluto_build/sql/corr_template.sql index 5b6b627ad3..7a93df3167 100644 --- a/products/pluto/pluto_build/sql/corr_template.sql +++ b/products/pluto/pluto_build/sql/corr_template.sql @@ -4,7 +4,7 @@ INSERT INTO pluto_changes_not_applied SELECT DISTINCT b.*, a.:FIELD AS found_value -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE a.bbl = b.bbl AND b.field = :'FIELD' @@ -12,7 +12,7 @@ WHERE INSERT INTO pluto_changes_applied SELECT DISTINCT b.* -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl = a.bbl AND b.field = :'FIELD' @@ -22,7 +22,7 @@ UPDATE pluto a SET :FIELD = b.new_value, dcpedited = 't' -FROM pluto_input_research AS b +FROM stg__pluto_input_research AS b WHERE b.bbl = a.bbl AND b.field = :'FIELD' @@ -39,7 +39,7 @@ SELECT DISTINCT b.type, b.reason, b.version -FROM pluto_input_research AS b, pluto AS a +FROM stg__pluto_input_research AS b, pluto AS a WHERE b.bbl IS NULL AND b.field = :'FIELD' @@ -49,7 +49,7 @@ UPDATE pluto a SET :FIELD = b.new_value, dcpedited = 't' -FROM pluto_input_research AS b +FROM stg__pluto_input_research AS b WHERE b.bbl IS NULL AND b.field = :'FIELD' diff --git a/products/pluto/pluto_build/sql/flood_flag.sql b/products/pluto/pluto_build/sql/flood_flag.sql index acf49d904d..1e7b41e188 100644 --- a/products/pluto/pluto_build/sql/flood_flag.sql +++ b/products/pluto/pluto_build/sql/flood_flag.sql @@ -6,7 +6,7 @@ UPDATE pluto a SET firm07_flag = '1' FROM ( SELECT ST_SUBDIVIDE(ST_MAKEVALID(geom)) AS geom - FROM fema_firms2007_100yr AS b + FROM stg__fema_firms2007_100yr AS b WHERE b.fld_zone != 'X' 
AND b.fld_zone != '0.2 PCT ANNUAL CHANCE FLOOD HAZARD' @@ -18,7 +18,7 @@ UPDATE pluto a SET pfirm15_flag = '1' FROM ( SELECT ST_SUBDIVIDE(ST_MAKEVALID(geom)) AS geom - FROM fema_pfirms2015_100yr AS b + FROM stg__fema_pfirms2015_100yr AS b WHERE b.fld_zone != 'X' AND b.fld_zone != '0.2 PCT ANNUAL CHANCE FLOOD HAZARD' diff --git a/products/pluto/pluto_build/sql/ownertype.sql b/products/pluto/pluto_build/sql/ownertype.sql index bd13a2912d..05b4e89139 100644 --- a/products/pluto/pluto_build/sql/ownertype.sql +++ b/products/pluto/pluto_build/sql/ownertype.sql @@ -1,7 +1,7 @@ -- set the owner type code based on data from COLP UPDATE pluto a SET ownertype = b.ownership -FROM dcp_colp AS b +FROM stg__dcp_colp AS b WHERE a.bbl::numeric = b.bbl::numeric; -- set X as owner type diff --git a/products/pluto/pluto_build/sql/spatialjoins.sql b/products/pluto/pluto_build/sql/spatialjoins.sql index 94aa0325fb..dd3471f9c2 100644 --- a/products/pluto/pluto_build/sql/spatialjoins.sql +++ b/products/pluto/pluto_build/sql/spatialjoins.sql @@ -12,7 +12,7 @@ WHERE a.ycoord !~ '[0-9]'; UPDATE pluto a SET cd = b.borocd -FROM dcp_cdboundaries_wi AS b +FROM stg__dcp_cdboundaries_wi AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.cd IS NULL @@ -22,29 +22,29 @@ UPDATE pluto a SET ct2010 = LEFT(b.ct2010, 4) || '.' || RIGHT(b.ct2010, 2), tract2010 = LEFT(b.ct2010, 4) || '.' 
|| RIGHT(b.ct2010, 2) -FROM dcp_ct2010_wi AS b +FROM stg__dcp_ct2010_wi AS b WHERE a.geom && b.geom AND ST_WITHIN(a.centroid, b.geom) AND (a.ct2010 IS NULL OR a.ct2010::numeric = 0); UPDATE pluto a SET cb2010 = COALESCE(a.cb2010, b.cb2010) -FROM dcp_cb2010_wi AS b +FROM stg__dcp_cb2010_wi AS b WHERE a.geom && b.geom AND ST_WITHIN(a.centroid, b.geom); UPDATE pluto a SET bct2020 = COALESCE(a.bct2020, b.boroct2020) -FROM dcp_ct2020_wi AS b +FROM stg__dcp_ct2020_wi AS b WHERE a.geom && b.geom AND ST_WITHIN(a.centroid, b.geom); UPDATE pluto a SET bctcb2020 = COALESCE(a.bctcb2020, b.bctcb2020) -FROM dcp_cb2020_wi AS b +FROM stg__dcp_cb2020_wi AS b WHERE a.geom && b.geom AND ST_WITHIN(a.centroid, b.geom); UPDATE pluto a SET schooldist = b.schooldist -FROM dcp_school_districts AS b +FROM stg__dcp_school_districts AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.schooldist IS NULL @@ -52,7 +52,7 @@ WHERE UPDATE pluto a SET council = LTRIM(b.coundist::text, '0') -FROM dcp_councildistricts_wi AS b +FROM stg__dcp_councildistricts_wi AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.council IS NULL @@ -60,7 +60,7 @@ WHERE UPDATE pluto a SET firecomp = b.firecotype || LPAD(b.fireconum::text, 3, '0') -FROM dcp_firecompanies AS b +FROM stg__dcp_firecompanies AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.firecomp IS NULL @@ -68,7 +68,7 @@ WHERE UPDATE pluto a SET policeprct = b.precinct -FROM dcp_policeprecincts AS b +FROM stg__dcp_policeprecincts AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.policeprct IS NULL @@ -76,7 +76,7 @@ WHERE UPDATE pluto a SET healthcenterdistrict = b.hcentdist -FROM dcp_healthcenters AS b +FROM stg__dcp_healthcenters AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.healthcenterdistrict IS NULL @@ -84,7 +84,7 @@ WHERE UPDATE pluto a SET healtharea = b.healtharea -FROM dcp_healthareas AS b +FROM stg__dcp_healthareas AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.healtharea IS NULL @@ -94,7 +94,7 @@ UPDATE pluto a SET sanitdistrict = LEFT(schedulecode, 3), 
sanitsub = RIGHT(schedulecode, 2) -FROM dsny_frequencies AS b +FROM stg__dsny_frequencies AS b WHERE ST_WITHIN(a.centroid, b.geom) AND (a.sanitsub IS NULL OR a.sanitsub = ' ') @@ -102,7 +102,7 @@ WHERE UPDATE pluto a SET zipcode = b.zipcode -FROM doitt_zipcodeboundaries AS b +FROM stg__doitt_zipcodeboundaries AS b WHERE ST_WITHIN(a.centroid, b.geom) AND a.zipcode IS NULL diff --git a/products/pluto/pluto_build/sql/zoning_commercialoverlay.sql b/products/pluto/pluto_build/sql/zoning_commercialoverlay.sql index 73cf769f04..34f2bdf449 100644 --- a/products/pluto/pluto_build/sql/zoning_commercialoverlay.sql +++ b/products/pluto/pluto_build/sql/zoning_commercialoverlay.sql @@ -26,7 +26,7 @@ WITH commoverlayper AS ( ) AS segzonegeom, ST_AREA(n.geom) AS allzonegeom FROM pluto AS p - INNER JOIN dcp_commercialoverlay AS n + INNER JOIN stg__dcp_commercialoverlay AS n ON ST_INTERSECTS(p.geom, n.geom) ), diff --git a/products/pluto/pluto_build/sql/zoning_limitedheight.sql b/products/pluto/pluto_build/sql/zoning_limitedheight.sql index 55ace3c234..a7e57931dc 100644 --- a/products/pluto/pluto_build/sql/zoning_limitedheight.sql +++ b/products/pluto/pluto_build/sql/zoning_limitedheight.sql @@ -27,7 +27,7 @@ CREATE TABLE limitedheightperorder AS ( ) AS segzonegeom, ST_AREA(n.geom) AS allzonegeom FROM pluto AS p - INNER JOIN dcp_limitedheight AS n + INNER JOIN stg__dcp_limitedheight AS n ON ST_INTERSECTS(p.geom, n.geom) ) diff --git a/products/pluto/pluto_build/sql/zoning_specialdistrict.sql b/products/pluto/pluto_build/sql/zoning_specialdistrict.sql index 77358c4536..cdfe82bed8 100644 --- a/products/pluto/pluto_build/sql/zoning_specialdistrict.sql +++ b/products/pluto/pluto_build/sql/zoning_specialdistrict.sql @@ -24,7 +24,7 @@ SELECT ) AS segzonegeom, ST_AREA(n.geom) AS allzonegeom FROM pluto AS p -INNER JOIN dcp_specialpurpose AS n +INNER JOIN stg__dcp_specialpurpose AS n ON ST_INTERSECTS(p.geom, n.geom); DROP TABLE IF EXISTS specialpurposeperorder; diff --git 
a/products/pluto/pluto_build/sql/zoning_zonemap.sql b/products/pluto/pluto_build/sql/zoning_zonemap.sql index b7cd2cc3ce..2d76790cf5 100644 --- a/products/pluto/pluto_build/sql/zoning_zonemap.sql +++ b/products/pluto/pluto_build/sql/zoning_zonemap.sql @@ -26,7 +26,7 @@ CREATE TABLE zoningmapperorder AS ( ) AS segzonegeom, ST_AREA(n.geom) AS allzonegeom FROM pluto AS p - INNER JOIN dcp_zoningmapindex AS n + INNER JOIN stg__dcp_zoningmapindex AS n ON ST_INTERSECTS(p.geom, n.geom) ) From 570c390d3dca2572bcd3887cf9f53a5c89880255 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 12 Feb 2026 21:41:45 -0500 Subject: [PATCH 07/26] Integrate dbt into build pipeline (n58.1.10) - Created 01a_dbt_staging.sh to run dbt staging models - Script runs between data load and legacy SQL build - Materializes 40 staging models before 02_build.sh runs - Added pluto_build/README.md documenting build sequence - Legacy SQL can now reference stg__ tables --- products/pluto/pluto_build/01a_dbt_staging.sh | 23 ++++++++ products/pluto/pluto_build/README.md | 53 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100755 products/pluto/pluto_build/01a_dbt_staging.sh create mode 100644 products/pluto/pluto_build/README.md diff --git a/products/pluto/pluto_build/01a_dbt_staging.sh b/products/pluto/pluto_build/01a_dbt_staging.sh new file mode 100755 index 0000000000..7194af761e --- /dev/null +++ b/products/pluto/pluto_build/01a_dbt_staging.sh @@ -0,0 +1,23 @@ +#!/bin/bash +source ./bash/config.sh +set_error_traps + +echo "Materializing DBT staging models..." + +# Go to pluto product directory (parent of pluto_build) +cd .. + +# Run DBT staging models +# Use BUILD_ENGINE_SCHEMA environment variable if set, otherwise default to public +echo "Running: dbt run --select staging" +dbt run --select staging --profiles-dir . --target ${DBT_TARGET:-dev} + +if [ $? 
-ne 0 ]; then + echo "ERROR: DBT staging models failed to materialize" + exit 1 +fi + +echo "βœ“ DBT staging models materialized successfully" + +# Return to pluto_build directory +cd pluto_build diff --git a/products/pluto/pluto_build/README.md b/products/pluto/pluto_build/README.md new file mode 100644 index 0000000000..b03165541b --- /dev/null +++ b/products/pluto/pluto_build/README.md @@ -0,0 +1,53 @@ +# PLUTO Build Process + +## Build Sequence + +The PLUTO build follows this sequence: + +1. **00_setup.sh** - Drops existing tables to start fresh +2. **01_load_local_csvs.sh** - Loads local CSV data +3. **01a_dbt_staging.sh** - πŸ†• Materializes DBT staging models +4. **02_build.sh** - Runs legacy SQL to build PLUTO +5. **03_corrections.sh** - Applies corrections +6. **04_archive.sh** - Archives the build +7. **05_qaqc.sh** - Runs QAQC checks +8. **06_export.sh** - Exports final output +9. **07_custom_qaqc.sh** - Custom QAQC + +## Important: DBT Staging Models + +As of Phase 1 of the DBT migration, **01a_dbt_staging.sh must run before 02_build.sh**. + +The legacy SQL files in `sql/` now reference DBT staging models (prefixed with `stg__`) instead of raw source tables. These staging models must be materialized first. + +### What 01a_dbt_staging.sh does: +- Runs `dbt run --select staging` +- Materializes 40 staging models from raw recipe data +- Creates tables like `stg__dcp_councildistricts_wi`, `stg__lpc_landmarks`, etc. +- These tables are then used by legacy SQL in 02_build.sh + +### Dependencies: +- Requires `dbt` to be installed +- Requires recipe data to be loaded first (via recipe.yml) +- Uses BUILD_ENGINE_SCHEMA environment variable + +## Running the Build + +```bash +# Full build sequence (after recipe loads data): +./00_setup.sh +./01_load_local_csvs.sh +./01a_dbt_staging.sh # πŸ†• DBT staging models +./02_build.sh +./03_corrections.sh +# ... 
continue with remaining steps +``` + +## Migration Status + +**Phase 1 (Complete):** Staging layer +- βœ… 40 staging models created +- βœ… All legacy SQL refactored to use staging models +- βœ… preprocessing.sql eliminated + +**Phase 2-5 (Future):** Migrate remaining SQL to DBT intermediate/product models From 4729c0ab86447931ed3bcf2aa618f5beb002b8a2 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 12 Feb 2026 21:55:43 -0500 Subject: [PATCH 08/26] Migrate CSV lookups to dbt seeds (n58.1.11) - Moved 9 CSV files from pluto_build/data/ to seeds/ - Configured seeds in dbt_project.yml (+quote_columns, +schema: public) - Documented all seeds in seeds/_seeds.yml - Updated 01a_dbt_staging.sh to run 'dbt seed' before staging models - Deleted 01_load_local_csvs.sh (replaced by dbt seed) - Deleted sql/_create.sql (replaced by dbt seed) - Updated README.md with seed documentation - No SQL changes needed - seeds create same table names --- products/pluto/dbt_project.yml | 5 ++ .../pluto/pluto_build/01_load_local_csvs.sh | 5 -- products/pluto/pluto_build/01a_dbt_staging.sh | 17 +++-- products/pluto/pluto_build/README.md | 53 +++++++++----- products/pluto/pluto_build/sql/_create.sql | 70 ------------------- products/pluto/seeds/_seeds.yml | 43 ++++++++++++ .../data => seeds}/dcp_zoning_maxfar.csv | 0 .../data => seeds}/lookup_bldgclass.csv | 0 .../data => seeds}/lookup_lottype.csv | 0 .../data => seeds}/pluto_input_bsmtcode.csv | 0 .../pluto_input_condo_bldgclass.csv | 0 ...o_input_condolot_descriptiveattributes.csv | 0 .../pluto_input_landuse_bldgclass.csv | 0 .../data => seeds}/pluto_input_research.csv | 0 .../zoning_district_class_descriptions.csv | 0 15 files changed, 91 insertions(+), 102 deletions(-) delete mode 100755 products/pluto/pluto_build/01_load_local_csvs.sh delete mode 100644 products/pluto/pluto_build/sql/_create.sql rename products/pluto/{pluto_build/data => seeds}/dcp_zoning_maxfar.csv (100%) rename products/pluto/{pluto_build/data => 
seeds}/lookup_bldgclass.csv (100%) rename products/pluto/{pluto_build/data => seeds}/lookup_lottype.csv (100%) rename products/pluto/{pluto_build/data => seeds}/pluto_input_bsmtcode.csv (100%) rename products/pluto/{pluto_build/data => seeds}/pluto_input_condo_bldgclass.csv (100%) rename products/pluto/{pluto_build/data => seeds}/pluto_input_condolot_descriptiveattributes.csv (100%) rename products/pluto/{pluto_build/data => seeds}/pluto_input_landuse_bldgclass.csv (100%) rename products/pluto/{pluto_build/data => seeds}/pluto_input_research.csv (100%) rename products/pluto/{pluto_build/data => seeds}/zoning_district_class_descriptions.csv (100%) diff --git a/products/pluto/dbt_project.yml b/products/pluto/dbt_project.yml index 156e029f79..fbf23df76a 100644 --- a/products/pluto/dbt_project.yml +++ b/products/pluto/dbt_project.yml @@ -5,6 +5,11 @@ profile: "dcp-de-postgres" model-paths: [ "models" ] test-paths: [ "tests" ] +seeds: + pluto: + +quote_columns: true + +schema: public + tests: +store_failures: true +severity: "{{ env_var('TEST_SEVERITY', 'error') }}" diff --git a/products/pluto/pluto_build/01_load_local_csvs.sh b/products/pluto/pluto_build/01_load_local_csvs.sh deleted file mode 100755 index 49e4ec0cb3..0000000000 --- a/products/pluto/pluto_build/01_load_local_csvs.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -source ./bash/config.sh -set_error_traps - -run_sql_file sql/_create.sql diff --git a/products/pluto/pluto_build/01a_dbt_staging.sh b/products/pluto/pluto_build/01a_dbt_staging.sh index 7194af761e..10881a3591 100755 --- a/products/pluto/pluto_build/01a_dbt_staging.sh +++ b/products/pluto/pluto_build/01a_dbt_staging.sh @@ -2,22 +2,21 @@ source ./bash/config.sh set_error_traps -echo "Materializing DBT staging models..." - -# Go to pluto product directory (parent of pluto_build) +echo "Loading DBT seeds (lookup tables)..." cd .. +dbt seed --profiles-dir . --target ${DBT_TARGET:-dev} +if [ $? 
-ne 0 ]; then + echo "ERROR: DBT seeds failed to load" + exit 1 +fi +echo "βœ“ DBT seeds loaded successfully" -# Run DBT staging models -# Use BUILD_ENGINE_SCHEMA environment variable if set, otherwise default to public -echo "Running: dbt run --select staging" +echo "Materializing DBT staging models..." dbt run --select staging --profiles-dir . --target ${DBT_TARGET:-dev} - if [ $? -ne 0 ]; then echo "ERROR: DBT staging models failed to materialize" exit 1 fi - echo "βœ“ DBT staging models materialized successfully" -# Return to pluto_build directory cd pluto_build diff --git a/products/pluto/pluto_build/README.md b/products/pluto/pluto_build/README.md index b03165541b..2f59814a69 100644 --- a/products/pluto/pluto_build/README.md +++ b/products/pluto/pluto_build/README.md @@ -5,44 +5,61 @@ The PLUTO build follows this sequence: 1. **00_setup.sh** - Drops existing tables to start fresh -2. **01_load_local_csvs.sh** - Loads local CSV data -3. **01a_dbt_staging.sh** - πŸ†• Materializes DBT staging models -4. **02_build.sh** - Runs legacy SQL to build PLUTO -5. **03_corrections.sh** - Applies corrections -6. **04_archive.sh** - Archives the build -7. **05_qaqc.sh** - Runs QAQC checks -8. **06_export.sh** - Exports final output -9. **07_custom_qaqc.sh** - Custom QAQC +2. **01a_dbt_staging.sh** - πŸ†• Loads DBT seeds & materializes staging models +3. **02_build.sh** - Runs legacy SQL to build PLUTO +4. **03_corrections.sh** - Applies corrections +5. **04_archive.sh** - Archives the build +6. **05_qaqc.sh** - Runs QAQC checks +7. **06_export.sh** - Exports final output +8. **07_custom_qaqc.sh** - Custom QAQC -## Important: DBT Staging Models +## Important: DBT Seeds & Staging Models As of Phase 1 of the DBT migration, **01a_dbt_staging.sh must run before 02_build.sh**. -The legacy SQL files in `sql/` now reference DBT staging models (prefixed with `stg__`) instead of raw source tables. These staging models must be materialized first. 
- ### What 01a_dbt_staging.sh does: -- Runs `dbt run --select staging` -- Materializes 40 staging models from raw recipe data -- Creates tables like `stg__dcp_councildistricts_wi`, `stg__lpc_landmarks`, etc. -- These tables are then used by legacy SQL in 02_build.sh + +1. **Loads DBT seeds** (`dbt seed`) + - Loads 9 CSV lookup tables from `seeds/` directory + - Creates tables: pluto_input_research, dcp_zoning_maxfar, lookup_bldgclass, etc. + - Replaces old CSV loading via \COPY commands + +2. **Materializes DBT staging models** (`dbt run --select staging`) + - Creates 40 staging models from raw recipe data + - Creates tables like `stg__dcp_councildistricts_wi`, `stg__lpc_landmarks`, etc. + - These tables are then used by legacy SQL in 02_build.sh ### Dependencies: - Requires `dbt` to be installed - Requires recipe data to be loaded first (via recipe.yml) -- Uses BUILD_ENGINE_SCHEMA environment variable +- Uses DBT_TARGET environment variable (defaults to 'dev') ## Running the Build ```bash # Full build sequence (after recipe loads data): ./00_setup.sh -./01_load_local_csvs.sh -./01a_dbt_staging.sh # πŸ†• DBT staging models +./01a_dbt_staging.sh # πŸ†• DBT seeds + staging models ./02_build.sh ./03_corrections.sh # ... continue with remaining steps ``` +## Lookup Tables (Seeds) + +The following lookup/reference tables are now managed as DBT seeds in `seeds/`: +- `pluto_input_research.csv` - Manual corrections (27k rows) +- `dcp_zoning_maxfar.csv` - Max FAR by zoning district +- `lookup_bldgclass.csv` - Building class lookup +- `lookup_lottype.csv` - Lot type lookup +- `pluto_input_bsmtcode.csv` - Basement codes +- `pluto_input_condo_bldgclass.csv` - Condo building classes +- `pluto_input_condolot_descriptiveattributes.csv` - Condo attributes (8k rows) +- `pluto_input_landuse_bldgclass.csv` - Land use mappings +- `zoning_district_class_descriptions.csv` - Zoning descriptions + +These are loaded automatically by `dbt seed` in 01a_dbt_staging.sh. 
+ ## Migration Status **Phase 1 (Complete):** Staging layer diff --git a/products/pluto/pluto_build/sql/_create.sql b/products/pluto/pluto_build/sql/_create.sql deleted file mode 100644 index 825672718c..0000000000 --- a/products/pluto/pluto_build/sql/_create.sql +++ /dev/null @@ -1,70 +0,0 @@ -DROP TABLE IF EXISTS pluto_input_research; -CREATE TABLE pluto_input_research ( - bbl text, - field text, - old_value text, - new_value text, - type text, - reason text, - version text -); -\COPY pluto_input_research FROM 'data/pluto_input_research.csv' DELIMITER ',' CSV HEADER; - -DROP TABLE IF EXISTS pluto_input_landuse_bldgclass; -CREATE TABLE pluto_input_landuse_bldgclass ( - bldgclass text, - landuse text, - landusevalue text -); -\COPY pluto_input_landuse_bldgclass FROM 'data/pluto_input_landuse_bldgclass.csv' DELIMITER ',' CSV HEADER; - - -DROP TABLE IF EXISTS pluto_input_condolot_descriptiveattributes; -CREATE TABLE pluto_input_condolot_descriptiveattributes ( - condno text, - boro text, - parid text, - bc text, - tc text, - landsize text, - story text, - yearbuilt text -); -\COPY pluto_input_condolot_descriptiveattributes FROM 'data/pluto_input_condolot_descriptiveattributes.csv' DELIMITER ',' CSV HEADER; - -DROP TABLE IF EXISTS pluto_input_condo_bldgclass; -CREATE TABLE pluto_input_condo_bldgclass ( - code character varying, - description character varying, - type character varying, - dcpcreated character varying, - logic character varying -); -\COPY pluto_input_condo_bldgclass FROM 'data/pluto_input_condo_bldgclass.csv' DELIMITER ',' CSV HEADER; - -DROP TABLE IF EXISTS pluto_input_bsmtcode; -CREATE TABLE pluto_input_bsmtcode ( - bsmnt_type character varying, - bsmntgradient character varying, - bsmtcode character varying, - bsmnt_typevalue character varying, - bsmntgradientvalue character varying, - bsmtcodevalue character varying -); -\COPY pluto_input_bsmtcode FROM 'data/pluto_input_bsmtcode.csv' DELIMITER ',' CSV HEADER; - - -DROP TABLE IF EXISTS 
dcp_zoning_maxfar; -CREATE TABLE dcp_zoning_maxfar ( - zonedist character varying, - contextual character varying, - zoningdistricttype character varying, - resdisteq character varying, - residfar character varying, - affresfar character varying, - facilfar character varying, - commfar character varying, - mnffar character varying, - verified character varying -); -\COPY dcp_zoning_maxfar FROM 'data/dcp_zoning_maxfar.csv' DELIMITER ',' CSV HEADER; diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index e5de144540..c026046f0d 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -2,6 +2,49 @@ version: 2 seeds: - name: ignored_bbls_for_unit_count_test + description: BBLs to ignore in unit count test + + - name: pluto_input_research + description: Manual corrections and overrides for PLUTO data (27k rows) + columns: + - name: bbl + description: Borough-Block-Lot identifier + - name: field + description: Field being corrected + - name: old_value + description: Original value + - name: new_value + description: Corrected value + - name: type + description: Correction type + - name: reason + description: Reason for correction + - name: version + description: PLUTO version + + - name: dcp_zoning_maxfar + description: Maximum Floor Area Ratio (FAR) by zoning district + + - name: pluto_input_landuse_bldgclass + description: Building class to land use mapping + + - name: pluto_input_condolot_descriptiveattributes + description: Descriptive attributes for condo lots (8.3k rows) + + - name: pluto_input_condo_bldgclass + description: Condo building class codes + + - name: pluto_input_bsmtcode + description: Basement type codes and classifications + + - name: lookup_bldgclass + description: Building class lookup table with descriptions + + - name: lookup_lottype + description: Lot type lookup table + + - name: zoning_district_class_descriptions + description: Zoning district classification descriptions description: | 
List of manually researched bbls that have correct count of residential units in DOF PTS data, despite failing `assert_condo_bbl_unit_count_research_required` test. These records are ignored diff --git a/products/pluto/pluto_build/data/dcp_zoning_maxfar.csv b/products/pluto/seeds/dcp_zoning_maxfar.csv similarity index 100% rename from products/pluto/pluto_build/data/dcp_zoning_maxfar.csv rename to products/pluto/seeds/dcp_zoning_maxfar.csv diff --git a/products/pluto/pluto_build/data/lookup_bldgclass.csv b/products/pluto/seeds/lookup_bldgclass.csv similarity index 100% rename from products/pluto/pluto_build/data/lookup_bldgclass.csv rename to products/pluto/seeds/lookup_bldgclass.csv diff --git a/products/pluto/pluto_build/data/lookup_lottype.csv b/products/pluto/seeds/lookup_lottype.csv similarity index 100% rename from products/pluto/pluto_build/data/lookup_lottype.csv rename to products/pluto/seeds/lookup_lottype.csv diff --git a/products/pluto/pluto_build/data/pluto_input_bsmtcode.csv b/products/pluto/seeds/pluto_input_bsmtcode.csv similarity index 100% rename from products/pluto/pluto_build/data/pluto_input_bsmtcode.csv rename to products/pluto/seeds/pluto_input_bsmtcode.csv diff --git a/products/pluto/pluto_build/data/pluto_input_condo_bldgclass.csv b/products/pluto/seeds/pluto_input_condo_bldgclass.csv similarity index 100% rename from products/pluto/pluto_build/data/pluto_input_condo_bldgclass.csv rename to products/pluto/seeds/pluto_input_condo_bldgclass.csv diff --git a/products/pluto/pluto_build/data/pluto_input_condolot_descriptiveattributes.csv b/products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv similarity index 100% rename from products/pluto/pluto_build/data/pluto_input_condolot_descriptiveattributes.csv rename to products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv diff --git a/products/pluto/pluto_build/data/pluto_input_landuse_bldgclass.csv b/products/pluto/seeds/pluto_input_landuse_bldgclass.csv similarity index 
100% rename from products/pluto/pluto_build/data/pluto_input_landuse_bldgclass.csv rename to products/pluto/seeds/pluto_input_landuse_bldgclass.csv diff --git a/products/pluto/pluto_build/data/pluto_input_research.csv b/products/pluto/seeds/pluto_input_research.csv similarity index 100% rename from products/pluto/pluto_build/data/pluto_input_research.csv rename to products/pluto/seeds/pluto_input_research.csv diff --git a/products/pluto/pluto_build/data/zoning_district_class_descriptions.csv b/products/pluto/seeds/zoning_district_class_descriptions.csv similarity index 100% rename from products/pluto/pluto_build/data/zoning_district_class_descriptions.csv rename to products/pluto/seeds/zoning_district_class_descriptions.csv From 65a094cb9b36c0bdb0893144bfe7dc489aa84e8f Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 12:25:58 -0500 Subject: [PATCH 09/26] Replace 01_load_local_csvs.sh with dbt seed workflow - Update GitHub workflow to call 01a_dbt_staging.sh instead of removed 01_load_local_csvs.sh - Remove duplicate dbt seed call from 07_custom_qaqc.sh to avoid reloading seeds - Seeds are now loaded exactly once via 01a_dbt_staging.sh Closes data-engineering-n58.3 --- .github/workflows/pluto_build.yml | 4 ++-- products/pluto/pluto_build/01a_dbt_staging.sh | 4 ++++ products/pluto/pluto_build/07_custom_qaqc.sh | 14 ++------------ 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pluto_build.yml b/.github/workflows/pluto_build.yml index 226d754ff1..7556736f84 100644 --- a/.github/workflows/pluto_build.yml +++ b/.github/workflows/pluto_build.yml @@ -83,8 +83,8 @@ jobs: run: python3 -m dcpy lifecycle builds load load --recipe-path ${{ inputs.recipe_file }}.lock.yml - - name: Load Local Data - run: ./01_load_local_csvs.sh + - name: Load Local Data and DBT Staging + run: ./01a_dbt_staging.sh - name: building ... 
run: ./02_build.sh diff --git a/products/pluto/pluto_build/01a_dbt_staging.sh b/products/pluto/pluto_build/01a_dbt_staging.sh index 10881a3591..72f8ff9c23 100755 --- a/products/pluto/pluto_build/01a_dbt_staging.sh +++ b/products/pluto/pluto_build/01a_dbt_staging.sh @@ -2,6 +2,10 @@ source ./bash/config.sh set_error_traps +echo "Setup dbt" +dbt deps +dbt debug + echo "Loading DBT seeds (lookup tables)..." cd .. dbt seed --profiles-dir . --target ${DBT_TARGET:-dev} diff --git a/products/pluto/pluto_build/07_custom_qaqc.sh b/products/pluto/pluto_build/07_custom_qaqc.sh index 0fcaa75a39..e80713f388 100755 --- a/products/pluto/pluto_build/07_custom_qaqc.sh +++ b/products/pluto/pluto_build/07_custom_qaqc.sh @@ -2,21 +2,11 @@ source ./pluto_build/bash/config.sh # assuming this script is run from pluto/ dir set_error_traps -echo "Setup dbt" -dbt deps -dbt debug - -echo "Build seed tables" -dbt build --select config.materialized:seed --indirect-selection=cautious --full-refresh - echo "Test source tables" dbt test --select "source:*" --exclude tag:de_check -echo "Build staging tables" -dbt build --select staging --exclude tag:de_check - -echo "Build intermediate QAQC tables" -dbt build --select qaqc --exclude tag:de_check +echo "Build QAQC models (intermediate and reports)" +dbt build --select qaqc.intermediate qaqc.reports --exclude tag:de_check echo "🔥 Run DE aka important tests 🔥" dbt test --select tag:de_check,tag:$VERSION_TYPE # this will only run tests that have both tags, not just one of them From be3e903e0da0b999522bd97c914b78d99bc9c499 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 12:53:44 -0500 Subject: [PATCH 10/26] Fix dbt profiles and schema config issues - Add --profiles-dir . to all dbt commands in 01a_dbt_staging.sh and 07_custom_qaqc.sh - Move 'cd ..' 
before dbt deps/debug in 01a_dbt_staging.sh - Fix schema config deprecation in dbt_project.yml (add + prefix to tests.schema) - Ensures dbt uses local profiles.yml in GHA workflows --- products/pluto/dbt_project.yml | 2 +- products/pluto/pluto_build/01a_dbt_staging.sh | 6 +++--- products/pluto/pluto_build/07_custom_qaqc.sh | 6 +++--- products/pluto/seeds/_seeds.yml | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/products/pluto/dbt_project.yml b/products/pluto/dbt_project.yml index fbf23df76a..091b14b844 100644 --- a/products/pluto/dbt_project.yml +++ b/products/pluto/dbt_project.yml @@ -13,7 +13,7 @@ seeds: tests: +store_failures: true +severity: "{{ env_var('TEST_SEVERITY', 'error') }}" - schema: "_tests" + +schema: "_tests" vars: version: "{{ env_var('VERSION') }}" diff --git a/products/pluto/pluto_build/01a_dbt_staging.sh b/products/pluto/pluto_build/01a_dbt_staging.sh index 72f8ff9c23..96c140e048 100755 --- a/products/pluto/pluto_build/01a_dbt_staging.sh +++ b/products/pluto/pluto_build/01a_dbt_staging.sh @@ -3,11 +3,11 @@ source ./bash/config.sh set_error_traps echo "Setup dbt" -dbt deps -dbt debug +cd .. +dbt deps --profiles-dir . +dbt debug --profiles-dir . echo "Loading DBT seeds (lookup tables)..." -cd .. dbt seed --profiles-dir . --target ${DBT_TARGET:-dev} if [ $? -ne 0 ]; then echo "ERROR: DBT seeds failed to load" diff --git a/products/pluto/pluto_build/07_custom_qaqc.sh b/products/pluto/pluto_build/07_custom_qaqc.sh index e80713f388..0dafd13651 100755 --- a/products/pluto/pluto_build/07_custom_qaqc.sh +++ b/products/pluto/pluto_build/07_custom_qaqc.sh @@ -3,10 +3,10 @@ source ./pluto_build/bash/config.sh # assuming this script is run from pluto/ set_error_traps echo "Test source tables" -dbt test --select "source:*" --exclude tag:de_check +dbt test --select "source:*" --exclude tag:de_check --profiles-dir . 
--target ${DBT_TARGET:-dev} echo "Build QAQC models (intermediate and reports)" -dbt build --select qaqc.intermediate qaqc.reports --exclude tag:de_check +dbt build --select qaqc.intermediate qaqc.reports --exclude tag:de_check --profiles-dir . --target ${DBT_TARGET:-dev} echo "🔥 Run DE aka important tests 🔥" -dbt test --select tag:de_check,tag:$VERSION_TYPE # this will only run tests that have both tags, not just one of them +dbt test --select tag:de_check,tag:$VERSION_TYPE --profiles-dir . --target ${DBT_TARGET:-dev} # this will only run tests that have both tags, not just one of them diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index c026046f0d..0d4b20433c 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -2,6 +2,10 @@ version: 2 seeds: - name: ignored_bbls_for_unit_count_test + description: | + List of manually researched bbls that have correct count of residential units in DOF PTS data, + despite failing `assert_condo_bbl_unit_count_research_required` test. These records are ignored + during the test. description: BBLs to ignore in unit count test - name: pluto_input_research @@ -45,10 +49,6 @@ seeds: - name: zoning_district_class_descriptions description: Zoning district classification descriptions - description: | - List of manually researched bbls that have correct count of residential units in DOF PTS data, - despite failing `assert_condo_bbl_unit_count_research_required` test. These records are ignored - during the test. 
config: column_types: bbl: text From 8726179007832084193059d958dd66940013a66a Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 13:38:24 -0500 Subject: [PATCH 11/26] Remove duplicate sources in _sources.yml - Removed duplicate pluto_pts entry (was in recipe_sources twice) - Removed duplicate dcp_zoningdistricts entry (was in recipe_sources and build_sources) Fixes dbt compilation error about duplicate source names --- products/pluto/models/_sources.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index 696b2c8f52..cb1a590d30 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -81,8 +81,6 @@ sources: ] - name: date_complete - - name: pluto_pts - - name: build_sources schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tables: @@ -96,5 +94,4 @@ sources: - name: previous_pluto - name: export_pluto description: final PLUTO table - - name: dcp_zoningdistricts - name: dcp_zoning_maxfar From c5681baffbd0f936ca5d8d8bf55d0609240d1260 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 14:11:33 -0500 Subject: [PATCH 12/26] Fix seed column type errors for BBL fields - Add column_types config for ignored_bbls_for_unit_count_test (bbl, pluto_version as text) - Add column_types config for pluto_input_research (bbl as text) - Add column_types config for pluto_input_condolot_descriptiveattributes (condno, parid as text) - Remove incorrect column_types from zoning_district_class_descriptions - Fixes 'integer out of range' errors when loading BBL values --- products/pluto/seeds/_seeds.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index 0d4b20433c..31c090d05d 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -6,10 +6,16 @@ seeds: List of manually researched bbls that have correct count of residential 
units in DOF PTS data, despite failing `assert_condo_bbl_unit_count_research_required` test. These records are ignored during the test. - description: BBLs to ignore in unit count test + config: + column_types: + bbl: text + pluto_version: text - name: pluto_input_research description: Manual corrections and overrides for PLUTO data (27k rows) + config: + column_types: + bbl: text columns: - name: bbl description: Borough-Block-Lot identifier @@ -34,6 +40,10 @@ seeds: - name: pluto_input_condolot_descriptiveattributes description: Descriptive attributes for condo lots (8.3k rows) + config: + column_types: + condno: text + parid: text - name: pluto_input_condo_bldgclass description: Condo building class codes @@ -49,7 +59,3 @@ seeds: - name: zoning_district_class_descriptions description: Zoning district classification descriptions - config: - column_types: - bbl: text - pluto_version: text From 1e4c000441b31b505550008a94ce47a4f4db4ae1 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 14:22:01 -0500 Subject: [PATCH 13/26] Fix column name case in pluto_input_condolot_descriptiveattributes - Change condno -> CondNO and parid -> PARID to match CSV header - Fixes integer out of range error --- products/pluto/seeds/_seeds.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index 31c090d05d..dd4973a9ee 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -42,8 +42,8 @@ seeds: description: Descriptive attributes for condo lots (8.3k rows) config: column_types: - condno: text - parid: text + CondNO: text + PARID: text - name: pluto_input_condo_bldgclass description: Condo building class codes From 296010ce382a58a52ccc78231c64416333635bea Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 15:32:06 -0500 Subject: [PATCH 14/26] change pluto_input_research to ref --- products/pluto/models/staging/stg__pluto_input_research.sql | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/pluto/models/staging/stg__pluto_input_research.sql b/products/pluto/models/staging/stg__pluto_input_research.sql index 463810d23a..d873803425 100644 --- a/products/pluto/models/staging/stg__pluto_input_research.sql +++ b/products/pluto/models/staging/stg__pluto_input_research.sql @@ -1,2 +1,2 @@ SELECT * -FROM {{ source('recipe_sources', 'pluto_input_research') }} +FROM {{ ref('pluto_input_research') }} From 792faed224b7e991d8c718e841b2c9e1606b62aa Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 19 Feb 2026 16:44:28 -0500 Subject: [PATCH 15/26] model fixups --- products/pluto/models/_sources.yml | 2 +- products/pluto/models/staging/stg__dcp_colp.sql | 3 +-- .../stg__dcp_gis_mandatory_inclusionary_housing.sql | 13 ------------- products/pluto/models/staging/stg__dcp_mappluto.sql | 5 ++--- .../pluto/models/staging/stg__dcp_transit_zones.sql | 13 ------------- products/pluto/models/staging/stg__dof_condo.sql | 6 +----- products/pluto/models/staging/stg__pluto_pts.sql | 6 +----- .../pluto/models/staging/stg__previous_pluto.sql | 3 +-- 8 files changed, 7 insertions(+), 44 deletions(-) delete mode 100644 products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql delete mode 100644 products/pluto/models/staging/stg__dcp_transit_zones.sql diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index cb1a590d30..096eaed8e2 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -9,7 +9,7 @@ sources: - name: pluto_input_geocodes - name: pluto_input_numbldgs - name: pluto_pts - - name: dcp_mappluto + - name: previous_pluto - name: lpc_landmarks - name: lpc_historic_districts - name: dcp_edesignation diff --git a/products/pluto/models/staging/stg__dcp_colp.sql b/products/pluto/models/staging/stg__dcp_colp.sql index 808a4da79c..41b3089ed3 100644 --- a/products/pluto/models/staging/stg__dcp_colp.sql +++ 
b/products/pluto/models/staging/stg__dcp_colp.sql @@ -8,6 +8,5 @@ }} SELECT - *, - wkb_geometry AS geom + * FROM {{ source('recipe_sources', 'dcp_colp') }} diff --git a/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql b/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql deleted file mode 100644 index 20775470a0..0000000000 --- a/products/pluto/models/staging/stg__dcp_gis_mandatory_inclusionary_housing.sql +++ /dev/null @@ -1,13 +0,0 @@ -{{ - config( - materialized='table', - indexes=[ - {'columns': ['geom'], 'type': 'gist'} - ] - ) -}} - -SELECT - *, - wkb_geometry AS geom -FROM {{ source('recipe_sources', 'dcp_gis_mandatory_inclusionary_housing') }} diff --git a/products/pluto/models/staging/stg__dcp_mappluto.sql b/products/pluto/models/staging/stg__dcp_mappluto.sql index f8a5f87add..643125f6fe 100644 --- a/products/pluto/models/staging/stg__dcp_mappluto.sql +++ b/products/pluto/models/staging/stg__dcp_mappluto.sql @@ -8,6 +8,5 @@ }} SELECT - *, - wkb_geometry AS geom -FROM {{ source('recipe_sources', 'dcp_mappluto') }} + * +FROM {{ source('recipe_sources', 'previous_pluto') }} diff --git a/products/pluto/models/staging/stg__dcp_transit_zones.sql b/products/pluto/models/staging/stg__dcp_transit_zones.sql deleted file mode 100644 index b2c0c7d7d7..0000000000 --- a/products/pluto/models/staging/stg__dcp_transit_zones.sql +++ /dev/null @@ -1,13 +0,0 @@ -{{ - config( - materialized='table', - indexes=[ - {'columns': ['geom'], 'type': 'gist'} - ] - ) -}} - -SELECT - *, - wkb_geometry AS geom -FROM {{ source('recipe_sources', 'dcp_transit_zones') }} diff --git a/products/pluto/models/staging/stg__dof_condo.sql b/products/pluto/models/staging/stg__dof_condo.sql index 8fd8c178b3..a4d6729fe2 100644 --- a/products/pluto/models/staging/stg__dof_condo.sql +++ b/products/pluto/models/staging/stg__dof_condo.sql @@ -1,13 +1,9 @@ {{ config( materialized='table', - indexes=[ - {'columns': ['geom'], 'type': 'gist'} - ] ) }} 
SELECT - *, - wkb_geometry AS geom + * FROM {{ source('recipe_sources', 'dof_condo') }} diff --git a/products/pluto/models/staging/stg__pluto_pts.sql b/products/pluto/models/staging/stg__pluto_pts.sql index 97ef76b7d5..9d9d595767 100644 --- a/products/pluto/models/staging/stg__pluto_pts.sql +++ b/products/pluto/models/staging/stg__pluto_pts.sql @@ -1,13 +1,9 @@ {{ config( materialized='table', - indexes=[ - {'columns': ['geom'], 'type': 'gist'} - ] ) }} SELECT - *, - wkb_geometry AS geom + * FROM {{ source('recipe_sources', 'pluto_pts') }} diff --git a/products/pluto/models/staging/stg__previous_pluto.sql b/products/pluto/models/staging/stg__previous_pluto.sql index 1eeeda3831..7b9779aa19 100644 --- a/products/pluto/models/staging/stg__previous_pluto.sql +++ b/products/pluto/models/staging/stg__previous_pluto.sql @@ -9,6 +9,5 @@ -- Previous version of PLUTO for change detection SELECT - *, - wkb_geometry AS geom + * FROM {{ source('build_sources', 'previous_pluto') }} From 97d8740b8717667e9951986ec0723e67bc202e40 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 24 Feb 2026 13:07:59 -0500 Subject: [PATCH 16/26] Fix seed schema and staging model references - Change seeds schema from 'public' to BUILD_ENGINE_SCHEMA to match build scripts - Update stg__pluto_input_research to reference seed with ref() instead of source() - Ensures build scripts can find seed tables in the correct schema --- products/pluto/dbt_project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/pluto/dbt_project.yml b/products/pluto/dbt_project.yml index 091b14b844..4f06134430 100644 --- a/products/pluto/dbt_project.yml +++ b/products/pluto/dbt_project.yml @@ -8,7 +8,7 @@ test-paths: [ "tests" ] seeds: pluto: +quote_columns: true - +schema: public + +schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tests: +store_failures: true From 26e2b59956f73f5fd1befbe6e8f599dd04f882ed Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 24 Feb 2026 14:34:26 -0500 Subject: 
[PATCH 17/26] Remove unnecessary schema override from PLUTO seeds - Seeds were loading to doubled schema (target_schema + custom_schema) - dbt automatically uses BUILD_ENGINE_SCHEMA from profiles.yml as target - Removing +schema config fixes: ar_dbtify_pluto_staging_models_ar_dbtify_pluto_staging_models -> ar_dbtify_pluto_staging_models - Matches green_fast_track pattern Closes data-engineering-n58.5 --- products/pluto/dbt_project.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/products/pluto/dbt_project.yml b/products/pluto/dbt_project.yml index 4f06134430..ad6fdf80d1 100644 --- a/products/pluto/dbt_project.yml +++ b/products/pluto/dbt_project.yml @@ -8,7 +8,6 @@ test-paths: [ "tests" ] seeds: pluto: +quote_columns: true - +schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tests: +store_failures: true From 7653bc493b30e0a51ea6fc5adc4934dea7dad38d Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 14:16:50 -0500 Subject: [PATCH 18/26] kill 00_setup.sh. We don't actually run this. 
Thankfully --- products/pluto/pluto_build/00_setup.sh | 13 ------------- products/pluto/pluto_build/README.md | 2 -- 2 files changed, 15 deletions(-) delete mode 100755 products/pluto/pluto_build/00_setup.sh diff --git a/products/pluto/pluto_build/00_setup.sh b/products/pluto/pluto_build/00_setup.sh deleted file mode 100755 index 6b0ba3d1e4..0000000000 --- a/products/pluto/pluto_build/00_setup.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -source ./bash/config.sh -set_error_traps - -run_sql_command " -DO \$\$ DECLARE - r RECORD; -BEGIN - FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public' and tablename !='spatial_ref_sys') LOOP - EXECUTE 'DROP TABLE IF EXISTS ' || quote_ident(r.tablename) || ' CASCADE'; - END LOOP; -END \$\$; -" diff --git a/products/pluto/pluto_build/README.md b/products/pluto/pluto_build/README.md index 2f59814a69..6c7fef09e1 100644 --- a/products/pluto/pluto_build/README.md +++ b/products/pluto/pluto_build/README.md @@ -37,8 +37,6 @@ As of Phase 1 of the DBT migration, **01a_dbt_staging.sh must run before 02_buil ## Running the Build ```bash -# Full build sequence (after recipe loads data): -./00_setup.sh ./01a_dbt_staging.sh # 🆕 DBT seeds + staging models ./02_build.sh ./03_corrections.sh From e1e364ddadaa8283540903b955e7bb9204311a24 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 15:04:07 -0500 Subject: [PATCH 19/26] refactor pluto_input_geocodes to avoid table altering --- products/pluto/models/_sources.yml | 2 +- .../models/staging/stg__pluto_input_geocodes.sql | 2 +- products/pluto/pluto_build/02_build.sh | 1 + .../sql/create_pluto_input_geocodes.sql | 14 ++++++++++++++ products/pluto/pluto_build/sql/create_rpad_geo.sql | 10 ---------- products/pluto/recipe.yml | 1 + 6 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 products/pluto/pluto_build/sql/create_pluto_input_geocodes.sql diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index 
096eaed8e2..a28d697c5e 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -6,7 +6,7 @@ sources: tables: - name: pluto_input_research - name: pluto_input_cama_dof - - name: pluto_input_geocodes + - name: pluto_input_geocodes_raw - name: pluto_input_numbldgs - name: pluto_pts - name: previous_pluto diff --git a/products/pluto/models/staging/stg__pluto_input_geocodes.sql b/products/pluto/models/staging/stg__pluto_input_geocodes.sql index 95bec1b0e0..c3812677fb 100644 --- a/products/pluto/models/staging/stg__pluto_input_geocodes.sql +++ b/products/pluto/models/staging/stg__pluto_input_geocodes.sql @@ -1,2 +1,2 @@ SELECT * -FROM {{ source('recipe_sources', 'pluto_input_geocodes') }} +FROM {{ source('recipe_sources', 'pluto_input_geocodes_raw') }} diff --git a/products/pluto/pluto_build/02_build.sh b/products/pluto/pluto_build/02_build.sh index 5657da5a6e..3b55ad9e9b 100755 --- a/products/pluto/pluto_build/02_build.sh +++ b/products/pluto/pluto_build/02_build.sh @@ -5,6 +5,7 @@ set_error_traps echo "Starting to build PLUTO ..." run_sql_file sql/preprocessing.sql run_sql_file sql/create_pts.sql +run_sql_file sql/create_pluto_input_geocodes.sql run_sql_file sql/create_rpad_geo.sql echo 'Making DCP edits to RPAD...' 
diff --git a/products/pluto/pluto_build/sql/create_pluto_input_geocodes.sql b/products/pluto/pluto_build/sql/create_pluto_input_geocodes.sql new file mode 100644 index 0000000000..c033fbe41b --- /dev/null +++ b/products/pluto/pluto_build/sql/create_pluto_input_geocodes.sql @@ -0,0 +1,14 @@ +-- Create pluto_input_geocodes from DBT staging model with transformations +DROP TABLE IF EXISTS pluto_input_geocodes; +CREATE TABLE pluto_input_geocodes AS +SELECT * FROM stg__pluto_input_geocodes; + +ALTER TABLE pluto_input_geocodes RENAME bbl TO geo_bbl; +ALTER TABLE pluto_input_geocodes ADD COLUMN xcoord text; +ALTER TABLE pluto_input_geocodes ADD COLUMN ycoord text; + +UPDATE pluto_input_geocodes +SET + xcoord = ST_X(ST_TRANSFORM(geom, 2263))::integer, + ycoord = ST_Y(ST_TRANSFORM(geom, 2263))::integer, + ct2010 = (CASE WHEN ct2010::numeric = 0 THEN NULL ELSE ct2010 END); diff --git a/products/pluto/pluto_build/sql/create_rpad_geo.sql b/products/pluto/pluto_build/sql/create_rpad_geo.sql index 606248435f..c722471308 100644 --- a/products/pluto/pluto_build/sql/create_rpad_geo.sql +++ b/products/pluto/pluto_build/sql/create_rpad_geo.sql @@ -1,14 +1,4 @@ -- getting distinct BBLs FROM raw data -ALTER TABLE pluto_input_geocodes RENAME bbl TO geo_bbl; -ALTER TABLE pluto_input_geocodes ADD COLUMN xcoord text; -ALTER TABLE pluto_input_geocodes ADD COLUMN ycoord text; - -UPDATE pluto_input_geocodes -SET - xcoord = ST_X(ST_TRANSFORM(geom, 2263))::integer, - ycoord = ST_Y(ST_TRANSFORM(geom, 2263))::integer, - ct2010 = (CASE WHEN ct2010::numeric = 0 THEN NULL ELSE ct2010 END); - DROP TABLE IF EXISTS pluto_rpad_geo; CREATE TABLE pluto_rpad_geo AS ( WITH pluto_rpad_rownum AS ( diff --git a/products/pluto/recipe.yml b/products/pluto/recipe.yml index e760b30600..5435a25a0a 100644 --- a/products/pluto/recipe.yml +++ b/products/pluto/recipe.yml @@ -42,5 +42,6 @@ inputs: - name: lpc_landmarks - name: pluto_input_cama_dof - name: pluto_input_geocodes + import_as: pluto_input_geocodes_raw - 
name: pluto_input_numbldgs - name: pluto_pts From 866b9aeaeadae764470c0c1bba1d920ea30efa47 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Fri, 27 Feb 2026 11:14:57 -0500 Subject: [PATCH 20/26] Ok, let's try recipe caching again! --- .github/workflows/pluto_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pluto_build.yml b/.github/workflows/pluto_build.yml index 7556736f84..c72ce89451 100644 --- a/.github/workflows/pluto_build.yml +++ b/.github/workflows/pluto_build.yml @@ -81,7 +81,7 @@ jobs: - name: Dataloading working-directory: products/pluto run: python3 -m dcpy lifecycle builds load load --recipe-path ${{ inputs.recipe_file - }}.lock.yml + }}.lock.yml --cache-schema recipe_cache --cached-entity-type view - name: Load Local Data and DBT Staging run: ./01a_dbt_staging.sh From 27253fb840bb1d2b34351556dbf8d1bca0933b79 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Mon, 2 Mar 2026 16:46:33 -0500 Subject: [PATCH 21/26] downcase the seed files --- products/pluto/seeds/dcp_zoning_maxfar.csv | 4 ++-- products/pluto/seeds/lookup_bldgclass.csv | 2 +- products/pluto/seeds/pluto_input_condo_bldgclass.csv | 4 ++-- .../seeds/pluto_input_condolot_descriptiveattributes.csv | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/products/pluto/seeds/dcp_zoning_maxfar.csv b/products/pluto/seeds/dcp_zoning_maxfar.csv index 3b5b2298ce..2ca6237b54 100644 --- a/products/pluto/seeds/dcp_zoning_maxfar.csv +++ b/products/pluto/seeds/dcp_zoning_maxfar.csv @@ -1,4 +1,4 @@ -ο»Ώzonedist,Contextual,zoningdistricttype,ResDistEq,residfar,AffResFAR,facilfar,commfar,mnffar,verified +ο»Ώzonedist,contextual,zoningdistricttype,resdisteq,residfar,affresfar,facilfar,commfar,mnffar,verified BALL FIELD,-,-,,0,0,0,0,0, BPC,-,-,,0,0,0,0,0,y C1-6,Non-contextual,Local Retail and Service,R7-2,3.44,5.01,6.5,2,0,y @@ -218,4 +218,4 @@ R9A,Contextual,High-Density Residence District,,7.52,9.02,7.5,0,0,y R9X,Contextual,High-Density Residence 
District,,9,10.8,9,0,0,y R11,Non-contextual,High-Density Residence District,,12,15,12,0,0,y R11A,Contextual,High-Density Residence District,,12,15,12,0,0,y -R12,Non-contextual,High-Density Residence District,,15,18,15,0,0,y \ No newline at end of file +R12,Non-contextual,High-Density Residence District,,15,18,15,0,0,y diff --git a/products/pluto/seeds/lookup_bldgclass.csv b/products/pluto/seeds/lookup_bldgclass.csv index ef0aabb2a7..a61c31df28 100644 --- a/products/pluto/seeds/lookup_bldgclass.csv +++ b/products/pluto/seeds/lookup_bldgclass.csv @@ -1,4 +1,4 @@ -CodedValue,CodeDescrip +codedvalue,codedescrip A,One Family Dwellings A0,One Family Dwellings - Cape Cod A1,One Family Dwellings - Two Stories Detached diff --git a/products/pluto/seeds/pluto_input_condo_bldgclass.csv b/products/pluto/seeds/pluto_input_condo_bldgclass.csv index b9dc9dc70c..1b598cb7a1 100644 --- a/products/pluto/seeds/pluto_input_condo_bldgclass.csv +++ b/products/pluto/seeds/pluto_input_condo_bldgclass.csv @@ -1,4 +1,4 @@ -Code,Description,Type,DCP created,Logic for codes created by DCP +code,description,type,dcp created,logic for codes created by DCP RG, Indoor Parking,,, RP,Outdoor Parking,,, RS,Non-Business Storage Space,,, @@ -24,4 +24,4 @@ R3,Residential Unit in 1-3 Story Bldg,Res,, R4, Residential Unit in Elevator Bldg,Res,, R6,Residential Unit of 1-3 Unit Bldg-Orig Class 1,Res,, RR,Condominium Rentals,Res,, -R0, Condo Billing Lot,,, \ No newline at end of file +R0, Condo Billing Lot,,, diff --git a/products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv b/products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv index 8f32a19889..3acd1ab0a7 100644 --- a/products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv +++ b/products/pluto/seeds/pluto_input_condolot_descriptiveattributes.csv @@ -1,4 +1,4 @@ -CondNO,Boro,PARID,BC,TC,LandSize,Story,YearBuilt +condno,boro,parid,bc,tc,landsize,story,yearbuilt 1,1,1014387501,R0,2,"35,147",34,1965 
3,1,1005767501,R0,2,"8,982",14,1966 4,1,1012257501,R0,2,"1,711",3,1968 @@ -8338,4 +8338,4 @@ CondNO,Boro,PARID,BC,TC,LandSize,Story,YearBuilt 200,5,5046337502,R0,2,"15,700",2,2016 201,5,5033437501,R0,2,"3,413",2,2016 203,5,5065797501,R0,2,"14,000",2,2016 -1918,5,5019657501,R0,2,"34,992",2,2015 \ No newline at end of file +1918,5,5019657501,R0,2,"34,992",2,2015 From 2102e9f27b96913f629c269b2cd22f5139ecab44 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 3 Mar 2026 12:52:57 -0500 Subject: [PATCH 22/26] Fix pluto seed script to run from products/pluto directory - Use SCRIPT_DIR to find bash/config.sh relative to script location - Remove 'cd ..' and 'cd pluto_build' navigation - Fix column name case in _seeds.yml (CondNO -> condno, PARID -> parid) - Script now runs successfully from products/pluto directory --- .github/workflows/pluto_build.yml | 1 + products/pluto/pluto_build/01a_dbt_staging.sh | 6 ++---- products/pluto/seeds/_seeds.yml | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pluto_build.yml b/.github/workflows/pluto_build.yml index c72ce89451..f205bb2456 100644 --- a/.github/workflows/pluto_build.yml +++ b/.github/workflows/pluto_build.yml @@ -84,6 +84,7 @@ jobs: }}.lock.yml --cache-schema recipe_cache --cached-entity-type view - name: Load Local Data and DBT Staging + working-directory: products/pluto run: ./01a_dbt_staging.sh - name: building ... diff --git a/products/pluto/pluto_build/01a_dbt_staging.sh b/products/pluto/pluto_build/01a_dbt_staging.sh index 96c140e048..fc5d8fc3cb 100755 --- a/products/pluto/pluto_build/01a_dbt_staging.sh +++ b/products/pluto/pluto_build/01a_dbt_staging.sh @@ -1,9 +1,9 @@ #!/bin/bash -source ./bash/config.sh +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$SCRIPT_DIR/bash/config.sh" set_error_traps echo "Setup dbt" -cd .. dbt deps --profiles-dir . dbt debug --profiles-dir . @@ -22,5 +22,3 @@ if [ $? 
-ne 0 ]; then exit 1 fi echo "✓ DBT staging models materialized successfully" - -cd pluto_build diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index dd4973a9ee..31c090d05d 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -42,8 +42,8 @@ seeds: description: Descriptive attributes for condo lots (8.3k rows) config: column_types: - CondNO: text - PARID: text + condno: text + parid: text - name: pluto_input_condo_bldgclass description: Condo building class codes From 039c065ee47fb3eebb1f3ec71ec74128a6299715 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 3 Mar 2026 13:35:21 -0500 Subject: [PATCH 23/26] fix gha script location --- .github/workflows/pluto_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pluto_build.yml b/.github/workflows/pluto_build.yml index f205bb2456..efcf1db602 100644 --- a/.github/workflows/pluto_build.yml +++ b/.github/workflows/pluto_build.yml @@ -85,7 +85,7 @@ jobs: - name: Load Local Data and DBT Staging working-directory: products/pluto - run: ./01a_dbt_staging.sh + run: ./pluto_build/01a_dbt_staging.sh - name: building ... 
run: ./02_build.sh From a64bafd4393a930ac7d380674e96538b75250743 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 3 Mar 2026 14:51:16 -0500 Subject: [PATCH 24/26] fix seed types --- products/pluto/seeds/_seeds.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index 31c090d05d..680c5f279e 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -50,6 +50,11 @@ seeds: - name: pluto_input_bsmtcode description: Basement type codes and classifications + config: + column_types: + bsmnt_type: text + bsmntgradient: text + bsmtcode: text - name: lookup_bldgclass description: Building class lookup table with descriptions From d8972477007c8840f8520ae172351916ddd02928 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 3 Mar 2026 16:40:18 -0500 Subject: [PATCH 25/26] remaining src -> stg tables --- products/pluto/pluto_build/sql/dtmmergepolygons.sql | 2 +- products/pluto/pluto_build/sql/plutomapid.sql | 2 +- products/pluto/pluto_build/sql/zoning_zoningdistrict.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/products/pluto/pluto_build/sql/dtmmergepolygons.sql b/products/pluto/pluto_build/sql/dtmmergepolygons.sql index 6f6023483d..d4179debdf 100644 --- a/products/pluto/pluto_build/sql/dtmmergepolygons.sql +++ b/products/pluto/pluto_build/sql/dtmmergepolygons.sql @@ -1,7 +1,7 @@ --copy condo table from source DOF DROP TABLE IF EXISTS pluto_dtm; CREATE TABLE pluto_dtm AS ( - SELECT * FROM dof_dtm + SELECT * FROM stg__dof_dtm ); ALTER TABLE pluto_dtm ADD COLUMN primebbl text; diff --git a/products/pluto/pluto_build/sql/plutomapid.sql b/products/pluto/pluto_build/sql/plutomapid.sql index 95452bac5e..775aa8961d 100644 --- a/products/pluto/pluto_build/sql/plutomapid.sql +++ b/products/pluto/pluto_build/sql/plutomapid.sql @@ -28,5 +28,5 @@ SELECT ST_SUBDIVIDE(ST_MAKEVALID(geom), 100) AS geom INTO dof_shoreline_subdivide FROM ( SELECT ST_UNION(geom) AS 
geom - FROM dof_shoreline + FROM stg__dof_shoreline ) AS a; diff --git a/products/pluto/pluto_build/sql/zoning_zoningdistrict.sql b/products/pluto/pluto_build/sql/zoning_zoningdistrict.sql index d8314cc2c9..c3effd47c8 100644 --- a/products/pluto/pluto_build/sql/zoning_zoningdistrict.sql +++ b/products/pluto/pluto_build/sql/zoning_zoningdistrict.sql @@ -26,7 +26,7 @@ CREATE TABLE validzones AS ( ELSE zonedist END AS zonedist, ST_MAKEVALID(geom) AS geom - FROM dcp_zoningdistricts + FROM stg__dcp_zoningdistricts WHERE ST_GEOMETRYTYPE(ST_MAKEVALID(geom)) = 'ST_MultiPolygon' ); From 026e50b745408482c4ea2ecde8334a44a1660225 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 4 Mar 2026 13:42:16 -0500 Subject: [PATCH 26/26] fix pluto_input_research seed --- products/pluto/models/_sources.yml | 1 - .../qaqc_int__active_condo_bbl_unitsres_corrections.sql | 2 +- products/pluto/seeds/_seeds.yml | 5 +++-- products/pluto/seeds/pluto_input_research.csv | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/products/pluto/models/_sources.yml b/products/pluto/models/_sources.yml index a28d697c5e..e3cd6046d4 100644 --- a/products/pluto/models/_sources.yml +++ b/products/pluto/models/_sources.yml @@ -4,7 +4,6 @@ sources: - name: recipe_sources schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}" tables: - - name: pluto_input_research - name: pluto_input_cama_dof - name: pluto_input_geocodes_raw - name: pluto_input_numbldgs diff --git a/products/pluto/models/qaqc/intermediate/qaqc_int__active_condo_bbl_unitsres_corrections.sql b/products/pluto/models/qaqc/intermediate/qaqc_int__active_condo_bbl_unitsres_corrections.sql index 8f6ca0563d..1b9db7e659 100644 --- a/products/pluto/models/qaqc/intermediate/qaqc_int__active_condo_bbl_unitsres_corrections.sql +++ b/products/pluto/models/qaqc/intermediate/qaqc_int__active_condo_bbl_unitsres_corrections.sql @@ -6,7 +6,7 @@ WITH historical_condo_unit_corrections AS ( SELECT bbl, old_value::numeric - FROM {{ source("recipe_sources", 
"pluto_input_research") }} + FROM {{ ref("pluto_input_research") }} WHERE field = 'unitsres' AND substring(bbl, 7, 2) = '75' diff --git a/products/pluto/seeds/_seeds.yml b/products/pluto/seeds/_seeds.yml index 680c5f279e..47d12c4e57 100644 --- a/products/pluto/seeds/_seeds.yml +++ b/products/pluto/seeds/_seeds.yml @@ -10,12 +10,13 @@ seeds: column_types: bbl: text pluto_version: text - + - name: pluto_input_research - description: Manual corrections and overrides for PLUTO data (27k rows) + description: Manual corrections and overrides for PLUTO data config: column_types: bbl: text + type: text columns: - name: bbl description: Borough-Block-Lot identifier diff --git a/products/pluto/seeds/pluto_input_research.csv b/products/pluto/seeds/pluto_input_research.csv index 9b7e08ae1c..8b68b1c807 100644 --- a/products/pluto/seeds/pluto_input_research.csv +++ b/products/pluto/seeds/pluto_input_research.csv @@ -1,4 +1,4 @@ -bbl,field,old_value,new_value,Type,reason,version +bbl,field,old_value,new_value,type,reason,version 1000070027,yearbuilt,1900,1828,2,LPC year built,20v1 1000070028,yearbuilt,1920,1828,2,LPC year built,20v1 1000070029,yearbuilt,1900,1828,2,LPC year built,20v1