From 149f13e811e4c769e78c3605c22e665ade3a2900 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Fri, 27 Feb 2026 12:46:28 -0500 Subject: [PATCH 01/13] CSCL: prod data_loader fix --- products/cscl/poc_validation/prod_data_loader.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/products/cscl/poc_validation/prod_data_loader.py b/products/cscl/poc_validation/prod_data_loader.py index 64fccda0c7..e7433a9e1d 100644 --- a/products/cscl/poc_validation/prod_data_loader.py +++ b/products/cscl/poc_validation/prod_data_loader.py @@ -156,16 +156,17 @@ def _load( """ Primary purpose is to load production outputs for comparison to outputs of this pipeline """ - if not version or local: + if not (version or local): raise Exception( "Either specify loading locally with '-l' flag or specify version to pull from s3 with '-v' flag" ) - for dataset in datasets: - file_name = datasets_by_name[dataset].file_name - s3.download_file( - "edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name - ) + if not local: + for dataset in datasets: + file_name = datasets_by_name[dataset].file_name + s3.download_file( + "edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name + ) load_datasets(datasets, local_folder) From d02806a33845c40b93c75cf84edd850b2539e908 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Fri, 27 Feb 2026 12:47:49 -0500 Subject: [PATCH 02/13] bump cscl version --- products/cscl/recipe.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index f4d2bf5bf4..6333c7bcc8 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -1,6 +1,6 @@ name: CSCL product: cscl -version: 25d +version: 26a inputs: dataset_defaults: From fc4e1e830a03734e34c86dd8c6c77cfe140230d3 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 24 Feb 2026 14:56:39 -0500 Subject: [PATCH 03/13] Create text formatting CSV for thinlion - Added 
text_formatting__thinlion_dat.csv with 41 field specifications - Field numbers use TL1-TL40 format from spec, plus TL34_1 for the 2010 census tract suffix - Mapped justify_and_fill from format field (RJZF default if blank) - Set blank_if_none=TRUE for fields with 'blank if none/empty' format - Includes position, length, and label for each field Closes data-engineering-vwb.3 --- .../text_formatting__thinlion_dat.csv | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv diff --git a/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv new file mode 100644 index 0000000000..9ca8919b40 --- /dev/null +++ b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv @@ -0,0 +1,42 @@ +field_number,field_name,field_label,field_length,start_index,end_index,justify_and_fill,blank_if_none +TL1,borough,Borough,1,1,1,RJZF,FALSE +TL2,census_tract_2020,2020 Census Tract,6,2,7,RJZF,FALSE +TL3,dynamic_block,Dynamic Block,3,8,10,RJZF,FALSE +TL4,census_block_2020,2020 Census Block Basic,4,11,14,RJZF,FALSE +TL5,census_block_suffix_2020,2020 Census Block Suffix,1,15,15,RJZF,TRUE +TL6,census_tract_1990,1990 Census Tract,6,16,21,RJZF,FALSE +TL7,community_development_eligibility,Community Development Eligibility,1,22,22,RJZF,FALSE +TL8,community_district,Community District,3,23,25,RJZF,FALSE +TL9,minor_census_economic_area,Minor Census Economic Area,4,26,29,RJZF,FALSE +TL10,health_area,Health Area,4,30,33,RJZF,FALSE +TL11,health_center_district,Health Center District,2,34,35,RJZF,FALSE +TL12,police_patrol_borough_command,Police Patrol Borough Command,1,36,36,RJZF,FALSE +TL13,police_precinct,Police Precinct,3,37,39,RJZF,FALSE +TL14,water_block_mapping_suppression_flag,Water Block Mapping Suppression Flag,1,40,40,RJZF,FALSE +TL15,fire_company_type,Fire Company Type,1,41,41,RJZF,TRUE +TL16,fire_company_number,Fire Company Number,3,42,44,RJZF,TRUE 
+TL17,sanborn_borough_1,Sanborn Borough-1,1,45,45,RJZF,TRUE +TL18,sanborn_volume_1,Sanborn Volume-1,3,46,48,LJSF,TRUE +TL19,sanborn_page_1,Sanborn Page-1,4,49,52,LJSF,TRUE +TL20,sanborn_borough_2,Sanborn Borough-2, if any,1,53,53,RJZF,TRUE +TL21,sanborn_volume_2,Sanborn Volume-2, if any,3,54,56,LJSF,TRUE +TL22,sanborn_page_2,Sanborn Page-2, if any,4,57,60,LJSF,TRUE +TL23,sanborn_borough_3,Sanborn Borough-3, if any,1,61,61,RJZF,TRUE +TL24,sanborn_volume_3,Sanborn Volume-3, if any,3,62,64,LJSF,TRUE +TL25,sanborn_page_3,Sanborn Page-3, if any,4,65,68,LJSF,TRUE +TL26,census_tract_2000,2000 Census Tract,6,69,74,RJZF,FALSE +TL27,census_block_2000,2000 Census Block Basic,4,75,78,RJZF,FALSE +TL28,census_block_suffix_2000,2000 Census Block Suffix,1,79,79,RJZF,TRUE +TL29,assembly_district,Assembly District,2,81,82,RJZF,TRUE +TL30,election_district,Election District,3,84,86,RJZF,FALSE +TL31,hurricane_evacuation_zone,Hurricane Evacuation Zone,2,87,88,LJSF,FALSE +TL32,patrol_borough,Patrol Borough,2,89,90,LJSF,FALSE +TL33,police_sector,Police Sector,4,91,94,RJSF,FALSE +TL34,census_tract_2010_basic,2010 Census Tract Basic,4,95,98,RJSF,FALSE +TL34_1,census_tract_2010_suffix,2010 Census Tract Suffix,2,99,100,RJZF,TRUE +TL35,census_block_2010,2010 Census Block,4,101,104,RJZF,FALSE +TL36,census_block_suffix_2010,2010 Census Block Suffix,1,105,105,RJZF,TRUE +TL37,nta2020,NTA2020,6,106,111,RJZF,TRUE +TL38,cdta,CDTA,4,112,115,RJZF,TRUE +TL39,cwz,CWZ,4,116,119,RJSF,TRUE +TL40,puma2020,PUMA2020,4,120,123,RJZF,TRUE From e1ac51952cf3ba8061b25dd10609cff321a5190b Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 24 Feb 2026 16:27:27 -0500 Subject: [PATCH 04/13] Add ThinLION recipe exports --- products/cscl/recipe.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index 6333c7bcc8..1065ef77ad 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -251,3 +251,25 @@ exports: filename: 
StatenIslandFace.txt format: dat custom: { formatting: face_code } + + # Thin LION + - name: bronx_thin_lion + filename: BronxThinLION.txt + format: dat + custom: { formatting: thinlion } + - name: brooklyn_thin_lion + filename: BrooklynThinLION.txt + format: dat + custom: { formatting: thinlion } + - name: manhattan_thin_lion + filename: ManhattanThinLION.txt + format: dat + custom: { formatting: thinlion } + - name: queens_thin_lion + filename: QueensThinLION.txt + format: dat + custom: { formatting: thinlion } + - name: staten_island_thin_lion + filename: StatenIslandThinLION.txt + format: dat + custom: { formatting: thinlion } From e63c25a5d9cdb3ac94c8b71aa9165b097869102e Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 14:53:21 -0500 Subject: [PATCH 05/13] bookmarked --- products/cscl/models/staging/stg__atomicpolygons.sql | 1 + products/cscl/poc_validation/prod_data_loader.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/products/cscl/models/staging/stg__atomicpolygons.sql b/products/cscl/models/staging/stg__atomicpolygons.sql index 29f032a587..40b66c7d8a 100644 --- a/products/cscl/models/staging/stg__atomicpolygons.sql +++ b/products/cscl/models/staging/stg__atomicpolygons.sql @@ -10,6 +10,7 @@ SELECT borough AS borocode, censustract_2000, left(censustract_2000, 4)::INT AS censustract_2000_basic, + -- TODO: you might need this for thinlion outputs nullif(right(censustract_2000, 2), '00')::INT AS censustract_2000_suffix, censustract_2010, left(censustract_2010, 4)::INT AS censustract_2010_basic, diff --git a/products/cscl/poc_validation/prod_data_loader.py b/products/cscl/poc_validation/prod_data_loader.py index e7433a9e1d..4e49c64f45 100644 --- a/products/cscl/poc_validation/prod_data_loader.py +++ b/products/cscl/poc_validation/prod_data_loader.py @@ -170,7 +170,8 @@ def _load( load_datasets(datasets, local_folder) - boro_level_files = {"lion_dat", "face_code"} + boro_level_files = {"lion", "face_code"} + # bookmark 
for file in boro_level_files: if any(dataset.endswith(f"_{file}") for dataset in datasets): create_citywide_table(file) From e2a0af2d7f3430cc90bc87441992d4511f30962f Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 15:05:01 -0500 Subject: [PATCH 06/13] add thin_lion output to recipe --- products/cscl/recipe.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index 1065ef77ad..701658d29b 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -253,6 +253,10 @@ exports: custom: { formatting: face_code } # Thin LION + - name: thin_lion + filename: nyc.thinlion + format: dat + custom: { formatting: thinlion } - name: bronx_thin_lion filename: BronxThinLION.txt format: dat From 12a51e9e728062ca746af4be45c52178fb38e516 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Wed, 25 Feb 2026 15:28:57 -0500 Subject: [PATCH 07/13] scaffold of changes --- products/cscl/models/product/thinlion/_thinlion.yml | 0 .../cscl/models/product/thinlion/exports/thinlion_bronx.sql | 0 products/cscl/models/product/thinlion/thinlion_by_field.sql | 6 ++++++ 3 files changed, 6 insertions(+) create mode 100644 products/cscl/models/product/thinlion/_thinlion.yml create mode 100644 products/cscl/models/product/thinlion/exports/thinlion_bronx.sql create mode 100644 products/cscl/models/product/thinlion/thinlion_by_field.sql diff --git a/products/cscl/models/product/thinlion/_thinlion.yml b/products/cscl/models/product/thinlion/_thinlion.yml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/products/cscl/models/product/thinlion/thinlion_by_field.sql b/products/cscl/models/product/thinlion/thinlion_by_field.sql new file mode 100644 index 0000000000..88ec7eee9b --- /dev/null +++ 
b/products/cscl/models/product/thinlion/thinlion_by_field.sql @@ -0,0 +1,6 @@ + +SELECT + {{ apply_text_formatting_from_seed('text_formatting__thinlion_dat') }} +FROM {{ ref("stg__atomic_polygons" ) }} + +-- maybe more intermediate stuff here. From 8d6ba750da717b7895e5f1a72c6fea860cce1433 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 13:29:29 -0500 Subject: [PATCH 08/13] Add ThinLION product export implementation - Add 5 missing FGDB layers to recipe.yml (CensusTract2010, CensusTract2020, HealthArea, NYPDPrecinct, NYPDPatrolBorough) - Create staging models for new layers - Update stg__atomicpolygons with ThinLION-required fields (fire company, sanborn, water flag, etc) - Implement thinlion_by_field.sql with census/health/NYPD joins and spatial lookups - Create 5 borough-specific export files (Manhattan, Bronx, Brooklyn, Queens, Staten Island) - Add _thinlion.yml with column tests Closes de-lju.1, de-lju.2, de-lju.3, de-lju.4, de-lju.5, de-lju.6 --- .../models/product/thinlion/_thinlion.yml | 43 +++++++++++ .../thinlion/exports/thinlion_bronx.sql | 1 + .../thinlion/exports/thinlion_brooklyn.sql | 1 + .../thinlion/exports/thinlion_manhattan.sql | 1 + .../thinlion/exports/thinlion_queens.sql | 1 + .../exports/thinlion_statenisland.sql | 1 + .../product/thinlion/thinlion_by_field.sql | 74 ++++++++++++++++++- .../models/staging/stg__atomicpolygons.sql | 13 ++++ .../models/staging/stg__censustract2010.sql | 28 +++++++ .../models/staging/stg__censustract2020.sql | 29 ++++++++ .../cscl/models/staging/stg__healtharea.sql | 21 ++++++ .../models/staging/stg__nypdpatrolborough.sql | 19 +++++ .../cscl/models/staging/stg__nypdprecinct.sql | 19 +++++ products/cscl/recipe.yml | 25 +++++++ 14 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql create mode 100644 products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql create mode 100644 
products/cscl/models/product/thinlion/exports/thinlion_queens.sql create mode 100644 products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql create mode 100644 products/cscl/models/staging/stg__censustract2010.sql create mode 100644 products/cscl/models/staging/stg__censustract2020.sql create mode 100644 products/cscl/models/staging/stg__healtharea.sql create mode 100644 products/cscl/models/staging/stg__nypdpatrolborough.sql create mode 100644 products/cscl/models/staging/stg__nypdprecinct.sql diff --git a/products/cscl/models/product/thinlion/_thinlion.yml b/products/cscl/models/product/thinlion/_thinlion.yml index e69de29bb2..5abf4fd63c 100644 --- a/products/cscl/models/product/thinlion/_thinlion.yml +++ b/products/cscl/models/product/thinlion/_thinlion.yml @@ -0,0 +1,43 @@ +version: 2 + +models: +- name: thinlion_by_field + columns: + - name: borough + data_type: string + tests: + - not_null + - accepted_values: + arguments: + values: [ "1", "2", "3", "4", "5" ] + - name: census_tract_2020 + data_type: string + tests: + - not_null + - name: dynamic_block + data_type: string + tests: + - not_null + - dbt_expectations.expect_column_value_lengths_to_equal: + arguments: { value: 3 } + +# Borough export models with text output +- name: thinlion_manhattan + columns: &dat_column_123 + - name: dat_column + data_type: string + tests: + - dbt_expectations.expect_column_value_lengths_to_equal: + arguments: { value: 123 } + +- name: thinlion_bronx + columns: *dat_column_123 + +- name: thinlion_brooklyn + columns: *dat_column_123 + +- name: thinlion_queens + columns: *dat_column_123 + +- name: thinlion_statenisland + columns: *dat_column_123 diff --git a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql index e69de29bb2..9879a9d365 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql 
@@ -0,0 +1 @@ +{{ select_rows_as_text(model='thinlion_by_field', where="borough='2'") }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql new file mode 100644 index 0000000000..ea119df691 --- /dev/null +++ b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql @@ -0,0 +1 @@ +{{ select_rows_as_text(model='thinlion_by_field', where="borough='3'") }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql new file mode 100644 index 0000000000..c9bf802d82 --- /dev/null +++ b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql @@ -0,0 +1 @@ +{{ select_rows_as_text(model='thinlion_by_field', where="borough='1'") }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_queens.sql b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql new file mode 100644 index 0000000000..af4fe95b8a --- /dev/null +++ b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql @@ -0,0 +1 @@ +{{ select_rows_as_text(model='thinlion_by_field', where="borough='4'") }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql new file mode 100644 index 0000000000..ced79b9e86 --- /dev/null +++ b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql @@ -0,0 +1 @@ +{{ select_rows_as_text(model='thinlion_by_field', where="borough='5'") }} diff --git a/products/cscl/models/product/thinlion/thinlion_by_field.sql b/products/cscl/models/product/thinlion/thinlion_by_field.sql index 88ec7eee9b..900788e2b0 100644 --- a/products/cscl/models/product/thinlion/thinlion_by_field.sql +++ b/products/cscl/models/product/thinlion/thinlion_by_field.sql @@ -1,6 +1,76 @@ +WITH atomic_polygons_with_lookups AS ( + SELECT + ap.borocode AS 
borough, + ap.censustract_2020, + RIGHT(ap.atomicid, 3) AS dynamic_block, + ap.censusblock_2020_basic, + ap.censusblock_2020_suffix, + ap.censustract_1990, + ct2010.cd_eligibility AS community_development_eligibility, + ap.commdist AS community_district, + ct2010.mcea AS minor_census_economic_area, + ct2010.health_area, + ha.health_ct_district AS health_center_district, + NULL AS police_patrol_borough_command, -- TL12: NYPDPrecinct doesn't have this field + prec.precinct AS police_precinct, + ap.water_flag AS water_block_mapping_suppression_flag, + ap.fire_company_type, + ap.fire_company_number, + ap.borocode AS sanborn_borough_1, + ap.sb1_volume AS sanborn_volume_1, + ap.sb1_page AS sanborn_page_1, + ap.borocode AS sanborn_borough_2, + ap.sb2_volume AS sanborn_volume_2, + ap.sb2_page AS sanborn_page_2, + ap.borocode AS sanborn_borough_3, + ap.sb3_volume AS sanborn_volume_3, + ap.sb3_page AS sanborn_page_3, + ap.censustract_2000, + ap.censusblock_2000_basic, + ap.censusblock_2000_suffix, + ap.assemdist AS assembly_district, + ap.electdist AS election_district, + ap.hurricane_evacuation_zone, + CASE + WHEN pb.patrol_borough = 'Manhattan South' THEN '1' + WHEN pb.patrol_borough = 'Manhattan North' THEN '2' + WHEN pb.patrol_borough = 'Bronx' THEN '3' + WHEN pb.patrol_borough = 'Brooklyn South' THEN '4' + WHEN pb.patrol_borough = 'Brooklyn North' THEN '5' + WHEN pb.patrol_borough = 'Queens North' THEN '6' + WHEN pb.patrol_borough = 'Staten Island' THEN '7' + WHEN pb.patrol_borough = 'Queens South' THEN '8' + END AS patrol_borough, + beat.sector AS police_sector, + ap.censustract_2010_basic, + ap.censustract_2010_suffix, + ap.censusblock_2010_basic, + ap.censusblock_2010_suffix, + ct2020.neighborhood_code AS nta2020, + ct2020.cdta_code AS cdta, + ap.commercial_waste_zone AS cwz, + ct2020.puma AS puma2020 + FROM {{ ref("stg__atomicpolygons") }} ap + -- Join CensusTract2010 via concatenated key + LEFT JOIN {{ ref("stg__censustract2010") }} ct2010 + ON ap.borocode || 
ap.censustract_2010 = ct2010.boroct + -- Join CensusTract2020 via concatenated key + LEFT JOIN {{ ref("stg__censustract2020") }} ct2020 + ON ap.borocode || ap.censustract_2020 = ct2020.boroct + -- Join HealthArea via health_area from CensusTract2010 + LEFT JOIN {{ ref("stg__healtharea") }} ha + ON ct2010.health_area = ha.healtharea + -- Spatial joins using centroid point-in-polygon + LEFT JOIN {{ ref("stg__nypdprecinct") }} prec + ON ST_Within(ST_Centroid(ap.geom), prec.geom) + LEFT JOIN {{ ref("stg__nypdpatrolborough") }} pb + ON ST_Within(ST_Centroid(ap.geom), pb.geom) + LEFT JOIN {{ ref("stg__nypdbeat") }} beat + ON ST_Within(ST_Centroid(ap.geom), beat.geom) +) SELECT {{ apply_text_formatting_from_seed('text_formatting__thinlion_dat') }} -FROM {{ ref("stg__atomic_polygons" ) }} +FROM atomic_polygons_with_lookups +ORDER BY censustract_2020, dynamic_block --- maybe more intermediate stuff here. diff --git a/products/cscl/models/staging/stg__atomicpolygons.sql b/products/cscl/models/staging/stg__atomicpolygons.sql index 40b66c7d8a..a0e619489a 100644 --- a/products/cscl/models/staging/stg__atomicpolygons.sql +++ b/products/cscl/models/staging/stg__atomicpolygons.sql @@ -27,6 +27,19 @@ SELECT nullif(assemdist, ' ') AS assemdist, nullif(electdist, ' ') AS electdist, nullif(schooldist, '0') AS schooldist, + commdist, + LEFT(admin_fire_company, 1) AS fire_company_type, + RIGHT(admin_fire_company, 3) AS fire_company_number, + sb1_volume, + sb1_page, + sb2_volume, + sb2_page, + sb3_volume, + sb3_page, + water_flag, + commercial_waste_zone, + hurricane_evacuation_zone, + censustract_1990, linearize(geom) AS geom, geom AS raw_geom FROM {{ source("recipe_sources", "dcp_cscl_atomicpolygons") }} diff --git a/products/cscl/models/staging/stg__censustract2010.sql b/products/cscl/models/staging/stg__censustract2010.sql new file mode 100644 index 0000000000..791b988b34 --- /dev/null +++ b/products/cscl/models/staging/stg__censustract2010.sql @@ -0,0 +1,28 @@ +{{ config( + 
materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + ] +) }} + +SELECT + ctlabel, + borocode, + neighborhood_code, + ct, + boroct, + cd_eligibility, + puma, + empowerment_zone, + mcea, + created_by, + created_date, + modified_by, + modified_date, + health_area, + globalid, + shape_length, + shape_area, + geom AS raw_geom, + LINEARIZE(geom) AS geom +FROM {{ source("recipe_sources", "dcp_cscl_censustract2010") }} diff --git a/products/cscl/models/staging/stg__censustract2020.sql b/products/cscl/models/staging/stg__censustract2020.sql new file mode 100644 index 0000000000..31bbcac444 --- /dev/null +++ b/products/cscl/models/staging/stg__censustract2020.sql @@ -0,0 +1,29 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + ] +) }} + +SELECT + ctlabel, + borocode, + neighborhood_code, + ct, + boroct, + cd_eligibility, + empowerment_zone, + mcea, + created_by, + created_date, + modified_by, + modified_date, + health_area, + globalid, + cdta_code, + puma, + shape_length, + shape_area, + geom AS raw_geom, + LINEARIZE(geom) AS geom +FROM {{ source("recipe_sources", "dcp_cscl_censustract2020") }} diff --git a/products/cscl/models/staging/stg__healtharea.sql b/products/cscl/models/staging/stg__healtharea.sql new file mode 100644 index 0000000000..dc25229e67 --- /dev/null +++ b/products/cscl/models/staging/stg__healtharea.sql @@ -0,0 +1,21 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + ] +) }} + +SELECT + created_by, + created_date, + modified_by, + modified_date, + healtharea, + health_ct_district, + borough, + globalid, + shape_length, + shape_area, + geom AS raw_geom, + LINEARIZE(geom) AS geom +FROM {{ source("recipe_sources", "dcp_cscl_healtharea") }} diff --git a/products/cscl/models/staging/stg__nypdpatrolborough.sql b/products/cscl/models/staging/stg__nypdpatrolborough.sql new file mode 100644 index 0000000000..12db91d3c2 --- /dev/null +++ 
b/products/cscl/models/staging/stg__nypdpatrolborough.sql @@ -0,0 +1,19 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + ] +) }} + +SELECT + created_by, + created_date, + modified_by, + modified_date, + globalid, + patrol_borough, + shape_length, + shape_area, + geom AS raw_geom, + LINEARIZE(geom) AS geom +FROM {{ source("recipe_sources", "dcp_cscl_nypdpatrolborough") }} diff --git a/products/cscl/models/staging/stg__nypdprecinct.sql b/products/cscl/models/staging/stg__nypdprecinct.sql new file mode 100644 index 0000000000..a8237853c1 --- /dev/null +++ b/products/cscl/models/staging/stg__nypdprecinct.sql @@ -0,0 +1,19 @@ +{{ config( + materialized = 'table', + indexes=[ + {'columns': ['geom'], 'type': 'gist'}, + ] +) }} + +SELECT + precinct, + globalid, + created_by, + created_date, + modified_by, + modified_date, + shape_length, + shape_area, + geom AS raw_geom, + LINEARIZE(geom) AS geom +FROM {{ source("recipe_sources", "dcp_cscl_nypdprecinct") }} diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index 701658d29b..95fd68adcd 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -90,6 +90,31 @@ inputs: custom: filename: ETL Working GDB.gdb.zip layer_name: NYPDBeat + - name: dcp_cscl_gdb + import_as: dcp_cscl_nypdprecinct + custom: + filename: ETL Working GDB.gdb.zip + layer_name: NYPDPrecinct + - name: dcp_cscl_gdb + import_as: dcp_cscl_nypdpatrolborough + custom: + filename: ETL Working GDB.gdb.zip + layer_name: NYPDPatrolBorough + - name: dcp_cscl_gdb + import_as: dcp_cscl_censustract2010 + custom: + filename: ETL Working GDB.gdb.zip + layer_name: CensusTract2010 + - name: dcp_cscl_gdb + import_as: dcp_cscl_censustract2020 + custom: + filename: ETL Working GDB.gdb.zip + layer_name: CensusTract2020 + - name: dcp_cscl_gdb + import_as: dcp_cscl_healtharea + custom: + filename: ETL Working GDB.gdb.zip + layer_name: HealthArea - name: dcp_cscl_gdb import_as: dcp_cscl_roadbed_pointer_list 
custom: From f8dac8c1c1ed38073f503420c0e2327df63ec721 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 14:12:53 -0500 Subject: [PATCH 09/13] Fix ThinLION export models compilation - Create borough-specific _by_field models (manhattan, bronx, brooklyn, queens, statenisland) - Update export models to reference borough-specific models instead of using unsupported 'where' parameter - Add 5 new sources to sources.yml (censustract2010, censustract2020, healtharea, nypdprecinct, nypdpatrolborough) - Update _thinlion.yml to include new borough-specific models Fixes de-hlf --- products/cscl/models/product/thinlion/_thinlion.yml | 7 +++++++ .../models/product/thinlion/exports/thinlion_bronx.sql | 2 +- .../models/product/thinlion/exports/thinlion_brooklyn.sql | 2 +- .../models/product/thinlion/exports/thinlion_manhattan.sql | 2 +- .../models/product/thinlion/exports/thinlion_queens.sql | 2 +- .../product/thinlion/exports/thinlion_statenisland.sql | 2 +- .../models/product/thinlion/thinlion_bronx_by_field.sql | 3 +++ .../models/product/thinlion/thinlion_brooklyn_by_field.sql | 3 +++ .../product/thinlion/thinlion_manhattan_by_field.sql | 3 +++ .../models/product/thinlion/thinlion_queens_by_field.sql | 3 +++ .../product/thinlion/thinlion_statenisland_by_field.sql | 3 +++ products/cscl/models/sources.yml | 5 +++++ 12 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql create mode 100644 products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql create mode 100644 products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql create mode 100644 products/cscl/models/product/thinlion/thinlion_queens_by_field.sql create mode 100644 products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql diff --git a/products/cscl/models/product/thinlion/_thinlion.yml b/products/cscl/models/product/thinlion/_thinlion.yml index 5abf4fd63c..811bfafcb9 100644 --- 
a/products/cscl/models/product/thinlion/_thinlion.yml +++ b/products/cscl/models/product/thinlion/_thinlion.yml @@ -21,6 +21,13 @@ models: - dbt_expectations.expect_column_value_lengths_to_equal: arguments: { value: 3 } +# Borough-specific filtered models +- name: thinlion_manhattan_by_field +- name: thinlion_bronx_by_field +- name: thinlion_brooklyn_by_field +- name: thinlion_queens_by_field +- name: thinlion_statenisland_by_field + # Borough export models with text output - name: thinlion_manhattan columns: &dat_column_123 diff --git a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql index 9879a9d365..ca170c5b4f 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql @@ -1 +1 @@ -{{ select_rows_as_text(model='thinlion_by_field', where="borough='2'") }} +{{ select_rows_as_text(model='thinlion_bronx_by_field') }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql index ea119df691..de2e103237 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql @@ -1 +1 @@ -{{ select_rows_as_text(model='thinlion_by_field', where="borough='3'") }} +{{ select_rows_as_text(model='thinlion_brooklyn_by_field') }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql index c9bf802d82..2c863fb7a6 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql @@ -1 +1 @@ -{{ select_rows_as_text(model='thinlion_by_field', where="borough='1'") }} +{{ select_rows_as_text(model='thinlion_manhattan_by_field') }} diff --git 
a/products/cscl/models/product/thinlion/exports/thinlion_queens.sql b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql index af4fe95b8a..242fe3ad4e 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_queens.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql @@ -1 +1 @@ -{{ select_rows_as_text(model='thinlion_by_field', where="borough='4'") }} +{{ select_rows_as_text(model='thinlion_queens_by_field') }} diff --git a/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql index ced79b9e86..a8e91f0966 100644 --- a/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql +++ b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql @@ -1 +1 @@ -{{ select_rows_as_text(model='thinlion_by_field', where="borough='5'") }} +{{ select_rows_as_text(model='thinlion_statenisland_by_field') }} diff --git a/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql b/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql new file mode 100644 index 0000000000..d4c412f684 --- /dev/null +++ b/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql @@ -0,0 +1,3 @@ +SELECT * +FROM {{ ref('thinlion_by_field') }} +WHERE borough = '2' diff --git a/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql b/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql new file mode 100644 index 0000000000..cd717dc6c2 --- /dev/null +++ b/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql @@ -0,0 +1,3 @@ +SELECT * +FROM {{ ref('thinlion_by_field') }} +WHERE borough = '3' diff --git a/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql b/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql new file mode 100644 index 0000000000..1cee314c89 --- /dev/null +++ 
b/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql @@ -0,0 +1,3 @@ +SELECT * +FROM {{ ref('thinlion_by_field') }} +WHERE borough = '1' diff --git a/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql b/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql new file mode 100644 index 0000000000..bc427be17d --- /dev/null +++ b/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql @@ -0,0 +1,3 @@ +SELECT * +FROM {{ ref('thinlion_by_field') }} +WHERE borough = '4' diff --git a/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql b/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql new file mode 100644 index 0000000000..bae2c3085e --- /dev/null +++ b/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql @@ -0,0 +1,3 @@ +SELECT * +FROM {{ ref('thinlion_by_field') }} +WHERE borough = '5' diff --git a/products/cscl/models/sources.yml b/products/cscl/models/sources.yml index db4ba06889..385071e953 100644 --- a/products/cscl/models/sources.yml +++ b/products/cscl/models/sources.yml @@ -24,6 +24,11 @@ sources: - name: dcp_cscl_lastword - name: dcp_cscl_namedintersection - name: dcp_cscl_nypdbeat + - name: dcp_cscl_censustract2010 + - name: dcp_cscl_censustract2020 + - name: dcp_cscl_healtharea + - name: dcp_cscl_nypdprecinct + - name: dcp_cscl_nypdpatrolborough - name: dcp_cscl_roadbed_pointer_list - name: dcp_cscl_sectionalmap - name: dcp_cscl_sedat From a153410f14a60b748d3bdb0f64b1378d9800979d Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 14:36:36 -0500 Subject: [PATCH 10/13] Fix CSV format in text_formatting__thinlion_dat seed Quote field labels containing commas (Sanborn fields 'if any' labels). This fixes 'Row 19 has 9 values, but Table only has 8 columns' error. 
Fixes de-713 --- .../text_formatting__thinlion_dat.csv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv index 9ca8919b40..ab806c523d 100644 --- a/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv +++ b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv @@ -18,12 +18,12 @@ TL16,fire_company_number,Fire Company Number,3,42,44,RJZF,TRUE TL17,sanborn_borough_1,Sanborn Borough-1,1,45,45,RJZF,TRUE TL18,sanborn_volume_1,Sanborn Volume-1,3,46,48,LJSF,TRUE TL19,sanborn_page_1,Sanborn Page-1,4,49,52,LJSF,TRUE -TL20,sanborn_borough_2,Sanborn Borough-2, if any,1,53,53,RJZF,TRUE -TL21,sanborn_volume_2,Sanborn Volume-2, if any,3,54,56,LJSF,TRUE -TL22,sanborn_page_2,Sanborn Page-2, if any,4,57,60,LJSF,TRUE -TL23,sanborn_borough_3,Sanborn Borough-3, if any,1,61,61,RJZF,TRUE -TL24,sanborn_volume_3,Sanborn Volume-3, if any,3,62,64,LJSF,TRUE -TL25,sanborn_page_3,Sanborn Page-3, if any,4,65,68,LJSF,TRUE +TL20,sanborn_borough_2,"Sanborn Borough-2, if any",1,53,53,RJZF,TRUE +TL21,sanborn_volume_2,"Sanborn Volume-2, if any",3,54,56,LJSF,TRUE +TL22,sanborn_page_2,"Sanborn Page-2, if any",4,57,60,LJSF,TRUE +TL23,sanborn_borough_3,"Sanborn Borough-3, if any",1,61,61,RJZF,TRUE +TL24,sanborn_volume_3,"Sanborn Volume-3, if any",3,62,64,LJSF,TRUE +TL25,sanborn_page_3,"Sanborn Page-3, if any",4,65,68,LJSF,TRUE TL26,census_tract_2000,2000 Census Tract,6,69,74,RJZF,FALSE TL27,census_block_2000,2000 Census Block Basic,4,75,78,RJZF,FALSE TL28,census_block_suffix_2000,2000 Census Block Suffix,1,79,79,RJZF,TRUE From 15bf4b0c98451108a74ba6e45326ccaf37f63fc4 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 15:03:19 -0500 Subject: [PATCH 11/13] Fix column name aliases in thinlion_by_field Add underscores to census/block column aliases to match seed field names: - censustract_XXXX 
-> census_tract_XXXX - censusblock_XXXX -> census_block_XXXX This allows apply_text_formatting_from_seed macro to find the columns. Fixes de-k9n --- .../product/thinlion/thinlion_by_field.sql | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/products/cscl/models/product/thinlion/thinlion_by_field.sql b/products/cscl/models/product/thinlion/thinlion_by_field.sql index 900788e2b0..73d6b364b1 100644 --- a/products/cscl/models/product/thinlion/thinlion_by_field.sql +++ b/products/cscl/models/product/thinlion/thinlion_by_field.sql @@ -1,11 +1,11 @@ WITH atomic_polygons_with_lookups AS ( SELECT ap.borocode AS borough, - ap.censustract_2020, + ap.censustract_2020 AS census_tract_2020, RIGHT(ap.atomicid, 3) AS dynamic_block, - ap.censusblock_2020_basic, - ap.censusblock_2020_suffix, - ap.censustract_1990, + ap.censusblock_2020_basic AS census_block_2020, + ap.censusblock_2020_suffix AS census_block_suffix_2020, + ap.censustract_1990 AS census_tract_1990, ct2010.cd_eligibility AS community_development_eligibility, ap.commdist AS community_district, ct2010.mcea AS minor_census_economic_area, @@ -25,9 +25,9 @@ WITH atomic_polygons_with_lookups AS ( ap.borocode AS sanborn_borough_3, ap.sb3_volume AS sanborn_volume_3, ap.sb3_page AS sanborn_page_3, - ap.censustract_2000, - ap.censusblock_2000_basic, - ap.censusblock_2000_suffix, + ap.censustract_2000 AS census_tract_2000, + ap.censusblock_2000_basic AS census_block_2000, + ap.censusblock_2000_suffix AS census_block_suffix_2000, ap.assemdist AS assembly_district, ap.electdist AS election_district, ap.hurricane_evacuation_zone, @@ -42,10 +42,10 @@ WITH atomic_polygons_with_lookups AS ( WHEN pb.patrol_borough = 'Queens South' THEN '8' END AS patrol_borough, beat.sector AS police_sector, - ap.censustract_2010_basic, - ap.censustract_2010_suffix, - ap.censusblock_2010_basic, - ap.censusblock_2010_suffix, + ap.censustract_2010_basic AS census_tract_2010_basic, + ap.censustract_2010_suffix AS 
census_tract_2010_suffix, + ap.censusblock_2010_basic AS census_block_2010, + ap.censusblock_2010_suffix AS census_block_suffix_2010, ct2020.neighborhood_code AS nta2020, ct2020.cdta_code AS cdta, ap.commercial_waste_zone AS cwz, @@ -72,5 +72,5 @@ WITH atomic_polygons_with_lookups AS ( SELECT {{ apply_text_formatting_from_seed('text_formatting__thinlion_dat') }} FROM atomic_polygons_with_lookups -ORDER BY censustract_2020, dynamic_block +ORDER BY census_tract_2020, dynamic_block From dd893349237f84dcce08f6e3e6f97becc2b825e0 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Thu, 26 Feb 2026 15:38:21 -0500 Subject: [PATCH 12/13] fix dat file name --- products/cscl/recipe.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index 95fd68adcd..33cba9d1ec 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -281,24 +281,24 @@ exports: - name: thin_lion filename: nyc.thinlion format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } - name: bronx_thin_lion filename: BronxThinLION.txt format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } - name: brooklyn_thin_lion filename: BrooklynThinLION.txt format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } - name: manhattan_thin_lion filename: ManhattanThinLION.txt format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } - name: queens_thin_lion filename: QueensThinLION.txt format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } - name: staten_island_thin_lion filename: StatenIslandThinLION.txt format: dat - custom: { formatting: thinlion } + custom: { formatting: thinlion_dat } From 68dad671151e17443082666c28fc65ccb96cc307 Mon Sep 17 00:00:00 2001 From: Alex Richey Date: Tue, 3 Mar 2026 16:55:01 -0500 Subject: [PATCH 13/13] use st_makevalid on atomic poly geoms --- 
products/cscl/models/staging/stg__atomicpolygons.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/cscl/models/staging/stg__atomicpolygons.sql b/products/cscl/models/staging/stg__atomicpolygons.sql index a0e619489a..d34d4fde7b 100644 --- a/products/cscl/models/staging/stg__atomicpolygons.sql +++ b/products/cscl/models/staging/stg__atomicpolygons.sql @@ -40,6 +40,6 @@ SELECT commercial_waste_zone, hurricane_evacuation_zone, censustract_1990, - linearize(geom) AS geom, + st_makevalid(linearize(geom)) AS geom, geom AS raw_geom FROM {{ source("recipe_sources", "dcp_cscl_atomicpolygons") }}