diff --git a/products/cscl/models/product/thinlion/_thinlion.yml b/products/cscl/models/product/thinlion/_thinlion.yml
new file mode 100644
index 0000000000..811bfafcb9
--- /dev/null
+++ b/products/cscl/models/product/thinlion/_thinlion.yml
@@ -0,0 +1,50 @@
+version: 2
+
+models:
+- name: thinlion_by_field  # citywide model; each borough *_by_field model selects from it
+  columns:
+  - name: borough
+    data_type: string
+    tests:
+    - not_null
+    - accepted_values:
+        arguments:
+          values: [ "1", "2", "3", "4", "5" ]  # the codes the borough models filter on: 1 Manhattan, 2 Bronx, 3 Brooklyn, 4 Queens, 5 Staten Island
+  - name: census_tract_2020
+    data_type: string
+    tests:
+    - not_null
+  - name: dynamic_block
+    data_type: string
+    tests:
+    - not_null
+    - dbt_expectations.expect_column_value_lengths_to_equal:
+        arguments: { value: 3 }  # thinlion_by_field derives it as RIGHT(atomicid, 3)
+
+# Borough-specific filtered models
+- name: thinlion_manhattan_by_field
+- name: thinlion_bronx_by_field
+- name: thinlion_brooklyn_by_field
+- name: thinlion_queens_by_field
+- name: thinlion_statenisland_by_field
+
+# Borough export models with text output
+- name: thinlion_manhattan
+  columns: &dat_column_123  # YAML anchor; the other four export models reuse it via *dat_column_123
+  - name: dat_column
+    data_type: string
+    tests:
+    - dbt_expectations.expect_column_value_lengths_to_equal:
+        arguments: { value: 123 }  # the last field in text_formatting__thinlion_dat ends at index 123
+
+- name: thinlion_bronx
+  columns: *dat_column_123
+
+- name: thinlion_brooklyn
+  columns: *dat_column_123
+
+- name: thinlion_queens
+  columns: *dat_column_123
+
+- name: thinlion_statenisland
+  columns: *dat_column_123
diff --git a/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql
new file mode 100644
index 0000000000..ca170c5b4f
--- /dev/null
+++ b/products/cscl/models/product/thinlion/exports/thinlion_bronx.sql
@@ -0,0 +1 @@
+{{ select_rows_as_text(model='thinlion_bronx_by_field') }} {# Bronx text export; _thinlion.yml tests its dat_column for length 123 #}
diff --git a/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql
new file mode 100644
index 0000000000..de2e103237
--- /dev/null
+++ b/products/cscl/models/product/thinlion/exports/thinlion_brooklyn.sql
@@ -0,0 +1 @@
+{{ select_rows_as_text(model='thinlion_brooklyn_by_field') }} {# Brooklyn text export; _thinlion.yml tests its dat_column for length 123 #}
diff --git a/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql
new file mode 100644
index 0000000000..2c863fb7a6
--- /dev/null
+++ b/products/cscl/models/product/thinlion/exports/thinlion_manhattan.sql
@@ -0,0 +1 @@
+{{ select_rows_as_text(model='thinlion_manhattan_by_field') }} {# Manhattan text export; _thinlion.yml tests its dat_column for length 123 #}
diff --git a/products/cscl/models/product/thinlion/exports/thinlion_queens.sql b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql
new file mode 100644
index 0000000000..242fe3ad4e
--- /dev/null
+++ b/products/cscl/models/product/thinlion/exports/thinlion_queens.sql
@@ -0,0 +1 @@
+{{ select_rows_as_text(model='thinlion_queens_by_field') }} {# Queens text export; _thinlion.yml tests its dat_column for length 123 #}
diff --git a/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql
new file mode 100644
index 0000000000..a8e91f0966
--- /dev/null
+++ b/products/cscl/models/product/thinlion/exports/thinlion_statenisland.sql
@@ -0,0 +1 @@
+{{ select_rows_as_text(model='thinlion_statenisland_by_field') }} {# Staten Island text export; _thinlion.yml tests its dat_column for length 123 #}
diff --git a/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql b/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql
new file mode 100644
index 0000000000..d4c412f684
--- /dev/null
+++ b/products/cscl/models/product/thinlion/thinlion_bronx_by_field.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM {{ ref('thinlion_by_field') }}
+WHERE borough = '2' -- Bronx rows of the citywide model
diff --git a/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql b/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql
new file mode 100644
index 0000000000..cd717dc6c2
--- /dev/null
+++ b/products/cscl/models/product/thinlion/thinlion_brooklyn_by_field.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM {{ ref('thinlion_by_field') }}
+WHERE borough = '3' -- Brooklyn rows of the citywide model
diff --git a/products/cscl/models/product/thinlion/thinlion_by_field.sql
b/products/cscl/models/product/thinlion/thinlion_by_field.sql
new file mode 100644
index 0000000000..73d6b364b1
--- /dev/null
+++ b/products/cscl/models/product/thinlion/thinlion_by_field.sql
@@ -0,0 +1,87 @@
+-- Citywide Thin LION fields: stg__atomicpolygons enriched with census-tract,
+-- health-area and NYPD lookups. The final SELECT formats the columns via the
+-- text_formatting__thinlion_dat seed, whose field_name values these aliases match.
+WITH atomic_polygons_with_lookups AS (
+    SELECT
+        ap.borocode AS borough,
+        ap.censustract_2020 AS census_tract_2020,
+        RIGHT(ap.atomicid, 3) AS dynamic_block,
+        ap.censusblock_2020_basic AS census_block_2020,
+        ap.censusblock_2020_suffix AS census_block_suffix_2020,
+        ap.censustract_1990 AS census_tract_1990,
+        ct2010.cd_eligibility AS community_development_eligibility,
+        ap.commdist AS community_district,
+        ct2010.mcea AS minor_census_economic_area,
+        ct2010.health_area,
+        ha.health_ct_district AS health_center_district,
+        NULL AS police_patrol_borough_command, -- TL12: NYPDPrecinct doesn't have this field
+        prec.precinct AS police_precinct,
+        ap.water_flag AS water_block_mapping_suppression_flag,
+        ap.fire_company_type,
+        ap.fire_company_number,
+        -- NOTE(review): the seed labels Sanborn Borough-2/-3 "if any", but borocode is
+        -- emitted for all three slots regardless of sb2_*/sb3_* -- confirm intended.
+        ap.borocode AS sanborn_borough_1,
+        ap.sb1_volume AS sanborn_volume_1,
+        ap.sb1_page AS sanborn_page_1,
+        ap.borocode AS sanborn_borough_2,
+        ap.sb2_volume AS sanborn_volume_2,
+        ap.sb2_page AS sanborn_page_2,
+        ap.borocode AS sanborn_borough_3,
+        ap.sb3_volume AS sanborn_volume_3,
+        ap.sb3_page AS sanborn_page_3,
+        ap.censustract_2000 AS census_tract_2000,
+        ap.censusblock_2000_basic AS census_block_2000,
+        ap.censusblock_2000_suffix AS census_block_suffix_2000,
+        ap.assemdist AS assembly_district,
+        ap.electdist AS election_district,
+        ap.hurricane_evacuation_zone,
+        -- Recode the patrol borough name to its numeric code; names not listed
+        -- here (and polygons with no patrol-borough match) yield NULL.
+        CASE
+            WHEN pb.patrol_borough = 'Manhattan South' THEN '1'
+            WHEN pb.patrol_borough = 'Manhattan North' THEN '2'
+            WHEN pb.patrol_borough = 'Bronx' THEN '3'
+            WHEN pb.patrol_borough = 'Brooklyn South' THEN '4'
+            WHEN pb.patrol_borough = 'Brooklyn North' THEN '5'
+            WHEN pb.patrol_borough = 'Queens North' THEN '6'
+            WHEN pb.patrol_borough = 'Staten Island' THEN '7'
+            WHEN pb.patrol_borough = 'Queens South' THEN '8'
+        END AS patrol_borough,
+        beat.sector AS police_sector,
+        ap.censustract_2010_basic AS census_tract_2010_basic,
+        ap.censustract_2010_suffix AS census_tract_2010_suffix,
+        ap.censusblock_2010_basic AS census_block_2010,
+        ap.censusblock_2010_suffix AS census_block_suffix_2010,
+        ct2020.neighborhood_code AS nta2020,
+        ct2020.cdta_code AS cdta,
+        ap.commercial_waste_zone AS cwz,
+        ct2020.puma AS puma2020
+    FROM {{ ref("stg__atomicpolygons") }} AS ap
+    -- Join CensusTract2010 via concatenated key
+    LEFT JOIN {{ ref("stg__censustract2010") }} AS ct2010
+        ON ap.borocode || ap.censustract_2010 = ct2010.boroct
+    -- Join CensusTract2020 via concatenated key
+    LEFT JOIN {{ ref("stg__censustract2020") }} AS ct2020
+        ON ap.borocode || ap.censustract_2020 = ct2020.boroct
+    -- Join HealthArea via health_area from CensusTract2010
+    LEFT JOIN {{ ref("stg__healtharea") }} AS ha
+        ON ct2010.health_area = ha.healtharea
+    -- Spatial joins using centroid point-in-polygon
+    -- NOTE(review): a centroid can lie outside a concave polygon; ST_PointOnSurface
+    -- always returns a point on the geometry -- confirm centroid is intended.
+    LEFT JOIN {{ ref("stg__nypdprecinct") }} AS prec
+        ON ST_Within(ST_Centroid(ap.geom), prec.geom)
+    LEFT JOIN {{ ref("stg__nypdpatrolborough") }} AS pb
+        ON ST_Within(ST_Centroid(ap.geom), pb.geom)
+    LEFT JOIN {{ ref("stg__nypdbeat") }} AS beat
+        ON ST_Within(ST_Centroid(ap.geom), beat.geom)
+)
+
+SELECT
+    {{ apply_text_formatting_from_seed('text_formatting__thinlion_dat') }}
+FROM atomic_polygons_with_lookups
+-- Lead with borough: tracts are keyed by borocode + tract in the joins above, so
+-- tract + dynamic block alone need not be unique citywide and would sort ambiguously.
+ORDER BY borough, census_tract_2020, dynamic_block
+
diff --git a/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql b/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql
new file mode 100644
index 0000000000..1cee314c89
--- /dev/null
+++ b/products/cscl/models/product/thinlion/thinlion_manhattan_by_field.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM {{ ref('thinlion_by_field') }}
+WHERE borough = '1'
diff --git a/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql b/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql
new file mode 100644
index 0000000000..bc427be17d
--- /dev/null
+++
b/products/cscl/models/product/thinlion/thinlion_queens_by_field.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM {{ ref('thinlion_by_field') }}
+WHERE borough = '4' -- Queens rows of the citywide model
diff --git a/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql b/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql
new file mode 100644
index 0000000000..bae2c3085e
--- /dev/null
+++ b/products/cscl/models/product/thinlion/thinlion_statenisland_by_field.sql
@@ -0,0 +1,3 @@
+SELECT *
+FROM {{ ref('thinlion_by_field') }}
+WHERE borough = '5' -- Staten Island rows of the citywide model
diff --git a/products/cscl/models/sources.yml b/products/cscl/models/sources.yml
index db4ba06889..385071e953 100644
--- a/products/cscl/models/sources.yml
+++ b/products/cscl/models/sources.yml
@@ -24,6 +24,11 @@ sources:
       - name: dcp_cscl_lastword
       - name: dcp_cscl_namedintersection
       - name: dcp_cscl_nypdbeat
+      - name: dcp_cscl_censustract2010
+      - name: dcp_cscl_censustract2020
+      - name: dcp_cscl_healtharea
+      - name: dcp_cscl_nypdprecinct
+      - name: dcp_cscl_nypdpatrolborough
       - name: dcp_cscl_roadbed_pointer_list
       - name: dcp_cscl_sectionalmap
       - name: dcp_cscl_sedat
diff --git a/products/cscl/models/staging/stg__atomicpolygons.sql b/products/cscl/models/staging/stg__atomicpolygons.sql
index 29f032a587..d34d4fde7b 100644
--- a/products/cscl/models/staging/stg__atomicpolygons.sql
+++ b/products/cscl/models/staging/stg__atomicpolygons.sql
@@ -10,6 +10,7 @@ SELECT
     borough AS borocode,
     censustract_2000,
     left(censustract_2000, 4)::INT AS censustract_2000_basic,
+    -- TODO: you might need this for thinlion outputs
     nullif(right(censustract_2000, 2), '00')::INT AS censustract_2000_suffix,
     censustract_2010,
     left(censustract_2010, 4)::INT AS censustract_2010_basic,
@@ -26,6 +27,19 @@ SELECT
     nullif(assemdist, ' ') AS assemdist,
     nullif(electdist, ' ') AS electdist,
     nullif(schooldist, '0') AS schooldist,
-    linearize(geom) AS geom,
+    commdist,
+    LEFT(admin_fire_company, 1) AS fire_company_type,
+    RIGHT(admin_fire_company, 3) AS fire_company_number,
+    sb1_volume,
+    sb1_page,
+    sb2_volume,
+    sb2_page,
+    sb3_volume,
+    sb3_page,
+    water_flag,
+    commercial_waste_zone,
+    hurricane_evacuation_zone,
+    censustract_1990,
+    st_makevalid(linearize(geom)) AS geom,
     geom AS raw_geom
 FROM {{ source("recipe_sources", "dcp_cscl_atomicpolygons") }}
diff --git a/products/cscl/models/staging/stg__censustract2010.sql b/products/cscl/models/staging/stg__censustract2010.sql
new file mode 100644
index 0000000000..791b988b34
--- /dev/null
+++ b/products/cscl/models/staging/stg__censustract2010.sql
@@ -0,0 +1,28 @@
+{{ config(
+    materialized = 'table',
+    indexes=[
+        {'columns': ['geom'], 'type': 'gist'},
+    ]
+) }} -- staging copy of the CensusTract2010 source, built as a table with a GiST index on geom
+
+SELECT
+    ctlabel,
+    borocode,
+    neighborhood_code,
+    ct,
+    boroct, -- thinlion_by_field joins on this (atomic polygon borocode || censustract_2010)
+    cd_eligibility,
+    puma,
+    empowerment_zone,
+    mcea,
+    created_by,
+    created_date,
+    modified_by,
+    modified_date,
+    health_area, -- thinlion_by_field uses this to reach stg__healtharea
+    globalid,
+    shape_length,
+    shape_area,
+    geom AS raw_geom, -- geometry exactly as delivered by the source
+    LINEARIZE(geom) AS geom -- NOTE(review): presumably replaces curved segments with line segments; verify against the LINEARIZE definition
+FROM {{ source("recipe_sources", "dcp_cscl_censustract2010") }}
diff --git a/products/cscl/models/staging/stg__censustract2020.sql b/products/cscl/models/staging/stg__censustract2020.sql
new file mode 100644
index 0000000000..31bbcac444
--- /dev/null
+++ b/products/cscl/models/staging/stg__censustract2020.sql
@@ -0,0 +1,29 @@
+{{ config(
+    materialized = 'table',
+    indexes=[
+        {'columns': ['geom'], 'type': 'gist'},
+    ]
+) }} -- staging copy of the CensusTract2020 source, built as a table with a GiST index on geom
+
+SELECT
+    ctlabel,
+    borocode,
+    neighborhood_code, -- surfaces as nta2020 in thinlion_by_field
+    ct,
+    boroct, -- thinlion_by_field joins on this (atomic polygon borocode || censustract_2020)
+    cd_eligibility,
+    empowerment_zone,
+    mcea,
+    created_by,
+    created_date,
+    modified_by,
+    modified_date,
+    health_area,
+    globalid,
+    cdta_code, -- surfaces as cdta in thinlion_by_field
+    puma, -- surfaces as puma2020 in thinlion_by_field
+    shape_length,
+    shape_area,
+    geom AS raw_geom, -- geometry exactly as delivered by the source
+    LINEARIZE(geom) AS geom -- NOTE(review): presumably replaces curved segments with line segments; verify against the LINEARIZE definition
+FROM {{ source("recipe_sources", "dcp_cscl_censustract2020") }}
diff --git a/products/cscl/models/staging/stg__healtharea.sql b/products/cscl/models/staging/stg__healtharea.sql
new file mode 100644
index 0000000000..dc25229e67
--- /dev/null
+++ b/products/cscl/models/staging/stg__healtharea.sql
@@ -0,0 +1,21 @@
+{{ config(
+    materialized = 'table',
+    indexes=[
+        {'columns': ['geom'], 'type': 'gist'},
+    ]
+) }} -- staging copy of the HealthArea source, built as a table with a GiST index on geom
+
+SELECT
+    created_by,
+    created_date,
+    modified_by,
+    modified_date,
+    healtharea, -- thinlion_by_field joins on this from stg__censustract2010.health_area
+    health_ct_district, -- surfaces as health_center_district in thinlion_by_field
+    borough,
+    globalid,
+    shape_length,
+    shape_area,
+    geom AS raw_geom, -- geometry exactly as delivered by the source
+    LINEARIZE(geom) AS geom -- NOTE(review): presumably replaces curved segments with line segments; verify against the LINEARIZE definition
+FROM {{ source("recipe_sources", "dcp_cscl_healtharea") }}
diff --git a/products/cscl/models/staging/stg__nypdpatrolborough.sql b/products/cscl/models/staging/stg__nypdpatrolborough.sql
new file mode 100644
index 0000000000..12db91d3c2
--- /dev/null
+++ b/products/cscl/models/staging/stg__nypdpatrolborough.sql
@@ -0,0 +1,19 @@
+{{ config(
+    materialized = 'table',
+    indexes=[
+        {'columns': ['geom'], 'type': 'gist'},
+    ]
+) }} -- staging copy of the NYPDPatrolBorough source, built as a table with a GiST index on geom
+
+SELECT
+    created_by,
+    created_date,
+    modified_by,
+    modified_date,
+    globalid,
+    patrol_borough, -- name recoded to '1'..'8' by the CASE in thinlion_by_field
+    shape_length,
+    shape_area,
+    geom AS raw_geom, -- geometry exactly as delivered by the source
+    LINEARIZE(geom) AS geom -- used by the ST_Within join in thinlion_by_field; NOTE(review): stg__atomicpolygons wraps this in st_makevalid -- confirm none is needed here
+FROM {{ source("recipe_sources", "dcp_cscl_nypdpatrolborough") }}
diff --git a/products/cscl/models/staging/stg__nypdprecinct.sql b/products/cscl/models/staging/stg__nypdprecinct.sql
new file mode 100644
index 0000000000..a8237853c1
--- /dev/null
+++ b/products/cscl/models/staging/stg__nypdprecinct.sql
@@ -0,0 +1,19 @@
+{{ config(
+    materialized = 'table',
+    indexes=[
+        {'columns': ['geom'], 'type': 'gist'},
+    ]
+) }} -- staging copy of the NYPDPrecinct source, built as a table with a GiST index on geom
+
+SELECT
+    precinct, -- surfaces as police_precinct in thinlion_by_field
+    globalid,
+    created_by,
+    created_date,
+    modified_by,
+    modified_date,
+    shape_length,
+    shape_area,
+    geom AS raw_geom, -- geometry exactly as delivered by the source
+    LINEARIZE(geom) AS geom -- used by the ST_Within join in thinlion_by_field; NOTE(review): stg__atomicpolygons wraps this in st_makevalid -- confirm none is needed here
+FROM {{ source("recipe_sources", "dcp_cscl_nypdprecinct") }}
diff --git a/products/cscl/poc_validation/prod_data_loader.py b/products/cscl/poc_validation/prod_data_loader.py
index 64fccda0c7..4e49c64f45 100644
--- a/products/cscl/poc_validation/prod_data_loader.py
+++ b/products/cscl/poc_validation/prod_data_loader.py
@@ -156,20 +156,22 @@ def _load(
     """
     Primary purpose is to load production outputs for comparison to outputs of this pipeline
     """
-    if not version or local:
+    if not (version or
local): raise Exception( "Either specify loading locally with '-l' flag or specify version to pull from s3 with '-v' flag" ) - for dataset in datasets: - file_name = datasets_by_name[dataset].file_name - s3.download_file( - "edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name - ) + if not local: + for dataset in datasets: + file_name = datasets_by_name[dataset].file_name + s3.download_file( + "edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name + ) load_datasets(datasets, local_folder) - boro_level_files = {"lion_dat", "face_code"} + boro_level_files = {"lion", "face_code"} + # bookmark for file in boro_level_files: if any(dataset.endswith(f"_{file}") for dataset in datasets): create_citywide_table(file) diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index f4d2bf5bf4..33cba9d1ec 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -1,6 +1,6 @@ name: CSCL product: cscl -version: 25d +version: 26a inputs: dataset_defaults: @@ -90,6 +90,31 @@ inputs: custom: filename: ETL Working GDB.gdb.zip layer_name: NYPDBeat + - name: dcp_cscl_gdb + import_as: dcp_cscl_nypdprecinct + custom: + filename: ETL Working GDB.gdb.zip + layer_name: NYPDPrecinct + - name: dcp_cscl_gdb + import_as: dcp_cscl_nypdpatrolborough + custom: + filename: ETL Working GDB.gdb.zip + layer_name: NYPDPatrolBorough + - name: dcp_cscl_gdb + import_as: dcp_cscl_censustract2010 + custom: + filename: ETL Working GDB.gdb.zip + layer_name: CensusTract2010 + - name: dcp_cscl_gdb + import_as: dcp_cscl_censustract2020 + custom: + filename: ETL Working GDB.gdb.zip + layer_name: CensusTract2020 + - name: dcp_cscl_gdb + import_as: dcp_cscl_healtharea + custom: + filename: ETL Working GDB.gdb.zip + layer_name: HealthArea - name: dcp_cscl_gdb import_as: dcp_cscl_roadbed_pointer_list custom: @@ -251,3 +276,29 @@ exports: filename: StatenIslandFace.txt format: dat custom: { formatting: face_code } + + # Thin LION + - name: thin_lion + 
filename: nyc.thinlion + format: dat + custom: { formatting: thinlion_dat } + - name: bronx_thin_lion + filename: BronxThinLION.txt + format: dat + custom: { formatting: thinlion_dat } + - name: brooklyn_thin_lion + filename: BrooklynThinLION.txt + format: dat + custom: { formatting: thinlion_dat } + - name: manhattan_thin_lion + filename: ManhattanThinLION.txt + format: dat + custom: { formatting: thinlion_dat } + - name: queens_thin_lion + filename: QueensThinLION.txt + format: dat + custom: { formatting: thinlion_dat } + - name: staten_island_thin_lion + filename: StatenIslandThinLION.txt + format: dat + custom: { formatting: thinlion_dat } diff --git a/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv new file mode 100644 index 0000000000..ab806c523d --- /dev/null +++ b/products/cscl/seeds/text_formatting/text_formatting__thinlion_dat.csv @@ -0,0 +1,42 @@ +field_number,field_name,field_label,field_length,start_index,end_index,justify_and_fill,blank_if_none +TL1,borough,Borough,1,1,1,RJZF,FALSE +TL2,census_tract_2020,2020 Census Tract,6,2,7,RJZF,FALSE +TL3,dynamic_block,Dynamic Block,3,8,10,RJZF,FALSE +TL4,census_block_2020,2020 Census Block Basic,4,11,14,RJZF,FALSE +TL5,census_block_suffix_2020,2020 Census Block Suffix,1,15,15,RJZF,TRUE +TL6,census_tract_1990,1990 Census Tract,6,16,21,RJZF,FALSE +TL7,community_development_eligibility,Community Development Eligibility,1,22,22,RJZF,FALSE +TL8,community_district,Community District,3,23,25,RJZF,FALSE +TL9,minor_census_economic_area,Minor Census Economic Area,4,26,29,RJZF,FALSE +TL10,health_area,Health Area,4,30,33,RJZF,FALSE +TL11,health_center_district,Health Center District,2,34,35,RJZF,FALSE +TL12,police_patrol_borough_command,Police Patrol Borough Command,1,36,36,RJZF,FALSE +TL13,police_precinct,Police Precinct,3,37,39,RJZF,FALSE +TL14,water_block_mapping_suppression_flag,Water Block Mapping Suppression 
Flag,1,40,40,RJZF,FALSE +TL15,fire_company_type,Fire Company Type,1,41,41,RJZF,TRUE +TL16,fire_company_number,Fire Company Number,3,42,44,RJZF,TRUE +TL17,sanborn_borough_1,Sanborn Borough-1,1,45,45,RJZF,TRUE +TL18,sanborn_volume_1,Sanborn Volume-1,3,46,48,LJSF,TRUE +TL19,sanborn_page_1,Sanborn Page-1,4,49,52,LJSF,TRUE +TL20,sanborn_borough_2,"Sanborn Borough-2, if any",1,53,53,RJZF,TRUE +TL21,sanborn_volume_2,"Sanborn Volume-2, if any",3,54,56,LJSF,TRUE +TL22,sanborn_page_2,"Sanborn Page-2, if any",4,57,60,LJSF,TRUE +TL23,sanborn_borough_3,"Sanborn Borough-3, if any",1,61,61,RJZF,TRUE +TL24,sanborn_volume_3,"Sanborn Volume-3, if any",3,62,64,LJSF,TRUE +TL25,sanborn_page_3,"Sanborn Page-3, if any",4,65,68,LJSF,TRUE +TL26,census_tract_2000,2000 Census Tract,6,69,74,RJZF,FALSE +TL27,census_block_2000,2000 Census Block Basic,4,75,78,RJZF,FALSE +TL28,census_block_suffix_2000,2000 Census Block Suffix,1,79,79,RJZF,TRUE +TL29,assembly_district,Assembly District,2,81,82,RJZF,TRUE +TL30,election_district,Election District,3,84,86,RJZF,FALSE +TL31,hurricane_evacuation_zone,Hurricane Evacuation Zone,2,87,88,LJSF,FALSE +TL32,patrol_borough,Patrol Borough,2,89,90,LJSF,FALSE +TL33,police_sector,Police Sector,4,91,94,RJSF,FALSE +TL34,census_tract_2010_basic,2010 Census Tract Basic,4,95,98,RJSF,FALSE +TL34_1,census_tract_2010_suffix,2010 Census Tract Suffix,2,99,100,RJZF,TRUE +TL35,census_block_2010,2010 Census Block,4,101,104,RJZF,FALSE +TL36,census_block_suffix_2010,2010 Census Block Suffix,1,105,105,RJZF,TRUE +TL37,nta2020,NTA2020,6,106,111,RJZF,TRUE +TL38,cdta,CDTA,4,112,115,RJZF,TRUE +TL39,cwz,CWZ,4,116,119,RJSF,TRUE +TL40,puma2020,PUMA2020,4,120,123,RJZF,TRUE