Skip to content
Draft
50 changes: 50 additions & 0 deletions products/cscl/models/product/thinlion/_thinlion.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
version: 2

models:
  # Field-level thinlion model. The tests below pin the borough code domain and
  # the fixed width of the dynamic block.
  - name: thinlion_by_field
    columns:
      - name: borough
        data_type: string
        tests:
          - not_null
          - accepted_values:
              arguments:
                values:
                  - "1"
                  - "2"
                  - "3"
                  - "4"
                  - "5"
      - name: census_tract_2020
        data_type: string
        tests:
          - not_null
      - name: dynamic_block
        data_type: string
        tests:
          - not_null
          - dbt_expectations.expect_column_value_lengths_to_equal:
              arguments:
                value: 3

  # Borough-specific filtered models
  - name: thinlion_manhattan_by_field
  - name: thinlion_bronx_by_field
  - name: thinlion_brooklyn_by_field
  - name: thinlion_queens_by_field
  - name: thinlion_statenisland_by_field

  # Borough export models with text output.
  # The column spec is declared once on the first model (YAML anchor) and
  # reused by the other boroughs (YAML alias): every dat_column value must be
  # exactly 123 characters long.
  - name: thinlion_manhattan
    columns: &dat_column_123
      - name: dat_column
        data_type: string
        tests:
          - dbt_expectations.expect_column_value_lengths_to_equal:
              arguments:
                value: 123

  - name: thinlion_bronx
    columns: *dat_column_123

  - name: thinlion_brooklyn
    columns: *dat_column_123

  - name: thinlion_queens
    columns: *dat_column_123

  - name: thinlion_statenisland
    columns: *dat_column_123
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Text export model: hands thinlion_bronx_by_field to the select_rows_as_text macro (defined elsewhere in the project). #}
{{ select_rows_as_text(model="thinlion_bronx_by_field") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Text export model: hands thinlion_brooklyn_by_field to the select_rows_as_text macro (defined elsewhere in the project). #}
{{ select_rows_as_text(model="thinlion_brooklyn_by_field") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Text export model: hands thinlion_manhattan_by_field to the select_rows_as_text macro (defined elsewhere in the project). #}
{{ select_rows_as_text(model="thinlion_manhattan_by_field") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Text export model: hands thinlion_queens_by_field to the select_rows_as_text macro (defined elsewhere in the project). #}
{{ select_rows_as_text(model="thinlion_queens_by_field") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Text export model: hands thinlion_statenisland_by_field to the select_rows_as_text macro (defined elsewhere in the project). #}
{{ select_rows_as_text(model="thinlion_statenisland_by_field") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Keeps only the thinlion_by_field rows whose borough code is '2'.
-- NOTE(review): presumably the Bronx model (thinlion_bronx_by_field) -- confirm against the file name.
SELECT thinlion.*
FROM {{ ref("thinlion_by_field") }} AS thinlion
WHERE thinlion.borough = '2'
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Keeps only the thinlion_by_field rows whose borough code is '3'.
-- NOTE(review): presumably the Brooklyn model (thinlion_brooklyn_by_field) -- confirm against the file name.
SELECT thinlion.*
FROM {{ ref("thinlion_by_field") }} AS thinlion
WHERE thinlion.borough = '3'
76 changes: 76 additions & 0 deletions products/cscl/models/product/thinlion/thinlion_by_field.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- thinlion_by_field
-- Starts from stg__atomicpolygons, attaches census-tract, health-area and NYPD
-- lookup attributes, then renders the columns through the
-- text_formatting__thinlion_dat seed via apply_text_formatting_from_seed.
WITH atomic_polygons_with_lookups AS (
    SELECT
        ap.borocode AS borough,
        ap.censustract_2020 AS census_tract_2020,
        -- dynamic block = last three characters of the atomic polygon id
        RIGHT(ap.atomicid, 3) AS dynamic_block,
        ap.censusblock_2020_basic AS census_block_2020,
        ap.censusblock_2020_suffix AS census_block_suffix_2020,
        ap.censustract_1990 AS census_tract_1990,
        ct2010.cd_eligibility AS community_development_eligibility,
        ap.commdist AS community_district,
        ct2010.mcea AS minor_census_economic_area,
        ct2010.health_area,
        ha.health_ct_district AS health_center_district,
        NULL AS police_patrol_borough_command, -- TL12: NYPDPrecinct doesn't have this field
        prec.precinct AS police_precinct,
        ap.water_flag AS water_block_mapping_suppression_flag,
        ap.fire_company_type,
        ap.fire_company_number,
        -- The three Sanborn borough fields all repeat the polygon's own borough code.
        ap.borocode AS sanborn_borough_1,
        ap.sb1_volume AS sanborn_volume_1,
        ap.sb1_page AS sanborn_page_1,
        ap.borocode AS sanborn_borough_2,
        ap.sb2_volume AS sanborn_volume_2,
        ap.sb2_page AS sanborn_page_2,
        ap.borocode AS sanborn_borough_3,
        ap.sb3_volume AS sanborn_volume_3,
        ap.sb3_page AS sanborn_page_3,
        ap.censustract_2000 AS census_tract_2000,
        ap.censusblock_2000_basic AS census_block_2000,
        ap.censusblock_2000_suffix AS census_block_suffix_2000,
        ap.assemdist AS assembly_district,
        ap.electdist AS election_district,
        ap.hurricane_evacuation_zone,
        -- Patrol borough name -> single-character code; unmatched names yield NULL.
        CASE
            WHEN pb.patrol_borough = 'Manhattan South' THEN '1'
            WHEN pb.patrol_borough = 'Manhattan North' THEN '2'
            WHEN pb.patrol_borough = 'Bronx' THEN '3'
            WHEN pb.patrol_borough = 'Brooklyn South' THEN '4'
            WHEN pb.patrol_borough = 'Brooklyn North' THEN '5'
            WHEN pb.patrol_borough = 'Queens North' THEN '6'
            WHEN pb.patrol_borough = 'Staten Island' THEN '7'
            WHEN pb.patrol_borough = 'Queens South' THEN '8'
        END AS patrol_borough,
        beat.sector AS police_sector,
        ap.censustract_2010_basic AS census_tract_2010_basic,
        ap.censustract_2010_suffix AS census_tract_2010_suffix,
        ap.censusblock_2010_basic AS census_block_2010,
        ap.censusblock_2010_suffix AS census_block_suffix_2010,
        ct2020.neighborhood_code AS nta2020,
        ct2020.cdta_code AS cdta,
        ap.commercial_waste_zone AS cwz,
        ct2020.puma AS puma2020
    FROM {{ ref("stg__atomicpolygons") }} AS ap
    -- Join CensusTract2010 via concatenated key
    LEFT JOIN {{ ref("stg__censustract2010") }} AS ct2010
        ON ap.borocode || ap.censustract_2010 = ct2010.boroct
    -- Join CensusTract2020 via concatenated key
    LEFT JOIN {{ ref("stg__censustract2020") }} AS ct2020
        ON ap.borocode || ap.censustract_2020 = ct2020.boroct
    -- Join HealthArea via health_area from CensusTract2010
    LEFT JOIN {{ ref("stg__healtharea") }} AS ha
        ON ct2010.health_area = ha.healtharea
    -- Spatial joins using centroid point-in-polygon
    -- NOTE(review): a centroid may match no polygon (or more than one if the
    -- target layers overlap); confirm that missing values / extra rows cannot
    -- occur for these layers.
    LEFT JOIN {{ ref("stg__nypdprecinct") }} AS prec
        ON ST_Within(ST_Centroid(ap.geom), prec.geom)
    LEFT JOIN {{ ref("stg__nypdpatrolborough") }} AS pb
        ON ST_Within(ST_Centroid(ap.geom), pb.geom)
    LEFT JOIN {{ ref("stg__nypdbeat") }} AS beat
        ON ST_Within(ST_Centroid(ap.geom), beat.geom)
)

SELECT
    {{ apply_text_formatting_from_seed('text_formatting__thinlion_dat') }}
FROM atomic_polygons_with_lookups
-- The tract value is not borough-qualified (the tract joins above key on
-- borocode || tract), so lead with borough to keep the sort key unambiguous.
ORDER BY borough, census_tract_2020, dynamic_block

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Keeps only the thinlion_by_field rows whose borough code is '1'.
-- NOTE(review): presumably the Manhattan model (thinlion_manhattan_by_field) -- confirm against the file name.
SELECT thinlion.*
FROM {{ ref("thinlion_by_field") }} AS thinlion
WHERE thinlion.borough = '1'
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Keeps only the thinlion_by_field rows whose borough code is '4'.
-- NOTE(review): presumably the Queens model (thinlion_queens_by_field) -- confirm against the file name.
SELECT thinlion.*
FROM {{ ref("thinlion_by_field") }} AS thinlion
WHERE thinlion.borough = '4'
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Keeps only the thinlion_by_field rows whose borough code is '5'.
-- NOTE(review): presumably the Staten Island model (thinlion_statenisland_by_field) -- confirm against the file name.
SELECT thinlion.*
FROM {{ ref("thinlion_by_field") }} AS thinlion
WHERE thinlion.borough = '5'
5 changes: 5 additions & 0 deletions products/cscl/models/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ sources:
- name: dcp_cscl_lastword
- name: dcp_cscl_namedintersection
- name: dcp_cscl_nypdbeat
- name: dcp_cscl_censustract2010
- name: dcp_cscl_censustract2020
- name: dcp_cscl_healtharea
- name: dcp_cscl_nypdprecinct
- name: dcp_cscl_nypdpatrolborough
- name: dcp_cscl_roadbed_pointer_list
- name: dcp_cscl_sectionalmap
- name: dcp_cscl_sedat
Expand Down
16 changes: 15 additions & 1 deletion products/cscl/models/staging/stg__atomicpolygons.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ SELECT
borough AS borocode,
censustract_2000,
left(censustract_2000, 4)::INT AS censustract_2000_basic,
-- TODO: you might need this for thinlion outputs
nullif(right(censustract_2000, 2), '00')::INT AS censustract_2000_suffix,
censustract_2010,
left(censustract_2010, 4)::INT AS censustract_2010_basic,
Expand All @@ -26,6 +27,19 @@ SELECT
nullif(assemdist, ' ') AS assemdist,
nullif(electdist, ' ') AS electdist,
nullif(schooldist, '0') AS schooldist,
linearize(geom) AS geom,
commdist,
LEFT(admin_fire_company, 1) AS fire_company_type,
RIGHT(admin_fire_company, 3) AS fire_company_number,
sb1_volume,
sb1_page,
sb2_volume,
sb2_page,
sb3_volume,
sb3_page,
water_flag,
commercial_waste_zone,
hurricane_evacuation_zone,
censustract_1990,
st_makevalid(linearize(geom)) AS geom,
geom AS raw_geom
FROM {{ source("recipe_sources", "dcp_cscl_atomicpolygons") }}
28 changes: 28 additions & 0 deletions products/cscl/models/staging/stg__censustract2010.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{#
    Staging model for the dcp_cscl_censustract2010 source.
    Materialized as a table with a GiST index on geom.
#}
{{
    config(
        materialized="table",
        indexes=[{"columns": ["geom"], "type": "gist"}]
    )
}}

SELECT
    src.ctlabel,
    src.borocode,
    src.neighborhood_code,
    src.ct,
    src.boroct,
    src.cd_eligibility,
    src.puma,
    src.empowerment_zone,
    src.mcea,
    src.created_by,
    src.created_date,
    src.modified_by,
    src.modified_date,
    src.health_area,
    src.globalid,
    src.shape_length,
    src.shape_area,
    -- Source geometry is kept untouched as raw_geom; geom is the linearize() result.
    src.geom AS raw_geom,
    linearize(src.geom) AS geom
FROM {{ source("recipe_sources", "dcp_cscl_censustract2010") }} AS src
29 changes: 29 additions & 0 deletions products/cscl/models/staging/stg__censustract2020.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{#
    Staging model for the dcp_cscl_censustract2020 source.
    Materialized as a table with a GiST index on geom.
#}
{{
    config(
        materialized="table",
        indexes=[{"columns": ["geom"], "type": "gist"}]
    )
}}

SELECT
    src.ctlabel,
    src.borocode,
    src.neighborhood_code,
    src.ct,
    src.boroct,
    src.cd_eligibility,
    src.empowerment_zone,
    src.mcea,
    src.created_by,
    src.created_date,
    src.modified_by,
    src.modified_date,
    src.health_area,
    src.globalid,
    src.cdta_code,
    src.puma,
    src.shape_length,
    src.shape_area,
    -- Source geometry is kept untouched as raw_geom; geom is the linearize() result.
    src.geom AS raw_geom,
    linearize(src.geom) AS geom
FROM {{ source("recipe_sources", "dcp_cscl_censustract2020") }} AS src
21 changes: 21 additions & 0 deletions products/cscl/models/staging/stg__healtharea.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
    Staging model for the dcp_cscl_healtharea source.
    Materialized as a table with a GiST index on geom.
#}
{{
    config(
        materialized="table",
        indexes=[{"columns": ["geom"], "type": "gist"}]
    )
}}

SELECT
    src.created_by,
    src.created_date,
    src.modified_by,
    src.modified_date,
    src.healtharea,
    src.health_ct_district,
    src.borough,
    src.globalid,
    src.shape_length,
    src.shape_area,
    -- Source geometry is kept untouched as raw_geom; geom is the linearize() result.
    src.geom AS raw_geom,
    linearize(src.geom) AS geom
FROM {{ source("recipe_sources", "dcp_cscl_healtharea") }} AS src
19 changes: 19 additions & 0 deletions products/cscl/models/staging/stg__nypdpatrolborough.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{#
    Staging model for the dcp_cscl_nypdpatrolborough source.
    Materialized as a table with a GiST index on geom.
    NOTE(review): geom is only linearized here, not passed through
    st_makevalid; confirm the source polygons are valid before relying on
    them in spatial predicates.
#}
{{
    config(
        materialized="table",
        indexes=[{"columns": ["geom"], "type": "gist"}]
    )
}}

SELECT
    src.created_by,
    src.created_date,
    src.modified_by,
    src.modified_date,
    src.globalid,
    src.patrol_borough,
    src.shape_length,
    src.shape_area,
    -- Source geometry is kept untouched as raw_geom; geom is the linearize() result.
    src.geom AS raw_geom,
    linearize(src.geom) AS geom
FROM {{ source("recipe_sources", "dcp_cscl_nypdpatrolborough") }} AS src
19 changes: 19 additions & 0 deletions products/cscl/models/staging/stg__nypdprecinct.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{#
    Staging model for the dcp_cscl_nypdprecinct source.
    Materialized as a table with a GiST index on geom.
    NOTE(review): geom is only linearized here, not passed through
    st_makevalid; confirm the source polygons are valid before relying on
    them in spatial predicates.
#}
{{
    config(
        materialized="table",
        indexes=[{"columns": ["geom"], "type": "gist"}]
    )
}}

SELECT
    src.precinct,
    src.globalid,
    src.created_by,
    src.created_date,
    src.modified_by,
    src.modified_date,
    src.shape_length,
    src.shape_area,
    -- Source geometry is kept untouched as raw_geom; geom is the linearize() result.
    src.geom AS raw_geom,
    linearize(src.geom) AS geom
FROM {{ source("recipe_sources", "dcp_cscl_nypdprecinct") }} AS src
16 changes: 9 additions & 7 deletions products/cscl/poc_validation/prod_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,20 +156,22 @@ def _load(
"""
Primary purpose is to load production outputs for comparison to outputs of this pipeline
"""
if not version or local:
if not (version or local):
raise Exception(
"Either specify loading locally with '-l' flag or specify version to pull from s3 with '-v' flag"
)

for dataset in datasets:
file_name = datasets_by_name[dataset].file_name
s3.download_file(
"edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name
)
if not local:
for dataset in datasets:
file_name = datasets_by_name[dataset].file_name
s3.download_file(
"edm-private", f"cscl_etl/{version}/{file_name}", local_folder / file_name
)

load_datasets(datasets, local_folder)

boro_level_files = {"lion_dat", "face_code"}
boro_level_files = {"lion", "face_code"}
# bookmark
for file in boro_level_files:
if any(dataset.endswith(f"_{file}") for dataset in datasets):
create_citywide_table(file)
Expand Down
Loading
Loading