Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/cscl_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ jobs:
./bash/build_env_setup.sh

- name: Plan build
run: dcpy lc builds plan ${{ inputs.plan_command }}
run: dcpy lifecycle builds plan ${{ inputs.plan_command }}

- name: Dataloading
run: dcpy lc builds load load --recipe-path ${{ inputs.recipe_file }}.lock.yml --cache-schema recipe_cache --cached-entity-type view
run: dcpy lifecycle builds load load --recipe-path ${{ inputs.recipe_file }}.lock.yml --cache-schema recipe_cache --cached-entity-type view

- name: Build
run: |
Expand All @@ -73,7 +73,7 @@ jobs:
dbt build --full-refresh

- name: Export
run: dcpy lc builds build export --recipe-path ${{ inputs.recipe_file }}.lock.yml
run: dcpy lifecycle builds build export --recipe-path ${{ inputs.recipe_file }}.lock.yml

- name: Validate against production outputs
# TODO - this currently relies on version hard-coded in recipe.yml
Expand Down
1 change: 0 additions & 1 deletion dcpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def cli():
app = typer.Typer()

app.add_typer(lifecycle.app, name="lifecycle")
app.add_typer(lifecycle.app, name="lc") # alias
app.add_typer(connectors.app, name="connectors")
app.add_typer(utils.app, name="utils")
app()
Expand Down
4 changes: 3 additions & 1 deletion products/cscl/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ model-paths: [ "models" ]

tests:
+store_failures: true
schema: "_tests"
+schema: "_tests"

models:
cscl:
Expand All @@ -16,6 +16,8 @@ models:
+materialized: view
product:
+materialized: table
etl_dev_qa:
+materialized: table

on-run-start:
- '{{ create_pg_functions() }}'
Expand Down
34 changes: 34 additions & 0 deletions products/cscl/models/etl_dev_qa/qa__rpl_order.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- QA model: classifies differences between the production RPL and the dbt-built RPL.
-- especially concerned about diffs in the order of rows within each generic_segmentid grouping
--
-- Both queries expose rpl_id as "<generic_segmentid zero-padded to 7>_<position in grouping>",
-- which is the primary key handed to audit_helper. A roadbed row that moved to a different
-- position within its grouping therefore shows up with dbt_audit_row_status = 'modified'.

-- Production side: the position is derived from un-ordered window functions, i.e. from the
-- order in which the table happens to be scanned.
-- NOTE(review): presumably production_outputs.rpl preserves the source file order -- confirm.
{% set old_query %}
    select
        ROW_NUMBER() OVER () as row_number,
        generic_segmentid || '_' || ROW_NUMBER() OVER (
            PARTITION BY generic_segmentid
        ) AS rpl_id,
        *
    from production_outputs.rpl
    order by row_number asc
{% endset %}

-- dbt side: pad only the segment-id part of rpl_id and re-attach the position suffix.
-- Padding the whole string to a fixed width of 9 is only right for one-digit positions:
-- lpad truncates strings longer than the target width, so a 7-digit id with position 10+
-- would lose its last digit and collide with another key.
{% set new_query %}
    select
        ROW_NUMBER() OVER () as row_number,
        lpad(split_part(rpl_id, '_', 1), 7, '0') || '_' || split_part(rpl_id, '_', 2) as rpl_id,
        generic_segmentid,
        roadbed_segmentid,
        roadbed_position_code
    from {{ ref('rpl_by_field') }}
    order by row_number asc
{% endset %}

{#
    Disabled column lookup, kept for reference. It lives in a Jinja comment because a SQL
    "--" prefix does not stop Jinja from evaluating the tag (and running the lookup).
    {% set columns = dbt_utils.get_filtered_columns_in_relation(ref('rpl_by_field')) %}
#}

{{
    audit_helper.compare_and_classify_query_results(
        old_query,
        new_query,
        primary_key_columns=['rpl_id'],
        columns=['generic_segmentid', 'roadbed_segmentid', 'roadbed_position_code'],
        sample_limit=50
    )
}}
87 changes: 87 additions & 0 deletions products/cscl/models/etl_dev_qa/qa__rpl_order_diffs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
-- For RPL rows with diffs in their ordering, get and compare their ordering along with the relevant geometries.
-- rpl_id indicates the order of roadbed_segmentid rows within each generic_segmentid grouping.
-- Output: one row per roadbed segment of every affected generic segment (dev vs. prod rpl_id,
-- roadbed geometry), plus one row per affected generic segment carrying only its own geometry.
WITH

-- generic segments for which qa__rpl_order reported at least one modified row
diff_ids AS (
    SELECT DISTINCT generic_segmentid
    FROM
        {{ ref('qa__rpl_order') }}
    WHERE
        dbt_audit_row_status = 'modified'
),

-- production rows of the affected groupings, with rpl_id derived from row position
-- NOTE(review): ROW_NUMBER has no ORDER BY, so positions follow the scan order of the joined
-- rows -- confirm this still matches the numbering produced in qa__rpl_order.
focus_rpl_prod AS (
    SELECT
        prod_rpl.generic_segmentid || '_' || ROW_NUMBER() OVER (
            PARTITION BY prod_rpl.generic_segmentid
        ) AS rpl_id,
        prod_rpl.*
    FROM
        production_outputs.rpl AS prod_rpl
    INNER JOIN diff_ids
        ON
            prod_rpl.generic_segmentid = diff_ids.generic_segmentid
),

-- dev rows of the affected groupings; dev ids are zero-padded to 7 to match diff_ids
focus_rpl AS (
    SELECT dev_rpl.*
    FROM
        {{ ref('int__rpl') }} AS dev_rpl
    INNER JOIN diff_ids
        ON
            LPAD(dev_rpl.generic_segmentid::TEXT, 7, '0') = diff_ids.generic_segmentid
),

-- LION rows of the affected generic segments (source of the generic geometries)
focus_lion AS (
    SELECT dev_lion.*
    FROM
        {{ ref('int__lion') }} AS dev_lion
    INNER JOIN diff_ids
        ON
            LPAD(dev_lion.segmentid::TEXT, 7, '0') = diff_ids.generic_segmentid
),

all_segments AS (
    SELECT
        focus_rpl.generic_segmentid,
        focus_rpl.roadbed_segmentid,
        -- Same "<padded segment id>_<position>" format as focus_rpl_prod.rpl_id. Built from its
        -- parts because LPAD on the whole rpl_id truncates anything longer than the target
        -- width, which would cut off the position suffix.
        LPAD(focus_rpl.generic_segmentid::TEXT, 7, '0')
        || '_'
        || focus_rpl.group_order_id::TEXT AS rpl_id_dev,
        focus_rpl_prod.rpl_id AS rpl_id_prod,
        focus_rpl.roadbed_position_code,
        focus_rpl.generic_segmenttype,
        ST_TRANSFORM(focus_rpl.midpoint, 4326) AS midpoint,
        ST_TRANSFORM(focus_rpl.geom, 4326) AS geom,
        null AS midpoint_generic,
        null AS geom_generic
    FROM
        focus_rpl
    LEFT JOIN focus_rpl_prod
        ON
            LPAD(focus_rpl.generic_segmentid::TEXT, 7, '0') = focus_rpl_prod.generic_segmentid
            AND LPAD(focus_rpl.roadbed_segmentid::TEXT, 7, '0') = focus_rpl_prod.roadbed_segmentid
    UNION ALL
    -- generic segment rows: only the *_generic geometry columns are populated
    SELECT
        focus_lion.segmentid AS generic_segmentid,
        null AS roadbed_segmentid,
        null AS rpl_id_dev,
        null AS rpl_id_prod,
        null AS roadbed_position_code,
        null AS generic_segmenttype,
        null AS midpoint,
        null AS geom,
        ST_TRANSFORM(focus_lion.midpoint, 4326) AS midpoint_generic,
        ST_TRANSFORM(focus_lion.geom, 4326) AS geom_generic
    FROM
        focus_lion
),

final_cte AS (
    SELECT *
    FROM
        all_segments
)

-- The sort is applied on the outermost query: an ORDER BY inside a CTE does not bind the
-- order in which the enclosing query returns its rows.
SELECT *
FROM
    final_cte
ORDER BY
    generic_segmentid ASC,
    rpl_id_dev ASC
18 changes: 18 additions & 0 deletions products/cscl/models/etl_dev_qa/qa__rpl_values.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- QA model: row-level comparison of the production RPL against the dbt-built RPL.
-- ignoring the order of rows within each generic_segmentid grouping to focus on testing for diffs in their values
-- Rows are matched on (generic_segmentid, roadbed_segmentid).
{% set old_relation = adapter.get_relation(
    database = "db-cscl",
    schema = "production_outputs",
    identifier = "rpl"
) -%}

{% set dbt_relation = ref('rpl_by_field') %}

{#- Only render the comparison at execution time; nothing is emitted while dbt is parsing. -#}
{%- if execute -%}
    {#- get_relation yields none for a missing relation: fail with a clear message here
        instead of an obscure error inside the audit_helper macro. -#}
    {%- if old_relation is none -%}
        {{ exceptions.raise_compiler_error(
            "qa__rpl_values: relation production_outputs.rpl was not found in database db-cscl"
        ) }}
    {%- endif -%}
    {{ audit_helper.compare_and_classify_relation_rows(
        a_relation = old_relation,
        b_relation = dbt_relation,
        primary_key_columns=['generic_segmentid', 'roadbed_segmentid'],
        columns = None,
        sample_limit=50
    ) }}
{%- endif -%}
1 change: 1 addition & 0 deletions products/cscl/models/intermediate/int__lion.sql
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ SELECT
segments.feature_type_description,
segments.source_table,
segments.geom,
segments.midpoint,
segments.globalid,
CASE
WHEN segments.source_table = 'centerline' THEN centerline.include_in_geosupport_lion
Expand Down
7 changes: 7 additions & 0 deletions products/cscl/models/intermediate/rpl/_int__rpl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Property file for the int__rpl model.
version: 2

models:
  - name: int__rpl
    columns:
      # rpl_id must be populated on every row and must never repeat
      - name: rpl_id
        tests:
          - unique
          - not_null
126 changes: 126 additions & 0 deletions products/cscl/models/intermediate/rpl/int__rpl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
-- Roadbed pointer list (RPL): one row per (generic segment, roadbed segment) pair from the
-- CSCL roadbed pointer list, enriched with node ids and geometries from LION and numbered
-- within each generic segment (group_order_id / rpl_id).
WITH lion AS (
    -- one row per segmentid; used twice below, for the generic and the roadbed segment
    SELECT DISTINCT ON (segmentid)
        segmentid,
        segment_type,
        from_nodeid,
        to_nodeid,
        geom,
        midpoint
    FROM {{ ref("int__lion") }}
    ORDER BY segmentid
),

cscl_rpl AS (
    SELECT * FROM {{ source("recipe_sources", "dcp_cscl_roadbed_pointer_list") }}
),

-- attributes looked up through the generic segment id
generic_attributes AS (
    SELECT
        cscl_rpl.generic_segmentid,
        lion.segment_type AS generic_segmenttype,
        cscl_rpl.roadbed_segmentid,
        cscl_rpl.roadbed_position_code,
        'B' AS node_correspondence_indicator, -- all records are 'B' in production, but the docs say it should be node_correspondence_ind
        -- cscl_rpl.node_correspondence_ind AS node_correspondence_indicator,
        -- chr(64 + n) turns a numeric level into a letter code (1 -> 'A', 2 -> 'B', ...)
        chr(64 + cscl_rpl.from_node_level_coincident_rb) AS from_node_level_code_of_coincident_roadbed_segment,
        chr(64 + cscl_rpl.to_node_level_coincident_rb) AS to_node_level_code_of_coincident_roadbed_segment,
        lion.from_nodeid AS from_nodeid_of_generic_segment,
        lion.to_nodeid AS to_nodeid_of_generic_segment,
        lion.geom AS generic_geom
    FROM cscl_rpl
    LEFT JOIN lion
        ON cscl_rpl.generic_segmentid = lion.segmentid
),

-- attributes looked up through the roadbed segment id; geom and midpoint are the roadbed's
roadbed_attributes AS (
    SELECT
        generic_attributes.*,
        lion.from_nodeid AS from_nodeid_of_roadbed_segment,
        lion.to_nodeid AS to_nodeid_of_roadbed_segment,
        lion.geom,
        lion.midpoint
    FROM generic_attributes
    LEFT JOIN lion
        ON generic_attributes.roadbed_segmentid = lion.segmentid
),

-- 2D cross product of the generic segment's start->end vector with the vector from the
-- generic start point to the roadbed midpoint. Its sign tells which side of the generic
-- segment the roadbed lies on.
cross_products AS (
    SELECT
        *,
        (
            (st_x(st_endpoint(generic_geom)) - st_x(st_startpoint(generic_geom)))
            * (st_y(midpoint) - st_y(st_startpoint(generic_geom)))
            - (st_y(st_endpoint(generic_geom)) - st_y(st_startpoint(generic_geom)))
            * (st_x(midpoint) - st_x(st_startpoint(generic_geom)))
        ) AS cross_product
    FROM roadbed_attributes
),

-- Attach the R outermost's cross product as a per-group sign reference. Comparing signs
-- (rather than using the raw sign of the cross product alone) means side determination is
-- correct regardless of the direction of the generic segment geometry.
side_reference AS (
    SELECT
        *,
        first_value(cross_product) OVER (
            PARTITION BY generic_segmentid
            ORDER BY
                -- NULLS LAST: under DESC, NULL sorts first by default, which would let a row
                -- with a NULL position code be picked as the reference ahead of the 'R' row
                (roadbed_position_code = 'R') DESC NULLS LAST,
                -- tie-breaker so the chosen reference is the same on every build
                roadbed_segmentid ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS r_cross_product
    FROM cross_products
),

-- Ordering:
-- 1. 'R' outermost
-- 2. Right-side 'I' segments, from outermost to innermost
-- 3. 'L' outermost
-- 4. Left-side 'I' segments, from outermost to innermost
--
-- For 'I' segments, side is determined by whether the row's cross product has the
-- same sign as the R outermost's cross product for that generic segment.
-- Within a side, rows are ordered by descending perpendicular distance from the
-- generic segment (outermost first).
add_group_order_id AS (
    SELECT
        *,
        row_number() OVER (
            PARTITION BY generic_segmentid
            ORDER BY
                CASE
                    WHEN roadbed_position_code = 'R' THEN 1
                    WHEN
                        roadbed_position_code = 'I'
                        AND sign(cross_product) = sign(r_cross_product) THEN 2
                    WHEN roadbed_position_code = 'L' THEN 3
                    ELSE 4 -- left-side 'I' segments
                END,
                -- Negate so outermost (largest perpendicular distance) sorts first
                CASE
                    WHEN roadbed_position_code = 'I'
                        THEN -st_distance(midpoint, generic_geom)
                    ELSE 0
                END,
                -- tie-breaker: keeps group_order_id (and so rpl_id) stable across builds
                roadbed_segmentid
        ) AS group_order_id
    FROM side_reference
),

final AS (
    SELECT
        generic_segmentid || '_' || group_order_id AS rpl_id,
        generic_segmentid,
        generic_segmenttype,
        roadbed_segmentid,
        group_order_id,
        roadbed_position_code,
        node_correspondence_indicator,
        from_node_level_code_of_coincident_roadbed_segment,
        to_node_level_code_of_coincident_roadbed_segment,
        from_nodeid_of_roadbed_segment,
        from_nodeid_of_generic_segment,
        to_nodeid_of_roadbed_segment,
        to_nodeid_of_generic_segment,
        midpoint,
        geom
    FROM add_group_order_id
    ORDER BY rpl_id
)

SELECT * FROM final
65 changes: 65 additions & 0 deletions products/cscl/models/product/rpl/_rpl.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Property file for the RPL product models. Both models enforce a contract, so the
# columns listed here must match the columns the models actually produce.
version: 2

models:
  # field-per-column form of the roadbed pointer list
  - name: rpl_by_field
    config:
      contract:
        enforced: true
    columns:
      - name: rpl_id
        data_type: string
      - name: generic_segmentid
        data_type: string
      - name: generic_segmenttype
        data_type: string
      - name: roadbed_segmentid
        data_type: string
      - name: filler_rpl3
        data_type: string
      - name: roadbed_position_code
        data_type: string
      - name: filler_rpl4
        data_type: string
      - name: node_correspondence_indicator
        data_type: string
      - name: filler_rpl5
        data_type: string
      - name: from_node_level_code_of_coincident_roadbed_segment
        data_type: string
      - name: filler_rpl6
        data_type: string
      - name: to_node_level_code_of_coincident_roadbed_segment
        data_type: string
      - name: filler_rpl7
        data_type: string
      - name: from_nodeid_of_roadbed_segment
        data_type: string
      - name: filler_rpl8
        data_type: string
      - name: from_nodeid_of_generic_segment
        data_type: string
      - name: filler_rpl9
        data_type: string
      - name: to_nodeid_of_roadbed_segment
        data_type: string
      - name: filler_rpl10
        data_type: string
      - name: to_nodeid_of_generic_segment
        data_type: string
    # model-level test: a (generic, roadbed) segment pair may appear only once
    data_tests:
      - test_name: dbt_utils.unique_combination_of_columns
        arguments:
          combination_of_columns:
            - generic_segmentid
            - roadbed_segmentid

  # single-column form: every row is one text record in dat_column
  - name: rpl
    config:
      contract:
        enforced: true
    columns:
      - name: dat_column
        data_type: string
        # every record must be exactly 59 characters long
        data_tests:
          - dbt_expectations.expect_column_value_lengths_to_equal:
              arguments: { value: 59 }
Loading