From ca3bf4aeffe30453ccbdb33d652a93d7cfb3579d Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Wed, 4 Mar 2026 16:32:22 -0500 Subject: [PATCH 1/6] drop dcpy.lifecycle cli alias --- .github/workflows/cscl_build.yml | 6 +++--- dcpy/__main__.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cscl_build.yml b/.github/workflows/cscl_build.yml index a2ad118d2b..5ec2354b71 100644 --- a/.github/workflows/cscl_build.yml +++ b/.github/workflows/cscl_build.yml @@ -59,10 +59,10 @@ jobs: ./bash/build_env_setup.sh - name: Plan build - run: dcpy lc builds plan ${{ inputs.plan_command }} + run: dcpy lifecycle builds plan ${{ inputs.plan_command }} - name: Dataloading - run: dcpy lc builds load load --recipe-path ${{ inputs.recipe_file }}.lock.yml --cache-schema recipe_cache --cached-entity-type view + run: dcpy lifecycle builds load load --recipe-path ${{ inputs.recipe_file }}.lock.yml --cache-schema recipe_cache --cached-entity-type view - name: Build run: | @@ -73,7 +73,7 @@ jobs: dbt build --full-refresh - name: Export - run: dcpy lc builds build export --recipe-path ${{ inputs.recipe_file }}.lock.yml + run: dcpy lifecycle builds build export --recipe-path ${{ inputs.recipe_file }}.lock.yml - name: Validate against production outputs # TODO - this currently relies on version hard-coded in recipe.yml diff --git a/dcpy/__main__.py b/dcpy/__main__.py index ce1f1e9c3b..328f629b54 100644 --- a/dcpy/__main__.py +++ b/dcpy/__main__.py @@ -13,7 +13,6 @@ def cli(): app = typer.Typer() app.add_typer(lifecycle.app, name="lifecycle") - app.add_typer(lifecycle.app, name="lc") # alias app.add_typer(connectors.app, name="connectors") app.add_typer(utils.app, name="utils") app() From 19081032a0cfb0fce2ac77456c8958950ae9b72e Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 3 Mar 2026 16:31:32 -0500 Subject: [PATCH 2/6] fix deprecation warnings --- products/cscl/dbt_project.yml | 2 +- products/cscl/models/staging/_stg.yml | 5 +++-- 2 files 
changed, 4 insertions(+), 3 deletions(-) diff --git a/products/cscl/dbt_project.yml b/products/cscl/dbt_project.yml index 7bafdd6f25..4dff46b500 100644 --- a/products/cscl/dbt_project.yml +++ b/products/cscl/dbt_project.yml @@ -6,7 +6,7 @@ model-paths: [ "models" ] tests: +store_failures: true - schema: "_tests" + +schema: "_tests" models: cscl: diff --git a/products/cscl/models/staging/_stg.yml b/products/cscl/models/staging/_stg.yml index fbbd054d1a..290fdeb4f0 100644 --- a/products/cscl/models/staging/_stg.yml +++ b/products/cscl/models/staging/_stg.yml @@ -33,8 +33,9 @@ models: - name: b7sc tests: - unique: - config: { where: "b7sc IS NOT NULL" } - error_if: "> 1" + arguments: + config: { where: "b7sc IS NOT NULL" } + error_if: "> 1" - name: lookup_key - name: face_code From 5ef8b22df98e42abf34615f07e62adb41d096c6b Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Sat, 7 Mar 2026 23:37:00 -0500 Subject: [PATCH 3/6] use gitignored paths for data files --- products/cscl/poc_validation/prod_data_loader.py | 2 +- products/cscl/poc_validation/validate_outputs.sh | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/products/cscl/poc_validation/prod_data_loader.py b/products/cscl/poc_validation/prod_data_loader.py index b630ecd6b6..c36107d0ed 100644 --- a/products/cscl/poc_validation/prod_data_loader.py +++ b/products/cscl/poc_validation/prod_data_loader.py @@ -16,7 +16,7 @@ from dcpy.utils import postgres, s3 CLIENT = postgres.PostgresClient(database="db-cscl", schema="production_outputs") -LOAD_FOLDER = Path("prod") +LOAD_FOLDER = Path(".data/prod") version: str | None = None datasets_by_name = {} diff --git a/products/cscl/poc_validation/validate_outputs.sh b/products/cscl/poc_validation/validate_outputs.sh index 28b51095d3..d321a1ee14 100755 --- a/products/cscl/poc_validation/validate_outputs.sh +++ b/products/cscl/poc_validation/validate_outputs.sh @@ -2,14 +2,14 @@ # Expects two folders in current directory # output - contains 
outputs of build -# prod - contains "production" 25a (or whatever version) for comparison -mkdir validation_output +# .data/prod - contains "production" 25a (or whatever version) for comparison +mkdir output/validation_output total_records=0 total_mismatched=0 for filepath in output/*; do file=$(basename "$filepath") - if [[ "$file" =~ "zip" ]]; then + if [[ "$file" =~ "zip" ]] || [[ -d "$filepath" ]]; then continue fi echo "Validating $file" @@ -17,7 +17,7 @@ for filepath in output/*; do n_records="$(cat output/$file | wc -l | awk '{print $1}')" echo "Total records: $n_records" total_records=$(($total_records + $n_records)) - mismatched_rows=$(comm -23 <(sort output/$file) <(sort prod/$file)) + mismatched_rows=$(comm -23 <(sort output/$file) <(sort .data/prod/$file)) if [ -z "$mismatched_rows" ]; then n_mismatched=0 @@ -27,7 +27,7 @@ for filepath in output/*; do echo "Mismatched records: $n_mismatched" total_mismatched=$(($total_mismatched + $n_mismatched)) - echo -e "$mismatched_rows" > validation_output/$file + echo -e "$mismatched_rows" > output/validation_output/$file echo "" done From 7a63150654f032d258c1a404cd18e5851394ddf7 Mon Sep 17 00:00:00 2001 From: Finn van Krieken Date: Thu, 19 Feb 2026 16:44:32 -0500 Subject: [PATCH 4/6] add rpl formatting seed and recipe entry --- .../cscl/poc_validation/prod_data_loader.py | 2 ++ products/cscl/recipe.yml | 6 ++++++ .../text_formatting/text_formatting__rpl.csv | 20 +++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 products/cscl/seeds/text_formatting/text_formatting__rpl.csv diff --git a/products/cscl/poc_validation/prod_data_loader.py b/products/cscl/poc_validation/prod_data_loader.py index c36107d0ed..1c4fa48ae3 100644 --- a/products/cscl/poc_validation/prod_data_loader.py +++ b/products/cscl/poc_validation/prod_data_loader.py @@ -3,6 +3,8 @@ and loading the result into a postgres table This is done ad-hoc and not on an operational basis + +It assumes that production outputs are specified as 
exports in recipe.yml """ from dataclasses import dataclass diff --git a/products/cscl/recipe.yml b/products/cscl/recipe.yml index 6333c7bcc8..3598d09ed1 100644 --- a/products/cscl/recipe.yml +++ b/products/cscl/recipe.yml @@ -251,3 +251,9 @@ exports: filename: StatenIslandFace.txt format: dat custom: { formatting: face_code } + + # Other + - name: rpl + filename: RPL.txt + format: dat + custom: { formatting: rpl } diff --git a/products/cscl/seeds/text_formatting/text_formatting__rpl.csv b/products/cscl/seeds/text_formatting/text_formatting__rpl.csv new file mode 100644 index 0000000000..e1f13b7558 --- /dev/null +++ b/products/cscl/seeds/text_formatting/text_formatting__rpl.csv @@ -0,0 +1,20 @@ +fic,field_name,field_label,field_length,start_index,end_index,justify_and_fill,blank_if_none +RPL1,generic_segmentid,Generic SEGMENTID,7,1,7,RJZF,FALSE +RPL2,generic_segmenttype,Segment Type of Generic Segment,1,8,8,RJSF,FALSE +RPL3,roadbed_segmentid,Roadbed SEGMENTID,7,9,15,RJZF,FALSE +,filler_rpl3,Filler,1,16,16,RJSF,FALSE +RPL4,roadbed_position_code,Roadbed Position Code,1,17,17,RJSF,FALSE +,filler_rpl4,Filler,1,18,18,RJSF,FALSE +RPL5,node_correspondence_indicator,Node Correspondence Indicator,1,19,19,RJSF,FALSE +,filler_rpl5,Filler,3,20,22,RJSF,FALSE +RPL6,from_node_level_code_of_coincident_roadbed_segment,From Node Level Code of Coincident Roadbed Segment (if any),1,23,23,RJSF,FALSE +,filler_rpl6,Filler,3,24,26,RJSF,FALSE +RPL7,to_node_level_code_of_coincident_roadbed_segment,To Node Level Code of Coincident Roadbed Segment (if any),1,27,27,RJSF,FALSE +,filler_rpl7,Filler,1,28,28,RJSF,FALSE +RPL8,from_nodeid_of_roadbed_segment,From NODEID of Roadbed Segment,7,29,35,RJZF,FALSE +,filler_rpl8,Filler,1,36,36,RJSF,FALSE +RPL9,from_nodeid_of_generic_segment,From NODEID of Generic Segment,7,37,43,RJZF,FALSE +,filler_rpl9,Filler,1,44,44,RJSF,FALSE +RPL10,to_nodeid_of_roadbed_segment,To NODEID of Roadbed Segment,7,45,51,RJZF,FALSE +,filler_rpl10,Filler,1,52,52,RJSF,FALSE 
+RPL11,to_nodeid_of_generic_segment,To NODEID of Generic Segment,7,53,59,RJZF,FALSE From 9ebbcd5cdb05464473c3deff611321b6a6538167 Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Tue, 3 Mar 2026 17:03:59 -0500 Subject: [PATCH 5/6] add rpl models --- .../cscl/models/intermediate/int__lion.sql | 1 + .../models/intermediate/rpl/_int__rpl.yml | 7 + .../cscl/models/intermediate/rpl/int__rpl.sql | 126 ++++++++++++++++++ products/cscl/models/product/rpl/_rpl.yml | 63 +++++++++ products/cscl/models/product/rpl/rpl.sql | 1 + .../cscl/models/product/rpl/rpl_by_field.sql | 3 + 6 files changed, 201 insertions(+) create mode 100644 products/cscl/models/intermediate/rpl/_int__rpl.yml create mode 100644 products/cscl/models/intermediate/rpl/int__rpl.sql create mode 100644 products/cscl/models/product/rpl/_rpl.yml create mode 100644 products/cscl/models/product/rpl/rpl.sql create mode 100644 products/cscl/models/product/rpl/rpl_by_field.sql diff --git a/products/cscl/models/intermediate/int__lion.sql b/products/cscl/models/intermediate/int__lion.sql index 3b7a17e95c..eee92b69b4 100644 --- a/products/cscl/models/intermediate/int__lion.sql +++ b/products/cscl/models/intermediate/int__lion.sql @@ -255,6 +255,7 @@ SELECT segments.feature_type_description, segments.source_table, segments.geom, + segments.midpoint, segments.globalid, CASE WHEN segments.source_table = 'centerline' THEN centerline.include_in_geosupport_lion diff --git a/products/cscl/models/intermediate/rpl/_int__rpl.yml b/products/cscl/models/intermediate/rpl/_int__rpl.yml new file mode 100644 index 0000000000..d64e89daaa --- /dev/null +++ b/products/cscl/models/intermediate/rpl/_int__rpl.yml @@ -0,0 +1,7 @@ +version: 2 + +models: +- name: int__rpl + columns: + - name: rpl_id + tests: [ unique, not_null ] diff --git a/products/cscl/models/intermediate/rpl/int__rpl.sql b/products/cscl/models/intermediate/rpl/int__rpl.sql new file mode 100644 index 0000000000..9ba038b5ad --- /dev/null +++ 
b/products/cscl/models/intermediate/rpl/int__rpl.sql @@ -0,0 +1,126 @@ +WITH lion AS ( + SELECT DISTINCT ON (segmentid) + segmentid, + segment_type, + from_nodeid, + to_nodeid, + geom, + midpoint + FROM {{ ref("int__lion") }} + ORDER BY segmentid +), + +cscl_rpl AS (SELECT * FROM {{ source("recipe_sources", "dcp_cscl_roadbed_pointer_list") }}), + +generic_attributes AS ( + SELECT + cscl_rpl.generic_segmentid, + lion.segment_type AS generic_segmenttype, + cscl_rpl.roadbed_segmentid, + cscl_rpl.roadbed_position_code, + 'B' AS node_correspondence_indicator, -- all records are 'B' in production, but the docs say it should be node_correspondence_ind + -- cscl_rpl.node_correspondence_ind AS node_correspondence_indicator, + chr(64 + cscl_rpl.from_node_level_coincident_rb) AS from_node_level_code_of_coincident_roadbed_segment, + chr(64 + cscl_rpl.to_node_level_coincident_rb) AS to_node_level_code_of_coincident_roadbed_segment, + lion.from_nodeid AS from_nodeid_of_generic_segment, + lion.to_nodeid AS to_nodeid_of_generic_segment, + lion.geom AS generic_geom + FROM cscl_rpl + LEFT JOIN lion + ON cscl_rpl.generic_segmentid = lion.segmentid +), + +roadbed_attributes AS ( + SELECT + generic_attributes.*, + lion.from_nodeid AS from_nodeid_of_roadbed_segment, + lion.to_nodeid AS to_nodeid_of_roadbed_segment, + lion.geom, + lion.midpoint + FROM generic_attributes + LEFT JOIN lion + ON generic_attributes.roadbed_segmentid = lion.segmentid +), + +-- Compute the cross product for every row and attach the R outermost's cross product +-- as a per-group sign reference. Comparing signs (rather than using the raw sign of +-- the cross product alone) means side determination is correct regardless of the +-- direction of the generic segment geometry. 
+side_reference AS ( + SELECT + *, + ( + (st_x(st_endpoint(generic_geom)) - st_x(st_startpoint(generic_geom))) + * (st_y(midpoint) - st_y(st_startpoint(generic_geom))) + - (st_y(st_endpoint(generic_geom)) - st_y(st_startpoint(generic_geom))) + * (st_x(midpoint) - st_x(st_startpoint(generic_geom))) + ) AS cross_product, + first_value( + (st_x(st_endpoint(generic_geom)) - st_x(st_startpoint(generic_geom))) + * (st_y(midpoint) - st_y(st_startpoint(generic_geom))) + - (st_y(st_endpoint(generic_geom)) - st_y(st_startpoint(generic_geom))) + * (st_x(midpoint) - st_x(st_startpoint(generic_geom))) + ) OVER ( + PARTITION BY generic_segmentid + ORDER BY (roadbed_position_code = 'R') DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS r_cross_product + FROM roadbed_attributes +), + +-- Ordering: +-- 1. 'R' outermost +-- 2. Right-side 'I' segments, from outermost to innermost +-- 3. 'L' outermost +-- 4. Left-side 'I' segments, from outermost to innermost +-- +-- For 'I' segments, side is determined by whether the row's cross product has the +-- same sign as the R outermost's cross product for that generic segment. +-- Within a side, rows are ordered by descending perpendicular distance from the +-- generic segment (outermost first). 
+add_group_order_id AS ( + SELECT + *, + row_number() OVER ( + PARTITION BY generic_segmentid + ORDER BY + CASE + WHEN roadbed_position_code = 'R' THEN 1 + WHEN + roadbed_position_code = 'I' + AND sign(cross_product) = sign(r_cross_product) THEN 2 + WHEN roadbed_position_code = 'L' THEN 3 + ELSE 4 -- left-side 'I' segments + END, + -- Negate so outermost (largest perpendicular distance) sorts first + CASE + WHEN roadbed_position_code = 'I' + THEN -st_distance(midpoint, generic_geom) + ELSE 0 + END + ) AS group_order_id + FROM side_reference +), + +final AS ( + SELECT + generic_segmentid || '_' || group_order_id AS rpl_id, + generic_segmentid, + generic_segmenttype, + roadbed_segmentid, + group_order_id, + roadbed_position_code, + node_correspondence_indicator, + from_node_level_code_of_coincident_roadbed_segment, + to_node_level_code_of_coincident_roadbed_segment, + from_nodeid_of_roadbed_segment, + from_nodeid_of_generic_segment, + to_nodeid_of_roadbed_segment, + to_nodeid_of_generic_segment, + midpoint, + geom + FROM add_group_order_id + ORDER BY rpl_id +) + +SELECT * FROM final diff --git a/products/cscl/models/product/rpl/_rpl.yml b/products/cscl/models/product/rpl/_rpl.yml new file mode 100644 index 0000000000..4c29f6f30b --- /dev/null +++ b/products/cscl/models/product/rpl/_rpl.yml @@ -0,0 +1,63 @@ +version: 2 + +models: +- name: rpl_by_field + config: + contract: + enforced: true + columns: + - name: generic_segmentid + data_type: string + - name: generic_segmenttype + data_type: string + - name: roadbed_segmentid + data_type: string + - name: filler_rpl3 + data_type: string + - name: roadbed_position_code + data_type: string + - name: filler_rpl4 + data_type: string + - name: node_correspondence_indicator + data_type: string + - name: filler_rpl5 + data_type: string + - name: from_node_level_code_of_coincident_roadbed_segment + data_type: string + - name: filler_rpl6 + data_type: string + - name: to_node_level_code_of_coincident_roadbed_segment + data_type: 
string + - name: filler_rpl7 + data_type: string + - name: from_nodeid_of_roadbed_segment + data_type: string + - name: filler_rpl8 + data_type: string + - name: from_nodeid_of_generic_segment + data_type: string + - name: filler_rpl9 + data_type: string + - name: to_nodeid_of_roadbed_segment + data_type: string + - name: filler_rpl10 + data_type: string + - name: to_nodeid_of_generic_segment + data_type: string +data_tests: +- test_name: dbt_utils.unique_combination_of_columns + arguments: + combination_of_columns: + - generic_segmentid + - roadbed_segmentid + +- name: rpl + config: + contract: + enforced: true + columns: + - name: dat_column + data_type: string + tests: + - dbt_expectations.expect_column_value_lengths_to_equal: + arguments: { value: 59 } diff --git a/products/cscl/models/product/rpl/rpl.sql b/products/cscl/models/product/rpl/rpl.sql new file mode 100644 index 0000000000..5b7c02201f --- /dev/null +++ b/products/cscl/models/product/rpl/rpl.sql @@ -0,0 +1 @@ +{{ select_rows_as_text(model='rpl_by_field') }} diff --git a/products/cscl/models/product/rpl/rpl_by_field.sql b/products/cscl/models/product/rpl/rpl_by_field.sql new file mode 100644 index 0000000000..0b51d9be19 --- /dev/null +++ b/products/cscl/models/product/rpl/rpl_by_field.sql @@ -0,0 +1,3 @@ +SELECT + {{ apply_text_formatting_from_seed('text_formatting__rpl') }} +FROM {{ ref("int__rpl") }} From f607e56e49f90196f181afef618c0a9bd109082b Mon Sep 17 00:00:00 2001 From: Damon McCullough Date: Sun, 8 Mar 2026 10:38:01 -0400 Subject: [PATCH 6/6] add rpl QA models --- products/cscl/dbt_project.yml | 2 + .../cscl/models/etl_dev_qa/qa__rpl_order.sql | 34 ++++++++ .../models/etl_dev_qa/qa__rpl_order_diffs.sql | 87 +++++++++++++++++++ .../cscl/models/etl_dev_qa/qa__rpl_values.sql | 18 ++++ products/cscl/models/product/rpl/_rpl.yml | 14 +-- products/cscl/models/product/rpl/rpl.sql | 2 +- .../cscl/models/product/rpl/rpl_by_field.sql | 2 + products/cscl/packages.yml | 2 + 8 files changed, 154 
insertions(+), 7 deletions(-) create mode 100644 products/cscl/models/etl_dev_qa/qa__rpl_order.sql create mode 100644 products/cscl/models/etl_dev_qa/qa__rpl_order_diffs.sql create mode 100644 products/cscl/models/etl_dev_qa/qa__rpl_values.sql diff --git a/products/cscl/dbt_project.yml b/products/cscl/dbt_project.yml index 4dff46b500..bd04b6b7b3 100644 --- a/products/cscl/dbt_project.yml +++ b/products/cscl/dbt_project.yml @@ -16,6 +16,8 @@ models: +materialized: view product: +materialized: table + etl_dev_qa: + +materialized: table on-run-start: - '{{ create_pg_functions() }}' diff --git a/products/cscl/models/etl_dev_qa/qa__rpl_order.sql b/products/cscl/models/etl_dev_qa/qa__rpl_order.sql new file mode 100644 index 0000000000..8430660019 --- /dev/null +++ b/products/cscl/models/etl_dev_qa/qa__rpl_order.sql @@ -0,0 +1,34 @@ +-- especially concerned about diffs in the order of rows within each generic_segmentid grouping +{% set old_query %} + select + ROW_NUMBER() OVER () as row_number, + generic_segmentid || '_' || ROW_NUMBER() OVER ( + PARTITION BY generic_segmentid + ) AS rpl_id, + * + from production_outputs.rpl + order by row_number asc +{% endset %} + +{% set new_query %} + select + ROW_NUMBER() OVER () as row_number, + lpad(rpl_id, 9, '0') as rpl_id, + generic_segmentid, + roadbed_segmentid, + roadbed_position_code + from {{ ref('rpl_by_field') }} + order by row_number asc +{% endset %} + +-- {% set columns = dbt_utils.get_filtered_columns_in_relation(ref('rpl_by_field')) %} + +{{ + audit_helper.compare_and_classify_query_results( + old_query, + new_query, + primary_key_columns=['rpl_id'], + columns=['generic_segmentid', 'roadbed_segmentid', 'roadbed_position_code'], + sample_limit=50 + ) +}} diff --git a/products/cscl/models/etl_dev_qa/qa__rpl_order_diffs.sql b/products/cscl/models/etl_dev_qa/qa__rpl_order_diffs.sql new file mode 100644 index 0000000000..9768e778d0 --- /dev/null +++ b/products/cscl/models/etl_dev_qa/qa__rpl_order_diffs.sql @@ -0,0 +1,87 @@ 
+-- for RPL records with diffs in their ordering, get and compare their ordering along with relevant geometries +-- rpl_id indicates the order of roadbed_segmentid rows within each generic_segmentid grouping +WITH + +diff_ids AS ( + SELECT DISTINCT generic_segmentid + FROM + {{ ref('qa__rpl_order') }} + WHERE + dbt_audit_row_status = 'modified' +), + +focus_rpl_prod AS ( + SELECT + prod_rpl.generic_segmentid || '_' || ROW_NUMBER() OVER ( + PARTITION BY prod_rpl.generic_segmentid + ) AS rpl_id, + prod_rpl.* + FROM + production_outputs.rpl AS prod_rpl + INNER JOIN diff_ids + ON + prod_rpl.generic_segmentid = diff_ids.generic_segmentid +), +focus_rpl AS ( + SELECT dev_rpl.* + FROM + {{ ref('int__rpl') }} AS dev_rpl + INNER JOIN diff_ids + ON + LPAD(dev_rpl.generic_segmentid::TEXT, 7, '0') = diff_ids.generic_segmentid +), +focus_lion AS ( + SELECT dev_lion.* + FROM + {{ ref('int__lion') }} AS dev_lion + INNER JOIN diff_ids + ON + LPAD(dev_lion.segmentid::TEXT, 7, '0') = diff_ids.generic_segmentid +), + +all_segments AS ( + SELECT + focus_rpl.generic_segmentid, + focus_rpl.roadbed_segmentid, + LPAD(focus_rpl.rpl_id::TEXT, 9, '0') AS rpl_id_dev, -- pad to 9 (not 7) as in qa__rpl_order; lpad truncates longer strings, which would drop the '_<order>' suffix + focus_rpl_prod.rpl_id AS rpl_id_prod, + focus_rpl.roadbed_position_code, + focus_rpl.generic_segmenttype, + ST_TRANSFORM(focus_rpl.midpoint, 4326) AS midpoint, + ST_TRANSFORM(focus_rpl.geom, 4326) AS geom, + null AS midpoint_generic, + null AS geom_generic + FROM + focus_rpl + LEFT JOIN focus_rpl_prod + ON + LPAD(focus_rpl.generic_segmentid::TEXT, 7, '0') = focus_rpl_prod.generic_segmentid + AND LPAD(focus_rpl.roadbed_segmentid::TEXT, 7, '0') = focus_rpl_prod.roadbed_segmentid + UNION ALL + SELECT + segmentid AS generic_segmentid, + null AS roadbed_segmentid, + null AS rpl_id_dev, + null AS rpl_id_prod, + null AS roadbed_position_code, + null AS generic_segmenttype, + null AS midpoint, + null AS geom, + ST_TRANSFORM(midpoint, 4326) AS midpoint_generic, + ST_TRANSFORM(geom, 4326) AS geom_generic + FROM + focus_lion +),
+ +final_cte AS ( + SELECT * + FROM + all_segments + ORDER BY + generic_segmentid ASC, + rpl_id_dev ASC +) + +SELECT * +FROM + final_cte diff --git a/products/cscl/models/etl_dev_qa/qa__rpl_values.sql b/products/cscl/models/etl_dev_qa/qa__rpl_values.sql new file mode 100644 index 0000000000..3577341a22 --- /dev/null +++ b/products/cscl/models/etl_dev_qa/qa__rpl_values.sql @@ -0,0 +1,18 @@ +-- ignoring the order of rows within each generic_segmentid grouping to focus on testing for diffs in their values +{% set old_relation = adapter.get_relation( + database = "db-cscl", + schema = "production_outputs", + identifier = "rpl" +) -%} + +{% set dbt_relation = ref('rpl_by_field') %} + +{%- if execute -%} + {{ audit_helper.compare_and_classify_relation_rows( + a_relation = old_relation, + b_relation = dbt_relation, + primary_key_columns=['generic_segmentid', 'roadbed_segmentid'], + columns = None, + sample_limit=50 + ) }} +{%- endif -%} diff --git a/products/cscl/models/product/rpl/_rpl.yml b/products/cscl/models/product/rpl/_rpl.yml index 4c29f6f30b..f4b0dea802 100644 --- a/products/cscl/models/product/rpl/_rpl.yml +++ b/products/cscl/models/product/rpl/_rpl.yml @@ -6,6 +6,8 @@ models: contract: enforced: true columns: + - name: rpl_id + data_type: string - name: generic_segmentid data_type: string - name: generic_segmenttype @@ -44,12 +46,12 @@ models: data_type: string - name: to_nodeid_of_generic_segment data_type: string -data_tests: -- test_name: dbt_utils.unique_combination_of_columns - arguments: - combination_of_columns: - - generic_segmentid - - roadbed_segmentid + data_tests: + - test_name: dbt_utils.unique_combination_of_columns + arguments: + combination_of_columns: + - generic_segmentid + - roadbed_segmentid - name: rpl config: diff --git a/products/cscl/models/product/rpl/rpl.sql b/products/cscl/models/product/rpl/rpl.sql index 5b7c02201f..e55338ef05 100644 --- a/products/cscl/models/product/rpl/rpl.sql +++ b/products/cscl/models/product/rpl/rpl.sql @@ -1 
+1 @@ -{{ select_rows_as_text(model='rpl_by_field') }} +{{ select_rows_as_text(model='rpl_by_field', exclude=['rpl_id']) }} diff --git a/products/cscl/models/product/rpl/rpl_by_field.sql b/products/cscl/models/product/rpl/rpl_by_field.sql index 0b51d9be19..0ce218acdd 100644 --- a/products/cscl/models/product/rpl/rpl_by_field.sql +++ b/products/cscl/models/product/rpl/rpl_by_field.sql @@ -1,3 +1,5 @@ SELECT + rpl_id, {{ apply_text_formatting_from_seed('text_formatting__rpl') }} FROM {{ ref("int__rpl") }} +ORDER BY rpl_id ASC diff --git a/products/cscl/packages.yml b/products/cscl/packages.yml index fa915ec5e4..6e452feff2 100644 --- a/products/cscl/packages.yml +++ b/products/cscl/packages.yml @@ -1,5 +1,7 @@ packages: - package: dbt-labs/dbt_utils version: 1.3.2 +- package: dbt-labs/audit_helper + version: 0.13.0 - package: metaplane/dbt_expectations version: 0.10.9