DBT Test: Trase Regions Matches Indonesia Provinces
DBT test name: trase_regions_matches_indonesia_provinces
DBT details
-
Kind:
generic(reference_match) -
Test file: trase/data_pipeline/models/indonesia/spatial/_sources.yml
Description
No description
Details
{{ test_reference_match(**_dbt_generic_test_kwargs) }}
with unioned as (
-- 2. Union distinct key combinations from both models
-- Each row is tagged with its source ('model' or 'compare_to_model') to track its origin.
-- Select distinct keys from the primary model
select distinct
trase_id as trase_id,
name as name
,
'model' as source_table
from ST_READ('s3://resources.trase.earth/data/trase-regions/id/province.geojson')
union all
-- Select distinct keys from the comparison model
select distinct
'ID-' || prov_code::int::text as trase_id,
prov as name
,
'compare_to_model' as source_table
from "trase_production"."main"."indonesia_kabupaten_boundaries_simplified_2023"
),
grouped as (
-- 3. Group by the key columns to find unique occurrences
-- Keys that appear only once (`occurrences = 1`) represent a mismatch between the models.
select
trase_id,
name,
count(*) as occurrences,
min(source_table) as source_table -- If occurrences = 1, this reveals the single source table
from unioned
group by
trase_id,
name
)
-- 4. Select and classify the discrepancies based on the join_type
-- The test fails if this query returns any rows.
select
trase_id,
name,
case
when source_table = 'compare_to_model'
then 'missing from model' -- The key was only in the reference model
when source_table = 'model'
then 'missing from reference' -- The key was only in the primary model
end as error_type
from grouped
where occurrences = 1