DBT Test: Exporter Provinces Match Reference
DBT test name: exporter_provinces_match_reference
DBT details
-
Kind:
generic(reference_match) -
Test file: trase/data_pipeline/models/indonesia/wood_pulp/sei_pcs/_schema.yml
Description
No description
Details
{{ test_reference_match(**_dbt_generic_test_kwargs) }}
with unioned as (
-- 2. Union distinct key combinations from both models
-- Each row is tagged with its source ('model' or 'compare_to_model') to track its origin.
-- Select distinct keys from the primary model
select distinct
EXPORTER_PROV_CODE as exporter_prov_code,
'ID-' || prov_code::int::text as id___prov_codeinttext
,
'model' as source_table
from "trase_production"."main"."wood_pulp_ind_v3_2_0_2025_post_embedding_quant"
where EXPORTER_PROV_CODE != 'ID-XX'
union all
-- Select distinct keys from the comparison model
select distinct
EXPORTER_PROV_CODE as exporter_prov_code,
'ID-' || prov_code::int::text as id___prov_codeinttext
,
'compare_to_model' as source_table
from "trase_production"."main"."indonesia_kabupaten_boundaries_simplified_2023"
),
grouped as (
-- 3. Group by the key columns to find unique occurrences
-- Keys that appear only once (`occurrences = 1`) represent a mismatch between the models.
select
exporter_prov_code,
id___prov_codeinttext,
count(*) as occurrences,
min(source_table) as source_table -- If occurrences = 1, this reveals the single source table
from unioned
group by
exporter_prov_code,
id___prov_codeinttext
)
-- 4. Select and classify the discrepancies based on the join_type
-- The test fails if this query returns any rows.
select
exporter_prov_code,
id___prov_codeinttext,
case
when source_table = 'compare_to_model'
then 'missing from model' -- The key was only in the reference model
when source_table = 'model'
then 'missing from reference' -- The key was only in the primary model
end as error_type
from grouped
where occurrences = 1
-- For a 'left' join test, we only care if rows from the main `model` are missing from `compare_to`.
-- These are rows that appear only once and have `source_table = 'model'`.
and source_table = 'model'