DBT Test: Postgres Countries Trase Id Iso2 Reference Match
DBT test name: postgres_countries_trase_id_iso2_reference_match
DBT details
-
Kind:
generic(reference_match) -
Column under test:
country_trase_id -
Test file: trase/data_pipeline/models/postgres_views/_schema_postgres_tables.yml
Description
No description
Details
{{ test_reference_match(**_dbt_generic_test_kwargs) }}
with unioned as (
-- 2. Union distinct key combinations from both models
-- Each row is tagged with its source ('model' or 'compare_to_model') to track its origin.
-- Select distinct keys from the primary model
select distinct
country_trase_id as country_trase_id
,
'model' as source_table
from "trase_production"."main"."postgres_countries"
union all
-- Select distinct keys from the comparison model
select distinct
alpha_2 as country_trase_id
,
'compare_to_model' as source_table
from 's3://trase-storage/world/metadata/codes/country_iso_codes.parquet'
where alpha_2 IS NOT NULL
),
grouped as (
-- 3. Group by the key columns to find unique occurrences
-- Keys that appear only once (`occurrences = 1`) represent a mismatch between the models.
select
country_trase_id,
count(*) as occurrences,
min(source_table) as source_table -- If occurrences = 1, this reveals the single source table
from unioned
group by
country_trase_id
)
-- 4. Select and classify the discrepancies based on the join_type
-- The test fails if this query returns any rows.
select
country_trase_id,
case
when source_table = 'compare_to_model'
then 'missing from model' -- The key was only in the reference model
when source_table = 'model'
then 'missing from reference' -- The key was only in the primary model
end as error_type
from grouped
where occurrences = 1
-- For a 'right' join test, we only care if rows from `compare_to` are missing from the main `model`.
-- These are rows that appear only once and have `source_table = 'compare_to_model'`.
and source_table = 'compare_to_model'