DBT Macro: Test Reference Match
DBT macro name: trase_duckdb.test_reference_match
DBT details
Description
No description
Details
Data Tests
biome_reference_check_beef_sei_pcs_v2_2_2country_destination_reference_check_beef_sei_pcs_v2_2_2country_production_reference_check_beef_sei_pcs_v2_2_2logistics_hub_reference_check_beef_sei_pcs_v2_2_2municipality_and_state_of_production_reference_check_beef_sei_pcs_v2_2_2exporter_reference_check_beef_sei_pcs_v2_2_2supplier_provinces_match_referenceexporter_provinces_match_referenceexporter_reference_check_cote_divoire_cocoa_sei_pcs_v1_2_0country_production_reference_check_cote_divoire_cocoa_sei_pcs_v1_2_0department_and_logistics_hub_reference_check_cote_divoire_cocoa_sei_pcs_v1_2_0logistics_hub_cooperative_name_reference_check_cote_divoire_cocoa_sei_pcs_v1_2_0country_destination_reference_check_cote_divoire_cocoa_sei_pcs_v1_2_0postgres_countries_trase_id_iso2_reference_matchdatabase_contains_all_indonesian_kabupatendatabase_contains_all_indonesian_provincestrase_regions_matches_indonesia_kabupatentrase_regions_matches_indonesia_provincestrase_regions_matches_indonesia_concessions_3_0trase_regions_concessions_2019_have_correct_namestrase_regions_matches_indonesia_concessions_3_1trase_regions_concessions_2020_have_correct_namestrase_regions_matches_indonesia_concessions_3_2trase_regions_concessions_2023_have_correct_names
Macros
slugify
{% test reference_match(
model,
compare_to,
match_on,
join_type,
where_from=None,
where_to=None,
column_name=None
) %}
{#- 1. Parse `match_on` into two separate lists for each model/reference -#}
{% set left_cols = [] %}
{% set right_cols = [] %}
{% for pair in match_on %}
{% if (pair.model is defined) and (pair.ref is defined) %}
{% do left_cols.append(pair.model) %}
{% do right_cols.append(pair.ref) %}
{% elif pair is sequence and pair is not string and (pair|length) >= 2 %}
{% do left_cols.append(pair[0]) %}
{% do right_cols.append(pair[1]) %}
{% elif pair is string %}
{% do left_cols.append(pair) %}
{% do right_cols.append(pair) %}
{% else %}
{{ exceptions.raise_compiler_error(
"reference_match: Each entry must be {model: <col>, ref: <col>}, [model_col, ref_col], or <col> for same-name columns. Got: " ~ pair
) }}
{% endif %}
{% endfor %}
with unioned as (
-- 2. Union distinct key combinations from both models
-- Each row is tagged with its source ('model' or 'compare_to_model') to track its origin.
-- Select distinct keys from the primary model
select distinct
{% for col in left_cols %}
{{ col }} as {{ dbt_utils.slugify(col) }}{% if not loop.last %},{% endif %}
{% endfor %},
'model' as source_table
from {{ model }}
{% if where_from %}
where {{ where_from }}
{% endif %}
union all
-- Select distinct keys from the comparison model
select distinct
{% for i in range(right_cols|length) %}
{{ right_cols[i] }} as {{ dbt_utils.slugify(left_cols[i]) }}{% if not loop.last %},{% endif %}
{% endfor %},
'compare_to_model' as source_table
from {{ compare_to }}
{% if where_to %}
where {{ where_to }}
{% endif %}
),
grouped as (
-- 3. Group by the key columns to find unique occurrences
-- Keys that appear only once (`occurrences = 1`) represent a mismatch between the models.
select
{% for col in left_cols %}
{{ dbt_utils.slugify(col) }},
{% endfor %}
count(*) as occurrences,
min(source_table) as source_table -- If occurrences = 1, this reveals the single source table
from unioned
group by
{% for col in left_cols %}
{{ dbt_utils.slugify(col) }}{% if not loop.last %},{% endif %}
{% endfor %}
)
-- 4. Select and classify the discrepancies based on the join_type
-- The test fails if this query returns any rows.
select
{% for col in left_cols %}
{{ dbt_utils.slugify(col) }},
{% endfor %}
case
when source_table = 'compare_to_model'
then 'missing from model' -- The key was only in the reference model
when source_table = 'model'
then 'missing from reference' -- The key was only in the primary model
end as error_type
from grouped
where occurrences = 1
{% if join_type == 'left' %}
-- For a 'left' join test, we only care if rows from the main `model` are missing from `compare_to`.
-- These are rows that appear only once and have `source_table = 'model'`.
and source_table = 'model'
{% elif join_type == 'right' %}
-- For a 'right' join test, we only care if rows from `compare_to` are missing from the main `model`.
-- These are rows that appear only once and have `source_table = 'compare_to_model'`.
and source_table = 'compare_to_model'
{% endif %}
{% endtest %}