Skip to content

Trase Data Wiki

DBT Macro: Test Reference Match

DBT Macro: Test Reference Match

DBT macro name: `trase_duckdb.test_reference_match`

DBT details

Macro file: trase/data_pipeline/tests/generic/reference_match.sql
Original dbt_docs

Description

No description

Details

Referenced ByDepends OnMacro code

Data Tests

Macros

slugify

{% test reference_match(
    model,
    compare_to,
    match_on,
    join_type,
    where_from=None,
    where_to=None,
    column_name=None
) %}

    {#- 1. Parse `match_on` into two separate lists for each model/reference -#}
    {% set left_cols = [] %}
    {% set right_cols = [] %}

    {% for pair in match_on %}
      {% if (pair.model is defined) and (pair.ref is defined) %}
        {% do left_cols.append(pair.model) %}
        {% do right_cols.append(pair.ref) %}
      {% elif pair is sequence and pair is not string and (pair|length) >= 2 %}
        {% do left_cols.append(pair[0]) %}
        {% do right_cols.append(pair[1]) %}
      {% elif pair is string %}
        {% do left_cols.append(pair) %}
        {% do right_cols.append(pair) %}
      {% else %}
        {{ exceptions.raise_compiler_error(
          "reference_match: Each entry must be {model: <col>, ref: <col>}, [model_col, ref_col], or <col> for same-name columns. Got: " ~ pair
        ) }}
      {% endif %}
    {% endfor %}

    with unioned as (
        -- 2. Union distinct key combinations from both models
        -- Each row is tagged with its source ('model' or 'compare_to_model') to track its origin.

        -- Select distinct keys from the primary model
      select distinct
        {% for col in left_cols %}
          {{ col }} as {{ dbt_utils.slugify(col) }}{% if not loop.last %},{% endif %}
        {% endfor %},
        'model' as source_table
      from {{ model }}
      {% if where_from %}
      where {{ where_from }}
      {% endif %}

      union all

      -- Select distinct keys from the comparison model
      select distinct
        {% for i in range(right_cols|length) %}
          {{ right_cols[i] }} as {{ dbt_utils.slugify(left_cols[i]) }}{% if not loop.last %},{% endif %}
        {% endfor %},
        'compare_to_model' as source_table
      from {{ compare_to }}
      {% if where_to %}
      where {{ where_to }}
      {% endif %}
    ),

    grouped as (
      -- 3. Group by the key columns to find unique occurrences
      -- Keys that appear only once (`occurrences = 1`) represent a mismatch between the models.
      select
        {% for col in left_cols %}
          {{ dbt_utils.slugify(col) }},
        {% endfor %}
        count(*) as occurrences,
        min(source_table) as source_table -- If occurrences = 1, this reveals the single source table
      from unioned
      group by
        {% for col in left_cols %}
          {{ dbt_utils.slugify(col) }}{% if not loop.last %},{% endif %}
        {% endfor %}
    )

    -- 4. Select and classify the discrepancies based on the join_type
    -- The test fails if this query returns any rows.
    select
      {% for col in left_cols %}
        {{ dbt_utils.slugify(col) }},
      {% endfor %}
      case
        when source_table = 'compare_to_model'
        then 'missing from model' -- The key was only in the reference model
        when source_table = 'model'
        then 'missing from reference' -- The key was only in the primary model
      end as error_type
    from grouped
    where occurrences = 1
    {% if join_type == 'left' %}
        -- For a 'left' join test, we only care if rows from the main `model` are missing from `compare_to`.
        -- These are rows that appear only once and have `source_table = 'model'`.
      and source_table = 'model'
    {% elif join_type == 'right' %}
        -- For a 'right' join test, we only care if rows from `compare_to` are missing from the main `model`.
        -- These are rows that appear only once and have `source_table = 'compare_to_model'`.
      and source_table = 'compare_to_model'
    {% endif %}

{% endtest %}