Skip to content

DBT Macro: Test Synonym Check

DBT macro name: trase_duckdb.test_synonym_check

DBT details


Description

No description


Details

{% test synonym_check(
    model,
    column_name,
    synonym_table,
    official_name_field,
    reference_synonym_field,
    reference_official_name_field,
    where_from=None,
    where_to=None,
    check_trimmed_official_names=False,
    return_reference_official_names=True
) %}

    with model_data as (
        select
            {{ column_name }} as synonym,
            trim(lower({{ column_name }})) as synonym_normalized,
            {{ official_name_field }} as official_name,
            trim(lower({{ official_name_field }})) as official_name_normalized,
            count(*) as occurrences
        from {{ model }}
        where {{ column_name }} is not null
        {% if where_from %}
        and {{ where_from }}
        {% endif %}
        group by 1, 2, 3, 4
    ),

    reference_data as (
        select
            t.synonym,
            trim(lower(t.synonym)) as synonym_normalized,
            {{ reference_official_name_field }} as official_name,
            trim(lower({{ reference_official_name_field }})) as official_name_normalized
        from {{ synonym_table }}
        {% if where_to %}
        where {{ where_to }}
        {% endif %}
        cross join unnest({{ reference_synonym_field }}) as t(synonym)
    ),

    reference_grouped as (
        select
            synonym_normalized,
            list(distinct official_name order by official_name) as reference_official_names,
            list(distinct official_name_normalized order by official_name_normalized) as reference_official_names_normalized,
            count(distinct official_name) as reference_official_name_count
        from reference_data
        group by 1
    )

    select
        m.synonym as model_synonym,
        m.official_name as model_official_name,
        m.occurrences as model_occurrences,
        rg.reference_official_name_count,
        {% if return_reference_official_names %}
        rg.reference_official_names,
        {% endif %}
        case
            when rg.reference_official_name_count is null then 'synonym not found in reference'
            when {% if check_trimmed_official_names %}
                    not list_contains(rg.reference_official_names_normalized, m.official_name_normalized)
                 {% else %}
                    not list_contains(rg.reference_official_names, m.official_name)
                 {% endif %}
                 then 'official name does not match reference'
        end as error_reason
    from model_data m
    left join reference_grouped rg
        on m.synonym_normalized = rg.synonym_normalized
    where rg.reference_official_name_count is null
       or {% if check_trimmed_official_names %}
             not list_contains(rg.reference_official_names_normalized, m.official_name_normalized)
          {% else %}
             not list_contains(rg.reference_official_names, m.official_name)
          {% endif %}
    order by m.occurrences desc

{% endtest %}