Skip to content

DBT: Gold India Trade Coffee 2020

File location: s3://trase-storage/india/trade/bol/coffee/2020/gold/gold_india_trade_coffee_2020.parquet

DBT model name: gold_india_trade_coffee_2020

Explore on Metabase: Full table; summary statistics

Explore dependencies/lineage: link


Description

....


Details

Column Type Description
year INTEGER Comes from the Year field.
exporter_label VARCHAR Comes from the Indian_Exporter_Name field. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms.
exporter_node_id BIGINT
exporter_group_name VARCHAR Official name of the trader group, as it exists in the Trase database. See Traders-Developer-Guide.md: 1. Traders data model.
country_of_destination VARCHAR Comes from the FOREIGN_COUNTRY field. Default definition: Official name of the country, as it exists in the Trase database.
mass_tonnes DOUBLE Comes from the Quantity field, and is converted to tonnes from other units.
fob DOUBLE Comes from FOB_Value_USD field. Default definition: FOB (Free On Board) in USD. Values are reported in customs data as a measure of the financial value of the trade.
importer_label VARCHAR Comes from the Foreign_Importer_Name field. Hasn't been checked and cleaned against Trase's database. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms.
port_of_export_label VARCHAR Comes from the FOREIGN_PORT field. Hasn't been checked and cleaned against Trase's database.
hs6 VARCHAR Default definition: First 6 characters of the Harmonized System code: an internationally-agreed set of codes used to describe products in trade data.

Review full report including sample errors records if they exist (link)

Test name Test column Last test run Last status
check_bol_against_comtrade_gold_india_trade_coffee_2020_IND__0901_2101__mass_tonnes__2020 `` 2026-04-25 13:23 fail
check_trader_groups_gold_india_trade_coffee_2020_exporter_group_name__2020 `` 2026-04-25 13:23 pass
relationships_gold_india_trade_coffee_2020_country_of_destination__country_name__ref_postgres_countries_ country_of_destination 2026-04-25 13:23 pass

Models

Sources

Macros

No called script or script source not found.

-- gold_india_trade_coffee_2020
{{ 
    config(
        pre_hook='{{ excel_extension() }}',
        materialized='external',
    )
}}

{% set replace_eu_countries_with_bloc = False %}
{% set trader_prefix = "IN-TRADER-" %}
{% set year = 2020 %}

WITH india_exports_2020 AS (
    SELECT
        Date,
        Year,
        EXTRACT(MONTH FROM Date) AS Month,
        EXTRACT(DAY FROM Date) AS Day,
        HS_Code::VARCHAR AS HS_Code,
        -- Casting frequently to VARCHAR as it tends to convert it to INT otherwise
        CASE
            WHEN CAST(HS_Code AS VARCHAR) LIKE '901%' 
                THEN CONCAT('0', HS_Code)[0:6]
            WHEN CAST(HS_Code AS VARCHAR) LIKE '2101%'
                THEN CAST(HS_Code AS VARCHAR)[0:6]::VARCHAR
            ELSE NULL
        END AS HS6,
        Product_Description,
        Quantity,
        Unit,
        CASE WHEN Unit = 'MTS' THEN Quantity * 1000
             WHEN Unit = 'LBS' THEN Quantity * 0.453592
             WHEN Unit = 'KGS' THEN Quantity
             ELSE NULL
        END AS Quantity_in_KGS,
        FOB_Value_INR,
        Unit_Price_INR,
        FOB_Value_USD,
        FOB_Value_Foreign_Currency,
        Unit_Price_Foreign_Currency,
        Currency_Name,
        FOB_Value_in_Lacs_INR,
        IEC,
        -- Trim for spaces, extra double quotes and commas, and uppercase
        UPPER(TRIM(Indian_Exporter_Name, '", ')) AS Indian_Exporter_Name,
        Exporter_Address,
        Exporter_City,
        Pin_Code,
        cha_code,
        cha_name,
        Foreign_Importer_Name,
        Importer_Address,
        FOREIGN_PORT,
        TRIM(FOREIGN_COUNTRY, '" ') AS FOREIGN_COUNTRY,
        Indian_Port,
        Chapter,
        hs_4_Digit,
        S_N1
    FROM
        {{ source('source_india_coffee_trade', 'original_india_coffee_exports_2020') }}
    WHERE
        (HS6 LIKE '0901%' OR HS6 LIKE '2101%')
        AND Unit IN ('KGS', 'MTS', 'LBS')
        AND FOB_Value_USD != 0
),
-- Expand synonyms for each country
expanded_countries AS (
    SELECT
        country_name,
        country_trase_id,
        UNNEST(synonyms) AS synonym
    FROM {{ ref('postgres_countries') }}
),
-- Join expanded countries and adjust some country names not in the official list
with_official_country_names AS (
    SELECT
        ie.*,
        COALESCE(
            ec.country_name,
            CASE 
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'SOUTH KOREA'
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'NORTH KOREA'
                WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MACEDONIA'
                WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LIBYA'
                WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZERBAIJAN'
                WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN 'CENTRAL AFRICAN REPUBLIC'
                WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KAZAKHSTAN'
                WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MOLDOVA'
                WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SLOVAKIA'
                WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TRINIDAD AND TOBAGO'
                WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VIETNAM'
                ELSE NULL
            END
        ) AS destination_country_name,
        COALESCE(
            ec.country_trase_id,
            CASE 
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'KR'
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'KP'
                WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MK'
                WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LY'
                WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZ'
                WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN ' CF'
                WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KZ'
                WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MD'
                WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SK'
                WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TT'
                WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VN'
                ELSE NULL
            END
        ) AS destination_country_trase_id
    FROM
        india_exports_2020 ie
    LEFT JOIN expanded_countries ec
    ON ie.FOREIGN_COUNTRY = ec.synonym
),

-- Add exporter group names

------ COPYING THIS LOGIC FROM gold_colombia_cd_coffee_2020.sql ------
-- Eventually it could be a Python UDF

-- Take exporter labels
exporter_labels AS (
    SELECT
        DISTINCT Indian_Exporter_Name AS exporter_label
    FROM with_official_country_names
),

-- Take the info from the traders that have a matching label
filtered_traders_by_label AS (
    SELECT 
        name, 
        trader_node_id,
        trase_id,
        labels,
        UNNEST(groups) AS company_group
    FROM
        {{ ref('postgres_traders') }}
    WHERE EXISTS (
        SELECT 1 FROM exporter_labels
        WHERE list_contains(labels, exporter_labels.exporter_label)
    )
),
-- Flatten the group info from the matching traders, and rank them by relevance
with_traders_group_info AS (
    SELECT 
        name, 
        trader_node_id,
        trase_id,
        json_extract_string(company_group, '$.group')::VARCHAR AS group_name,
        json_extract_string(company_group, '$.time_start')::TIMESTAMP AS time_start,
        json_extract_string(company_group, '$.time_end')::TIMESTAMP AS time_end,
        CASE
            WHEN trase_id LIKE ('{{trader_prefix}}%')
                AND time_start IS NULL AND time_end IS NULL THEN 1
            WHEN trase_id LIKE ('{{trader_prefix}}%') 
                AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 2
            WHEN trase_id NOT LIKE ('{{trader_prefix}}%')
                AND time_start IS NULL AND time_end IS NULL THEN 3
            WHEN trase_id NOT LIKE ('{{trader_prefix}}%') 
                AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 4
            WHEN trase_id IS NULL THEN 5
            ELSE NULL
        END AS ranking,
        labels
    FROM filtered_traders_by_label
),
-- Join the export data with the trader info, taking only the top matching ranking when there are multiple matches
-- DISTINCT ON selects the first record per name
filtered_ranked_traders AS (
    SELECT DISTINCT ON (name)
        name, 
        trader_node_id AS trader_node_id_from_label_match,
        trase_id AS trase_id_from_label_match, 
        labels,
        group_name AS group_name_from_label_match,
        time_start,
        time_end,
        ranking
    FROM with_traders_group_info
    ORDER BY name, ranking ASC, trase_id DESC
),
------ END OF COPIED SECTION ------

with_db_trader_info AS (
    SELECT
        with_ocn.*,
        frt.*
    FROM with_official_country_names AS with_ocn
    LEFT JOIN filtered_ranked_traders AS frt
        ON list_contains(frt.labels, with_ocn.Indian_Exporter_Name)
),

-- Final fields
final_fields AS (
    SELECT
        Year::INT AS year,
        Indian_Exporter_Name AS exporter_label,
        trader_node_id_from_label_match AS exporter_node_id,
        group_name_from_label_match AS exporter_group_name,
        destination_country_name AS country_of_destination,
        Quantity_in_KGS/1000 AS mass_tonnes,
        FOB_Value_USD AS fob,
        Foreign_Importer_Name AS importer_label,
        Indian_Port AS port_of_export_label,
        HS6 AS hs6
    FROM
        with_db_trader_info
)
-- If true (default), replace EU countries with the economic bloc name
-- using 'as_bool' to render the value as a boolean
{% if replace_eu_countries_with_bloc | as_bool %}
,
countries_with_economic_blocs AS (
    SELECT
        country_name,
        UNNEST(economic_bloc) AS economic_bloc
    FROM {{ ref('postgres_countries') }}
),
eu_countries AS (
    SELECT DISTINCT
        country_name AS eu_country,
        'EUROPEAN UNION' AS european_union_country
    FROM countries_with_economic_blocs
    WHERE
        json_extract_string(economic_bloc, '$.economic_bloc') = 'EUROPEAN UNION'
    AND
        country_name != 'UNITED KINGDOM'
),
final_with_eu AS (
    SELECT
        ff.year,
        ff.exporter_label,
        ff.exporter_node_id,
        ff.exporter_group_name,
        COALESCE(eu.european_union_country, ff.country_of_destination) AS country_of_destination,
        ff.mass_tonnes,
        ff.fob,
        ff.importer_label,
        ff.port_of_export_label,
        ff.hs6
    FROM final_fields ff
    LEFT JOIN eu_countries eu
        ON ff.country_of_destination = eu.eu_country
)

SELECT * FROM final_with_eu

{% else %}
-- If not, keep the normal country names

SELECT * FROM final_fields

{% endif %}