Skip to content

DBT: Gold India Trade Coffee 2020

File location: s3://trase-storage/india/trade/bol/coffee/2020/gold/gold_india_trade_coffee_2020.parquet

DBT model name: gold_india_trade_coffee_2020

Explore on Metabase: Full table; summary statistics

DBT details


Description

....


Details

Column Type Description
year INTEGER Comes from the Year field.
exporter_label VARCHAR Comes from the Indian_Exporter_Name field. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms.
exporter_node_id BIGINT
exporter_group_name VARCHAR Official name of the trader group, as it exists in the Trase database. See Traders-Developer-Guide.md: 1. Traders data model.
country_of_destination VARCHAR Comes from the FOREIGN_COUNTRY field. Default definition: Official name of the country, as it exists in the Trase database.
mass_tonnes DOUBLE Comes from the Quantity field, and is converted to tonnes from other units.
fob DOUBLE Comes from FOB_Value_USD field. Default definition: FOB (Free On Board) in USD. Values are reported in customs data as a measure of the financial value of the trade.
importer_label VARCHAR Comes from the Foreign_Importer_Name field. Hasn't been checked and cleaned against Trase's database. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms.
port_of_export_label VARCHAR Comes from the FOREIGN_PORT field. Hasn't been checked and cleaned against Trase's database.
hs6 VARCHAR Default definition: First 6 characters of the Harmonized System code: an internationally-agreed set of codes used to describe products in trade data.

Models / Seeds

  • source.trase_duckdb.source_india_coffee_trade.original_india_coffee_exports_2020
  • model.trase_duckdb.postgres_countries
  • model.trase_duckdb.postgres_traders

    Macros

    • macro.trase_duckdb.excel_extension

Sources

  • ['source_india_coffee_trade', 'original_india_coffee_exports_2020']

No called script or script source not found.

-- gold_india_trade_coffee_2020
{{ 
    config(
        pre_hook='{{ excel_extension() }}',
        materialized='external',
    )
}}

{% set replace_eu_countries_with_bloc = False %}
{% set trader_prefix = "IN-TRADER-" %}
{% set year = 2020 %}

WITH india_exports_2020 AS (
    SELECT
        Date,
        Year,
        EXTRACT(MONTH FROM Date) AS Month,
        EXTRACT(DAY FROM Date) AS Day,
        HS_Code::VARCHAR AS HS_Code,
        -- Casting frequently to VARCHAR as it tends to convert it to INT otherwise
        CASE
            WHEN CAST(HS_Code AS VARCHAR) LIKE '901%' 
                THEN CONCAT('0', HS_Code)[0:6]
            WHEN CAST(HS_Code AS VARCHAR) LIKE '2101%'
                THEN CAST(HS_Code AS VARCHAR)[0:6]::VARCHAR
            ELSE NULL
        END AS HS6,
        Product_Description,
        Quantity,
        Unit,
        CASE WHEN Unit = 'MTS' THEN Quantity * 1000
             WHEN Unit = 'LBS' THEN Quantity * 0.453592
             WHEN Unit = 'KGS' THEN Quantity
             ELSE NULL
        END AS Quantity_in_KGS,
        FOB_Value_INR,
        Unit_Price_INR,
        FOB_Value_USD,
        FOB_Value_Foreign_Currency,
        Unit_Price_Foreign_Currency,
        Currency_Name,
        FOB_Value_in_Lacs_INR,
        IEC,
        -- Trim for spaces, extra double quotes and commas, and uppercase
        UPPER(TRIM(Indian_Exporter_Name, '", ')) AS Indian_Exporter_Name,
        Exporter_Address,
        Exporter_City,
        Pin_Code,
        cha_code,
        cha_name,
        Foreign_Importer_Name,
        Importer_Address,
        FOREIGN_PORT,
        TRIM(FOREIGN_COUNTRY, '" ') AS FOREIGN_COUNTRY,
        Indian_Port,
        Chapter,
        hs_4_Digit,
        S_N1
    FROM
        {{ source('source_india_coffee_trade', 'original_india_coffee_exports_2020') }}
    WHERE
        (HS6 LIKE '0901%' OR HS6 LIKE '2101%')
        AND Unit IN ('KGS', 'MTS', 'LBS')
        AND FOB_Value_USD != 0
),
-- Expand synonyms for each country
expanded_countries AS (
    SELECT
        country_name,
        country_trase_id,
        UNNEST(synonyms) AS synonym
    FROM {{ ref('postgres_countries') }}
),
-- Join expanded countries and adjust some country names not in the official list
with_official_country_names AS (
    SELECT
        ie.*,
        COALESCE(
            ec.country_name,
            CASE 
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'SOUTH KOREA'
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'NORTH KOREA'
                WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MACEDONIA'
                WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LIBYA'
                WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZERBAIJAN'
                WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN 'CENTRAL AFRICAN REPUBLIC'
                WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KAZAKHSTAN'
                WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MOLDOVA'
                WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SLOVAKIA'
                WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TRINIDAD AND TOBAGO'
                WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VIETNAM'
                ELSE NULL
            END
        ) AS destination_country_name,
        COALESCE(
            ec.country_trase_id,
            CASE 
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'KR'
                WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'KP'
                WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MK'
                WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LY'
                WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZ'
                WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN ' CF'
                WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KZ'
                WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MD'
                WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SK'
                WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TT'
                WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VN'
                ELSE NULL
            END
        ) AS destination_country_trase_id
    FROM
        india_exports_2020 ie
    LEFT JOIN expanded_countries ec
    ON ie.FOREIGN_COUNTRY = ec.synonym
),

-- Add exporter group names

------ COPYING THIS LOGIC FROM gold_colombia_cd_coffee_2020.sql ------
-- Eventually it could be a Python UDF

-- Take exporter labels
exporter_labels AS (
    SELECT
        DISTINCT Indian_Exporter_Name AS exporter_label
    FROM with_official_country_names
),

-- Take the info from the traders that have a matching label
filtered_traders_by_label AS (
    SELECT 
        name, 
        trader_node_id,
        trase_id,
        labels,
        UNNEST(groups) AS company_group
    FROM
        {{ ref('postgres_traders') }}
    WHERE EXISTS (
        SELECT 1 FROM exporter_labels
        WHERE list_contains(labels, exporter_labels.exporter_label)
    )
),
-- Flatten the group info from the matching traders, and rank them by relevance
with_traders_group_info AS (
    SELECT 
        name, 
        trader_node_id,
        trase_id,
        json_extract_string(company_group, '$.group')::VARCHAR AS group_name,
        json_extract_string(company_group, '$.time_start')::TIMESTAMP AS time_start,
        json_extract_string(company_group, '$.time_end')::TIMESTAMP AS time_end,
        CASE
            WHEN trase_id LIKE ('{{trader_prefix}}%')
                AND time_start IS NULL AND time_end IS NULL THEN 1
            WHEN trase_id LIKE ('{{trader_prefix}}%') 
                AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 2
            WHEN trase_id NOT LIKE ('{{trader_prefix}}%')
                AND time_start IS NULL AND time_end IS NULL THEN 3
            WHEN trase_id NOT LIKE ('{{trader_prefix}}%') 
                AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 4
            WHEN trase_id IS NULL THEN 5
            ELSE NULL
        END AS ranking,
        labels
    FROM filtered_traders_by_label
),
-- Join the export data with the trader info, taking only the top matching ranking when there are multiple matches
-- DISTINCT ON selects the first record per name
filtered_ranked_traders AS (
    SELECT DISTINCT ON (name)
        name, 
        trader_node_id AS trader_node_id_from_label_match,
        trase_id AS trase_id_from_label_match, 
        labels,
        group_name AS group_name_from_label_match,
        time_start,
        time_end,
        ranking
    FROM with_traders_group_info
    ORDER BY name, ranking ASC, trase_id DESC
),
------ END OF COPIED SECTION ------

with_db_trader_info AS (
    SELECT
        with_ocn.*,
        frt.*
    FROM with_official_country_names AS with_ocn
    LEFT JOIN filtered_ranked_traders AS frt
        ON list_contains(frt.labels, with_ocn.Indian_Exporter_Name)
),

-- Final fields
final_fields AS (
    SELECT
        Year::INT AS year,
        Indian_Exporter_Name AS exporter_label,
        trader_node_id_from_label_match AS exporter_node_id,
        group_name_from_label_match AS exporter_group_name,
        destination_country_name AS country_of_destination,
        Quantity_in_KGS/1000 AS mass_tonnes,
        FOB_Value_USD AS fob,
        Foreign_Importer_Name AS importer_label,
        Indian_Port AS port_of_export_label,
        HS6 AS hs6
    FROM
        with_db_trader_info
)
-- If true (default), replace EU countries with the economic bloc name
-- using 'as_bool' to render the value as a boolean
{% if replace_eu_countries_with_bloc | as_bool %}
,
countries_with_economic_blocs AS (
    SELECT
        country_name,
        UNNEST(economic_bloc) AS economic_bloc
    FROM {{ ref('postgres_countries') }}
),
eu_countries AS (
    SELECT DISTINCT
        country_name AS eu_country,
        'EUROPEAN UNION' AS european_union_country
    FROM countries_with_economic_blocs
    WHERE
        json_extract_string(economic_bloc, '$.economic_bloc') = 'EUROPEAN UNION'
    AND
        country_name != 'UNITED KINGDOM'
),
final_with_eu AS (
    SELECT
        ff.year,
        ff.exporter_label,
        ff.exporter_node_id,
        ff.exporter_group_name,
        COALESCE(eu.european_union_country, ff.country_of_destination) AS country_of_destination,
        ff.mass_tonnes,
        ff.fob,
        ff.importer_label,
        ff.port_of_export_label,
        ff.hs6
    FROM final_fields ff
    LEFT JOIN eu_countries eu
        ON ff.country_of_destination = eu.eu_country
)

SELECT * FROM final_with_eu

{% else %}
-- If not, keep the normal country names

SELECT * FROM final_fields

{% endif %}