DBT: Gold India Trade Coffee 2020
File location: s3://trase-storage/india/trade/bol/coffee/2020/gold/gold_india_trade_coffee_2020.parquet
DBT model name: gold_india_trade_coffee_2020
Explore on Metabase: Full table; summary statistics
DBT details
- Lineage
-
Dbt path:
trase_production.main_india_coffee.gold_india_trade_coffee_2020 -
Containing yaml link: trase/data_pipeline/models/india/trade/bol/coffee/_schema_india_coffee.yml
-
Model file: trase/data_pipeline/models/india/trade/bol/coffee/gold_india_trade_coffee_2020.sql
-
Tags:
gold,india,coffee,trade,2020,diet-trase-coffee
Description
....
Details
| Column | Type | Description |
|---|---|---|
year |
INTEGER |
Comes from the Year field. |
exporter_label |
VARCHAR |
Comes from the Indian_Exporter_Name field. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms. |
exporter_node_id |
BIGINT |
|
exporter_group_name |
VARCHAR |
Official name of the trader group, as it exists in the Trase database. See Traders-Developer-Guide.md: 1. Traders data model. |
country_of_destination |
VARCHAR |
Comes from the FOREIGN_COUNTRY field. Default definition: Official name of the country, as it exists in the Trase database. |
mass_tonnes |
DOUBLE |
Comes from the Quantity field, and is converted to tonnes from other units. |
fob |
DOUBLE |
Comes from FOB_Value_USD field. Default definition: FOB (Free On Board) in USD. Values are reported in customs data as a measure of the financial value of the trade. |
importer_label |
VARCHAR |
Comes from the Foreign_Importer_Name field. Hasn't been checked and cleaned against Trase's database. Default definition: Name (label) of the trader, which should appear as one of the synonyms of a trader in the database. It doesn't necessarily have to be the default name however. See Traders-User-Guide.md: 1.2 Synonyms. |
port_of_export_label |
VARCHAR |
Comes from the FOREIGN_PORT field. Hasn't been checked and cleaned against Trase's database. |
hs6 |
VARCHAR |
Default definition: First 6 characters of the Harmonized System code: an internationally-agreed set of codes used to describe products in trade data. |
Models / Seeds
source.trase_duckdb.source_india_coffee_trade.original_india_coffee_exports_2020model.trase_duckdb.postgres_countries-
model.trase_duckdb.postgres_tradersMacros
macro.trase_duckdb.excel_extension
Sources
['source_india_coffee_trade', 'original_india_coffee_exports_2020']
No called script or script source not found.
-- gold_india_trade_coffee_2020
{{
config(
pre_hook='{{ excel_extension() }}',
materialized='external',
)
}}
{% set replace_eu_countries_with_bloc = False %}
{% set trader_prefix = "IN-TRADER-" %}
{% set year = 2020 %}
WITH india_exports_2020 AS (
SELECT
Date,
Year,
EXTRACT(MONTH FROM Date) AS Month,
EXTRACT(DAY FROM Date) AS Day,
HS_Code::VARCHAR AS HS_Code,
-- Casting frequently to VARCHAR as it tends to convert it to INT otherwise
CASE
WHEN CAST(HS_Code AS VARCHAR) LIKE '901%'
THEN CONCAT('0', HS_Code)[0:6]
WHEN CAST(HS_Code AS VARCHAR) LIKE '2101%'
THEN CAST(HS_Code AS VARCHAR)[0:6]::VARCHAR
ELSE NULL
END AS HS6,
Product_Description,
Quantity,
Unit,
CASE WHEN Unit = 'MTS' THEN Quantity * 1000
WHEN Unit = 'LBS' THEN Quantity * 0.453592
WHEN Unit = 'KGS' THEN Quantity
ELSE NULL
END AS Quantity_in_KGS,
FOB_Value_INR,
Unit_Price_INR,
FOB_Value_USD,
FOB_Value_Foreign_Currency,
Unit_Price_Foreign_Currency,
Currency_Name,
FOB_Value_in_Lacs_INR,
IEC,
-- Trim for spaces, extra double quotes and commas, and uppercase
UPPER(TRIM(Indian_Exporter_Name, '", ')) AS Indian_Exporter_Name,
Exporter_Address,
Exporter_City,
Pin_Code,
cha_code,
cha_name,
Foreign_Importer_Name,
Importer_Address,
FOREIGN_PORT,
TRIM(FOREIGN_COUNTRY, '" ') AS FOREIGN_COUNTRY,
Indian_Port,
Chapter,
hs_4_Digit,
S_N1
FROM
{{ source('source_india_coffee_trade', 'original_india_coffee_exports_2020') }}
WHERE
(HS6 LIKE '0901%' OR HS6 LIKE '2101%')
AND Unit IN ('KGS', 'MTS', 'LBS')
AND FOB_Value_USD != 0
),
-- Expand synonyms for each country
expanded_countries AS (
SELECT
country_name,
country_trase_id,
UNNEST(synonyms) AS synonym
FROM {{ ref('postgres_countries') }}
),
-- Join expanded countries and adjust some country names not in the official list
with_official_country_names AS (
SELECT
ie.*,
COALESCE(
ec.country_name,
CASE
WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'SOUTH KOREA'
WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'NORTH KOREA'
WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MACEDONIA'
WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LIBYA'
WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZERBAIJAN'
WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN 'CENTRAL AFRICAN REPUBLIC'
WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KAZAKHSTAN'
WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MOLDOVA'
WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SLOVAKIA'
WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TRINIDAD AND TOBAGO'
WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VIETNAM'
ELSE NULL
END
) AS destination_country_name,
COALESCE(
ec.country_trase_id,
CASE
WHEN ie.FOREIGN_COUNTRY = 'KOREA,REPUBLIC OF' THEN 'KR'
WHEN ie.FOREIGN_COUNTRY = 'KOREA,DEMOCRATIC PEOPLE''S REPUBLIC OF' THEN 'KP'
WHEN ie.FOREIGN_COUNTRY = 'MACEDONIA,THE FORMER YUGOSLAV REPUBLIC OF' THEN 'MK'
WHEN ie.FOREIGN_COUNTRY = 'LIBYAN ARAB REPUBLIC' THEN 'LY'
WHEN ie.FOREIGN_COUNTRY = 'AZARBAIJAN' THEN 'AZ'
WHEN ie.FOREIGN_COUNTRY = 'CENTRAL AFRICAN REPULIC' THEN ' CF'
WHEN ie.FOREIGN_COUNTRY = 'KAZAKISTAN' THEN 'KZ'
WHEN ie.FOREIGN_COUNTRY = 'MOLDOVA,REPUBLIC OF' THEN 'MD'
WHEN ie.FOREIGN_COUNTRY = 'SLOVAK REPUBLIC' THEN 'SK'
WHEN ie.FOREIGN_COUNTRY = 'TRINIDAD & TOBAGO' THEN 'TT'
WHEN ie.FOREIGN_COUNTRY = 'VIETNAM, DEMOCRATIC REP. OF' THEN 'VN'
ELSE NULL
END
) AS destination_country_trase_id
FROM
india_exports_2020 ie
LEFT JOIN expanded_countries ec
ON ie.FOREIGN_COUNTRY = ec.synonym
),
-- Add exporter group names
------ COPYING THIS LOGIC FROM gold_colombia_cd_coffee_2020.sql ------
-- Eventually it could be a Python UDF
-- Take exporter labels
exporter_labels AS (
SELECT
DISTINCT Indian_Exporter_Name AS exporter_label
FROM with_official_country_names
),
-- Take the info from the traders that have a matching label
filtered_traders_by_label AS (
SELECT
name,
trader_node_id,
trase_id,
labels,
UNNEST(groups) AS company_group
FROM
{{ ref('postgres_traders') }}
WHERE EXISTS (
SELECT 1 FROM exporter_labels
WHERE list_contains(labels, exporter_labels.exporter_label)
)
),
-- Flatten the group info from the matching traders, and rank them by relevance
with_traders_group_info AS (
SELECT
name,
trader_node_id,
trase_id,
json_extract_string(company_group, '$.group')::VARCHAR AS group_name,
json_extract_string(company_group, '$.time_start')::TIMESTAMP AS time_start,
json_extract_string(company_group, '$.time_end')::TIMESTAMP AS time_end,
CASE
WHEN trase_id LIKE ('{{trader_prefix}}%')
AND time_start IS NULL AND time_end IS NULL THEN 1
WHEN trase_id LIKE ('{{trader_prefix}}%')
AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 2
WHEN trase_id NOT LIKE ('{{trader_prefix}}%')
AND time_start IS NULL AND time_end IS NULL THEN 3
WHEN trase_id NOT LIKE ('{{trader_prefix}}%')
AND YEAR(time_start) <= {{year}} AND YEAR(time_end) >= {{year}} THEN 4
WHEN trase_id IS NULL THEN 5
ELSE NULL
END AS ranking,
labels
FROM filtered_traders_by_label
),
-- Join the export data with the trader info, taking only the top matching ranking when there are multiple matches
-- DISTINCT ON selects the first record per name
filtered_ranked_traders AS (
SELECT DISTINCT ON (name)
name,
trader_node_id AS trader_node_id_from_label_match,
trase_id AS trase_id_from_label_match,
labels,
group_name AS group_name_from_label_match,
time_start,
time_end,
ranking
FROM with_traders_group_info
ORDER BY name, ranking ASC, trase_id DESC
),
------ END OF COPIED SECTION ------
with_db_trader_info AS (
SELECT
with_ocn.*,
frt.*
FROM with_official_country_names AS with_ocn
LEFT JOIN filtered_ranked_traders AS frt
ON list_contains(frt.labels, with_ocn.Indian_Exporter_Name)
),
-- Final fields
final_fields AS (
SELECT
Year::INT AS year,
Indian_Exporter_Name AS exporter_label,
trader_node_id_from_label_match AS exporter_node_id,
group_name_from_label_match AS exporter_group_name,
destination_country_name AS country_of_destination,
Quantity_in_KGS/1000 AS mass_tonnes,
FOB_Value_USD AS fob,
Foreign_Importer_Name AS importer_label,
Indian_Port AS port_of_export_label,
HS6 AS hs6
FROM
with_db_trader_info
)
-- If true (default), replace EU countries with the economic bloc name
-- using 'as_bool' to render the value as a boolean
{% if replace_eu_countries_with_bloc | as_bool %}
,
countries_with_economic_blocs AS (
SELECT
country_name,
UNNEST(economic_bloc) AS economic_bloc
FROM {{ ref('postgres_countries') }}
),
eu_countries AS (
SELECT DISTINCT
country_name AS eu_country,
'EUROPEAN UNION' AS european_union_country
FROM countries_with_economic_blocs
WHERE
json_extract_string(economic_bloc, '$.economic_bloc') = 'EUROPEAN UNION'
AND
country_name != 'UNITED KINGDOM'
),
final_with_eu AS (
SELECT
ff.year,
ff.exporter_label,
ff.exporter_node_id,
ff.exporter_group_name,
COALESCE(eu.european_union_country, ff.country_of_destination) AS country_of_destination,
ff.mass_tonnes,
ff.fob,
ff.importer_label,
ff.port_of_export_label,
ff.hs6
FROM final_fields ff
LEFT JOIN eu_countries eu
ON ff.country_of_destination = eu.eu_country
)
SELECT * FROM final_with_eu
{% else %}
-- If not, keep the normal country names
SELECT * FROM final_fields
{% endif %}