Skip to content

Qa tanzania coffee 2020

View or edit on GitHub

This page is synchronized from trase/data/tanzania/trade/y2020/qa_tanzania_coffee_2020.ipynb. Last modified on 2025-12-13 00:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

Author: Nicolás Martín (nicolas.martin@sei.org)

Date: 2024-06-26

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")

PATHS = {
    "input_silver_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/silver/silver_tanzania_coffee_2020.parquet",
    "input_silver_comtrade_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/silver/silver_comtrade_tanzania_coffee_2020.parquet",
    "input_gold_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/gold/gold_tanzania_coffee_2020.parquet"
}

export_data = pd.read_parquet(PATHS["input_silver_tanzania_coffee"])
comtrade_data = pd.read_parquet(PATHS["input_silver_comtrade_tanzania_coffee"])
export_data_gold = pd.read_parquet(PATHS["input_gold_tanzania_coffee"])

def modified_z_score(series):
    """
    The modified z-score is useful in identifying outliers in a dataset. 
    It relies in the median absolute deviation (MAD) instead of the standard deviation, making it less
    sensitive to outliers.
    """
    median = series.median()
    mad = (series - median).abs().median()
    modified_z = 0.6745 * (series - median) / mad
    return modified_z

Data description

  • The first dataframe sample_from_original_df shows the fields that are being loaded from the original excel file (they are loaded based on the EXPORT_COLUMN_MAPPING dictionary.
  • The second dataframe sample_from_additional_fields_df shows fields generated afterwards (which didn't exist in the original excel file)
EXPORT_COLUMN_MAPPING = {
    "Date": "DATE",
    "Month": "MONTH",
    "Heading": "HEADING",
    "Sub_Heading": "HS_CODE",
    "Destination_Country": "COUNTRY_DESTINATION",
    "Gross_Weight": "GROSS_WEIGHT",
    "Net_Weight": "NET_WEIGHT",
    "Item_Statistical_Value_USD": "FOB_VALUE_IN_USD",
    "Item_Customs_Value_TZS": "FOB_VALUE_IN_TZS",
    "Unit": "UNIT",
    "Quantity": "QUANTITY",
    "HS_Code_Description": "HS_CODE_DESCRIPTION",
    "Product_Description": "COMMERCIAL_DESCRIPTION",
    "Customs_Office_Code": "OFFICE_CODE",
    "Exporter_Code": "EXPORTER_CODE",
    "Exporter": "EXPORTER_NAME",
    "Declarant_Code": "DECLARANT_CODE",
    "Declarant_Name": "DECLARANT_NAME",
    "Buyer": "BUYER_NAME",
    "Port_Name": "PORT_NAME",
    "HS_Code": "HS_CODE_ORIGINAL",
    "Chapter": "CHAPTER",
    "TANSAD_SERIAL_NO": "TANSAD_SERIAL_NUMBER",
    "TANSAD_NO": "TANSAD_NUMBER",
    "S_N": "SERIAL_NUMBER",
    "Processing_Status_Name": "PROCESSING_STATUS_NAME",
    "Origin_county": "ORIGIN_COUNTRY",
    "Consignment_Country": "CONSIGNMENT_COUNTRY",
    "Trading_country": "TRADING_COUNTRY",
    "Bill_No": "BILL_NUMBER",
    "Container_YN": "CONTAINER_YN",
    "Total_Container_No": "TOTAL_CONTAINER_NO",
    "Transport_Means_ID": "TRANSPORT_MEANS_ID",
    "Transport_Nationality_Code": "TRANSPORT_NATIONALITY_CODE",
    "Arrival_Day": "ARRIVAL_DAY",
    "Discharge_Day": "DISCHARGE_DAY",
    "Entry_Office_Code": "ENTRY_OFFICE_CODE",
    "Transport_Mode_Code": "TRANSPORT_MODE_CODE",
    "Exchange_Rate": "EXCHANGE_RATE",
    "Item_Invoice_Amount_in_FC": "ITEM_INVOICE_AMOUNT_IN_FC",
    "Invoice_Currency": "INVOICE_CURRENCY",
    "Declared_Customs_Value_TZS": "DECLARED_CUSTOMS_VALUE_TZS",
    "Assessed_Customs_Value_TZS": "ASSESSED_CUSTOMS_VALUE_TZS",
}
data_mapped = []
data_unmapped = []
unmapped_fields = [field for field in export_data_gold.columns if field not in EXPORT_COLUMN_MAPPING.values()]
for origin_field, destination_field in EXPORT_COLUMN_MAPPING.items():
        field_type = export_data_gold[destination_field].dtype
        # Get 3 sample values, convert to string to handle any data type gracefully
        sample_values = export_data_gold[destination_field].sample(3, random_state=1).astype(str).tolist()
        data_mapped.append([origin_field, destination_field, field_type, ', '.join(sample_values)])
sample_from_original_df = pd.DataFrame(data_mapped, columns=['Origin Field', 'Destination Field', 'Field Type', '3 Sample Values'])

# Iterate through the unmapped fields
for field in unmapped_fields:
    field_type = str(export_data_gold[field].dtype)
    sample_values = export_data_gold[field].sample(3, random_state=1).astype(str).tolist()
    data_unmapped.append([field, field_type, ', '.join(sample_values)])
sample_from_additional_fields_df = pd.DataFrame(data_unmapped, columns=['Additional Field Name', 'Field Type', '3 Sample Values'])
sample_from_original_df
Origin Field Destination Field Field Type 3 Sample Values
0 Date DATE datetime64[ns] 2020-11-01, 2020-11-01, 2020-02-05
1 Month MONTH Int64 11, 11, 2
2 Heading HEADING Int64 901, 901, 901
3 Sub_Heading HS_CODE object 090111, 090111, 090111
4 Destination_Country COUNTRY_DESTINATION object GERMANY, JAPAN, ISRAEL
5 Gross_Weight GROSS_WEIGHT Float64 12264.0, 39104.0, 38400.0
6 Net_Weight NET_WEIGHT Float64 12264.0, 38400.0, 38400.0
7 Item_Statistical_Value_USD FOB_VALUE_IN_USD Float64 39965.92, 106659.84, 105216.0
8 Item_Customs_Value_TZS FOB_VALUE_IN_TZS Float64 91372380.35, 243850792.55, 239687256.19
9 Unit UNIT string KGM, KGM, KGM
10 Quantity QUANTITY Float64 12264.0, 38400.0, 38400.0
11 HS_Code_Description HS_CODE_DESCRIPTION string Not decaffeinated, Not decaffeinated, Not deca...
12 Product_Description COMMERCIAL_DESCRIPTION string 6 BULK BAGS OF ARABICA COFFEE AA/AB RA CERTIFI...
13 Customs_Office_Code OFFICE_CODE string TZDL, TZDL, TZDL
14 Exporter_Code EXPORTER_CODE Int64 140277002, 110669011, 100127776
15 Exporter EXPORTER_NAME object UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
16 Declarant_Code DECLARANT_CODE Int64 134787716, 100146487, 100324148
17 Declarant_Name DECLARANT_NAME string WOTTA LOGISTIC COMPANY LIMITED, UNITED YOUTH S...
18 Buyer BUYER_NAME string TCHIBO GMBH,UEBERSEERING 18,, MC AGRI ALLIANCE...
19 Port_Name PORT_NAME string DAR CUSTOMS SERVICE CENTRE, DAR CUSTOMS SERVIC...
20 HS_Code HS_CODE_ORIGINAL Int64 90111000000, 90111000000, 90111000000
21 Chapter CHAPTER Int64 9, 9, 9
22 TANSAD_SERIAL_NO TANSAD_SERIAL_NUMBER Int64 1385847, 1389073, 1046254
23 TANSAD_NO TANSAD_NUMBER string TZDL-20-1385847, TZDL-20-1389073, TZDL-20-1046254
24 S_N SERIAL_NUMBER Int64 216040, 216845, 206999
25 Processing_Status_Name PROCESSING_STATUS_NAME string Released, Released, Exited
26 Origin_county ORIGIN_COUNTRY string United Republic of Tanzania, United Republic o...
27 Consignment_Country CONSIGNMENT_COUNTRY string United Republic of Tanzania, Japan, United Rep...
28 Trading_country TRADING_COUNTRY string United Republic of Tanzania, Japan, United Rep...
29 Bill_No BILL_NUMBER object 574990, 206137777, nan
30 Container_YN CONTAINER_YN string N, Y, Y
31 Total_Container_No TOTAL_CONTAINER_NO Int64 0, 2, 2
32 Transport_Means_ID TRANSPORT_MEANS_ID string <NA>, <NA>, <NA>
33 Transport_Nationality_Code TRANSPORT_NATIONALITY_CODE string TZ, JP, TZ
34 Arrival_Day ARRIVAL_DAY Int64 <NA>, <NA>, <NA>
35 Discharge_Day DISCHARGE_DAY Int64 <NA>, <NA>, <NA>
36 Entry_Office_Code ENTRY_OFFICE_CODE string TZDW, TZDW, TZDW
37 Transport_Mode_Code TRANSPORT_MODE_CODE Int64 1, 1, 1
38 Exchange_Rate EXCHANGE_RATE Float64 2286.2574, 2286.2475, 2278.0495
39 Item_Invoice_Amount_in_FC ITEM_INVOICE_AMOUNT_IN_FC Float64 39965.92, 106659.84, 105216.0
40 Invoice_Currency INVOICE_CURRENCY string USD, USD, USD
41 Declared_Customs_Value_TZS DECLARED_CUSTOMS_VALUE_TZS Float64 91372380.348, 243850792.55, 239687256.192
42 Assessed_Customs_Value_TZS ASSESSED_CUSTOMS_VALUE_TZS Float64 91372380.348, 243850792.55, 239687256.192
sample_from_additional_fields_df
Additional Field Name Field Type 3 Sample Values
0 YEAR int64 2020, 2020, 2020
1 DAY int64 1, 1, 5
2 EXPORTER_NAME_ORIGINAL string UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
3 EXPORTER_ID int64 15596538, 15596352, 15596349
4 EXPORTER_GROUP object UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
5 trase_id object TZ-TRADER-140277002, TZ-TRADER-110669011, TZ-T...
6 tzs_per_usd Float64 2286.2574000548466, 2286.24749999625, 2278.049...
7 exchangerate_z_score Float64 4.998933574280233e-06, -0.6744982694724116, -1...
8 usd_per_kg Float64 3.2587997390737113, 2.7776, 2.74

Checking TZS - USD exchange rate outliers

  • 23 outliers where identified through a modified z-score using as reference the exchange rate values of a given month.
  • The value for those outliers was replaced by the mean of the exchange rate of that month, excluding those outliers from the calculation of the mean.
# Calculate the exchange rates in TZS per USD
export_data["tzs_per_usd"] = (
    export_data["FOB_VALUE_IN_TZS"] / export_data["FOB_VALUE_IN_USD"]
)

record_count = len(export_data)

# get the modified z score within each month
for month in export_data["MONTH"].unique():
    export_data.loc[
        export_data["MONTH"] == month, "exchangerate_z_score"
    ] = modified_z_score(export_data[export_data["MONTH"] == month]["tzs_per_usd"])

# Get the mean of the exchange rate for each month, excluding the outliers
# Defining outliers as z-scores greater than 5
exchange_rate_mean = {}
for month in export_data["MONTH"].unique():
    exchange_rate_mean[month] = export_data[
        (export_data["MONTH"] == month)
        & (export_data["exchangerate_z_score"].abs() <= 5)
    ]["tzs_per_usd"].mean()

# Create a new column for color-coding the data
export_data['Outlier'] = 'Normal'  # Creates a new column and sets all values to 'Normal'
export_data.loc[export_data['exchangerate_z_score'] > 5, 'Outlier'] = 'Outlier'  # Only modifies rows where the condition is True
outlier_count = len(export_data[export_data['exchangerate_z_score'] > 5])

plt.figure(figsize=(10, 6))
scatter_plot = sns.scatterplot(x='DATE', y='tzs_per_usd', hue='Outlier', 
                               palette={'Normal': 'blue', 'Outlier': 'red'}, 
                               data=export_data)
scatter_plot.set(xlabel='Date', ylabel='Exchange Rate (TZS per USD)')
plt.suptitle(f"Exchange Rate Outliers by Date. {outlier_count} outliers out of {record_count} records", y=0.95)
plt.title(f"Outliers are defined against the exchange rate values of a given month")
plt.legend(title='Record type')

plt.show()

export_data.drop('Outlier', axis=1, inplace=True)

png

Checking cost per kg outliers

  • There is 1 clear outlier
# Calculate the cost per net weight in USD, and do a boxplot
export_data["usd_per_kg"] = export_data["FOB_VALUE_IN_USD"] / export_data["NET_WEIGHT"]

# Do a boxplot of usd_per_kg
plt.figure(figsize=(10, 6))
sns.boxplot(x='HS_CODE', y='usd_per_kg', data=export_data)
plt.title("There is one outstanding outlier")
plt.show()

png

print("Outlier details:")
export_data[export_data['usd_per_kg'] > 50]
Outlier details:
DATE MONTH HEADING HS_CODE COUNTRY_DESTINATION GROSS_WEIGHT NET_WEIGHT FOB_VALUE_IN_USD FOB_VALUE_IN_TZS UNIT ... DECLARED_CUSTOMS_VALUE_TZS ASSESSED_CUSTOMS_VALUE_TZS YEAR DAY EXPORTER_NAME_ORIGINAL EXPORTER_ID EXPORTER_GROUP tzs_per_usd exchangerate_z_score usd_per_kg
1055 2020-10-01 10 901 090111 CHINA (MAINLAND) 3600.0 3600.0 670680.0 1533320554.1 KGM ... 1533320554.1 1533320554.1 2020 1 TAYLOR WINCH (TANZANIA) LTD 15596376 TAYLOR WINCH (TANZANIA) LTD 2286.2178 0.674503 186.3

1 rows × 51 columns

We replace the outlier by the mean of non-outliers going to China, and zoom again in the values, by HS_CODE

# Replacing this outlier by the mean of the non-outliers going to China
china_non_outliers_mean = export_data[
        (export_data["usd_per_kg"] < 50) & (export_data["COUNTRY_DESTINATION"] == "CHINA")
    ]["usd_per_kg"].mean()
export_data.loc[export_data["usd_per_kg"] > 50, "usd_per_kg"] = china_non_outliers_mean

# Show a boxplot of HS_CODE 090111, by month; and another one of 090190
hs_code_1_count = len(export_data[export_data["HS_CODE"] == "090111"])
plt.figure(figsize=(10, 6))
sns.boxplot(x='MONTH', y='usd_per_kg', data=export_data[export_data["HS_CODE"] == "090111"])
plt.title(f"HS_CODE 090111: {hs_code_1_count} records")
plt.show()

plt.figure(figsize=(10, 6))
hs_code_2_count = len(export_data[export_data["HS_CODE"] == "090190"])
sns.boxplot(x='MONTH', y='usd_per_kg', data=export_data[export_data["HS_CODE"] == "090190"])
plt.title(f"HS_CODE 090190: {hs_code_2_count} records")
plt.show()

png

png

For HS_CODE 090190 there doesn't seem to be much to do about the data.

For 090111 , will take a look at the records that are above 10 usd_per_kg (about double of the maximum in the boxplot)

outlier_review = export_data[(export_data["HS_CODE"] == "090111") & (export_data['usd_per_kg'] > 10)]
outlier_review
DATE MONTH HEADING HS_CODE COUNTRY_DESTINATION GROSS_WEIGHT NET_WEIGHT FOB_VALUE_IN_USD FOB_VALUE_IN_TZS UNIT ... DECLARED_CUSTOMS_VALUE_TZS ASSESSED_CUSTOMS_VALUE_TZS YEAR DAY EXPORTER_NAME_ORIGINAL EXPORTER_ID EXPORTER_GROUP tzs_per_usd exchangerate_z_score usd_per_kg
425 2020-03-21 3 901 090111 BELGIUM 194670.0 194400.0 2818800.0 6421003151.04 KGM ... 6421003151.04 6421003151.04 2020 21 DRK GENERAL MERCHANTS LTD 15596424 DRK GENERAL MERCHANTS LTD 2277.9208 -0.0 14.5
593 2020-04-23 4 901 090111 JAPAN 18330.0 1800.0 56880.0 129650918.26 KGM ... 129650918.256 129650918.256 2020 23 DORMAN (TANZANIA) LIMITED 15596349 DORMAN (TANZANIA) LIMITED 2279.3762 0.879783 31.6
838 2020-09-01 9 901 090111 GERMANY 122.0 122.0 2400.0 5486856.0 KGM ... 5486856.0 5486856.0 2020 1 NGILA ESTATE LIMITED 15596439 NGILA ESTATE LIMITED 2286.19 0.803803 19.672131
967 2020-10-01 10 901 090111 GERMANY 248.0 248.0 2640.0 6035562.72 KGM ... 6035562.72 6035562.72 2020 1 BLUE MOUNTAIN COFFEE FARMS LIMITED 15596517 BLUE MOUNTAIN COFFEE FARMS LIMITED 2286.198 0.0 10.645161
1357 2020-11-24 11 901 090111 MOZAMBIQUE 13464.0 13464.0 169803.48 388211100.59 KGM ... 388211100.59 388211100.59 2020 24 AFRI TEA AND COFFEE BLENDERS (1963) LIMITED 15596415 AFRI TEA AND COFFEE BLENDERS (1963) LIMITED 2286.2376 -1.348996 12.611667

5 rows × 51 columns

They seem to be special coffees, mainly exported through airport, so they might not be an issue. Will continue check with Comtrade.

Review against Comtrade

# Check the cost per kg in comtrade
comtrade_data["usd_per_kg"] = (
    comtrade_data["comtrade_usd"] / comtrade_data["comtrade_kg"]
)

# For export_data, group by 'HS_CODE' and sum the 'NET_WEIGHT' and 'FOB_VALUE_IN_USD'
export_data_aggregated = export_data.groupby("HS_CODE").agg(
    tot_kg=("NET_WEIGHT", "sum"), tot_usd=("FOB_VALUE_IN_USD", "sum")
)

hs_codes = export_data_aggregated.index.to_series()
# For comtrade_data, group by the 'HS_CODE's from export_data_aggregated and sum the 'comtrade_kg' and 'comtrade_usd'
comtrade_data_aggregated = (
    comtrade_data[comtrade_data["HS_CODE"].isin(hs_codes)]
    .groupby("HS_CODE")
    .agg(comtrade_kg=("comtrade_kg", "sum"), comtrade_usd=("comtrade_usd", "sum"))
)

# Join the aggregated comtrade data to the export data
comtrade_export_comparison = export_data_aggregated.join(comtrade_data_aggregated)
comtrade_export_comparison.reset_index(inplace=True)

# Add a 'ratio_kg' and 'ratio_us' to compare the total kg and usd between the two datasets
comtrade_export_comparison["ratio_kg"] = (
    comtrade_export_comparison["tot_kg"] / comtrade_export_comparison["comtrade_kg"]
)
comtrade_export_comparison["ratio_usd"] = (
    comtrade_export_comparison["tot_usd"] / comtrade_export_comparison["comtrade_usd"]
)

comtrade_export_comparison
HS_CODE tot_kg tot_usd comtrade_kg comtrade_usd ratio_kg ratio_usd
0 090111 60689267.0 138377667.72 64000237.8 145208327.54 0.948266 0.95296
1 090190 1665690.15 4268799.49 5343800.992 4518418.0 0.311705 0.944755

The 'ratio_kg' for '090190' seems off.. checking the data per country

export_data_per_country = export_data.groupby(["HS_CODE", "COUNTRY_DESTINATION"]).agg(
    tot_kg=("NET_WEIGHT", "sum"), tot_usd=("FOB_VALUE_IN_USD", "sum")
)

comtrade_data_per_country = (
    comtrade_data[comtrade_data["HS_CODE"].isin(hs_codes)]
    .groupby(["HS_CODE", "COUNTRY_DESTINATION"])
    .agg(comtrade_kg=("comtrade_kg", "sum"), comtrade_usd=("comtrade_usd", "sum"))
)

# Join the aggregated per country comtrade data to the export data
comtrade_export_comparison_per_country = export_data_per_country.join(
    comtrade_data_per_country
)
comtrade_export_comparison_per_country.reset_index(inplace=True)
# Add a 'ratio_kg' and 'ratio_us' to compare the total kg and usd between the two datasets
comtrade_export_comparison_per_country["ratio_kg"] = (
    comtrade_export_comparison_per_country["tot_kg"]
    / comtrade_export_comparison_per_country["comtrade_kg"]
)
comtrade_export_comparison_per_country["ratio_usd"] = (
    comtrade_export_comparison_per_country["tot_usd"]
    / comtrade_export_comparison_per_country["comtrade_usd"]
)

Ratios for HS_CODE 090111 seem ok

# Graph the top 10 countries with lower ratio_kg
# For HS_CODE '090111'
top_10_countries_low_ratio_kg = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090111"
    ]
    .sort_values(by="ratio_kg")
    .head(10)
)

top_10_countries_low_ratio_usd = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090111"
    ]
    .sort_values(by="ratio_usd")
    .head(10)
)
plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_kg, x="COUNTRY_DESTINATION", y="ratio_kg")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export weight for HS_CODE 090111")
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_usd, x="COUNTRY_DESTINATION", y="ratio_usd")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export costs for HS_CODE 090111")
plt.show()

png

png

The values for Australia and Uganda for HS CODE 090190 seem off

# For HS_CODE '090190'
top_10_countries_low_ratio_kg2 = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090190"
    ]
    .sort_values(by="ratio_kg")
    .head(10)
)

# For HS_CODE '090111'
top_10_countries_low_ratio_usd2 = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090190"
    ]
    .sort_values(by="ratio_usd")
    .head(10)
)

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_kg2, x="COUNTRY_DESTINATION", y="ratio_kg")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export weights for HS_CODE 090190")
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_usd2, x="COUNTRY_DESTINATION", y="ratio_usd")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export costs for HS_CODE 090190")
plt.show()

png

png

When checking the records, we note: * the only record for '090190' from AUSTRALIA is a 1kg shipment with a cost of 5 USD vs. the corresponding 19,457 kg and 60,707 usd from Comtrade * for UGANDA, there are only 2 records for '090190' totalling 43,020kg and 18,939usd vs. 3,449,020kg and 88,172usd from Comtrade

Leaving these records (previously I removed them, but decided to keep them after talking with the team)

# export_data with COUNTRY_DESTINATION 'AUSTRALIA' or 'UGANDA', and with 'HS_CODE' '090190'
export_data_australia_uganda = export_data[
    ((export_data["COUNTRY_DESTINATION"] == "AUSTRALIA")
    | (export_data["COUNTRY_DESTINATION"] == "UGANDA"))
    & (export_data["HS_CODE"] == "090190")
]
export_data_australia_uganda
DATE MONTH HEADING HS_CODE COUNTRY_DESTINATION GROSS_WEIGHT NET_WEIGHT FOB_VALUE_IN_USD FOB_VALUE_IN_TZS UNIT ... DECLARED_CUSTOMS_VALUE_TZS ASSESSED_CUSTOMS_VALUE_TZS YEAR DAY EXPORTER_NAME_ORIGINAL EXPORTER_ID EXPORTER_GROUP tzs_per_usd exchangerate_z_score usd_per_kg
1353 2020-11-01 11 901 090190 UGANDA 16020.6 16020.6 7689.6 17580404.9 KGM ... 17580404.903 17580404.903 2020 1 NGARA FARMERS COOPERATIVE SOCIETY LTD 15596487 NGARA FARMERS COOPERATIVE SOCIETY LTD 2286.2574 -0.000026 0.479982
1355 2020-11-17 11 901 090190 UGANDA 27000.0 27000.0 11250.0 25720507.12 KGM ... 25720507.125 25720507.125 2020 17 KARAGWE DISTRICT CO-OPERATIVE UNION LIMITED 15596400 KARAGWE DISTRICT CO-OPERATIVE UNION LIMITED 2286.2673 0.67447 0.416667
1559 2020-12-28 12 901 090190 AUSTRALIA 1.0 1.0 5.0 11435.5 KGM ... 11435.495 11435.495 2020 28 NASRA OTHMAN 15596781 NASRA OTHMAN 2287.1 58.081759 5.0

3 rows × 51 columns

comtrade_data[
    ((comtrade_data['COUNTRY_DESTINATION'] == 'UGANDA') |
     (comtrade_data['COUNTRY_DESTINATION'] == 'AUSTRALIA')) &
    (comtrade_data['HS_CODE'] == '090190')]
COUNTRY_DESTINATION HS_CODE comtrade_kg comtrade_usd usd_per_kg
63 AUSTRALIA 090190 19457.0 60707.71 3.120096
82 UGANDA 090190 3449020.6 88172.98 0.025565

Note that additionally, 'JORDAN', 'NAMIBIA' and 'RWANDA' are in Comtrade but not in the export data

comtrade_countries = comtrade_data["COUNTRY_DESTINATION"].unique()
export_countries = export_data["COUNTRY_DESTINATION"].unique()
comtrade_countries_not_in_export = [
    country for country in comtrade_countries if country not in export_countries
]
print(f"\n Note that the following countries are in Comtrade but not in the export data: {comtrade_countries_not_in_export}")
 Note that the following countries are in Comtrade but not in the export data: ['JORDAN', 'NAMIBIA', 'RWANDA']