Qa tanzania coffee 2020

View or edit on GitHub

This page is synchronized from trase/data/tanzania/trade/y2020/qa_tanzania_coffee_2020.ipynb. Last modified on 2025-12-13 00:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

Author: Nicolás Martín (nicolas.martin@sei.org)

Date: 2024-06-26

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")

PATHS = {
    "input_silver_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/silver/silver_tanzania_coffee_2020.parquet",
    "input_silver_comtrade_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/silver/silver_comtrade_tanzania_coffee_2020.parquet",
    "input_gold_tanzania_coffee": "s3://trase-storage/tanzania/trade/2020/gold/gold_tanzania_coffee_2020.parquet"
}

export_data = pd.read_parquet(PATHS["input_silver_tanzania_coffee"])
comtrade_data = pd.read_parquet(PATHS["input_silver_comtrade_tanzania_coffee"])
export_data_gold = pd.read_parquet(PATHS["input_gold_tanzania_coffee"])

def modified_z_score(series):
    """
    The modified z-score is useful in identifying outliers in a dataset. 
    It relies in the median absolute deviation (MAD) instead of the standard deviation, making it less
    sensitive to outliers.
    """
    median = series.median()
    mad = (series - median).abs().median()
    modified_z = 0.6745 * (series - median) / mad
    return modified_z

Data description

The first dataframe sample_from_original_df shows the fields that are being loaded from the original excel file (they are loaded based on the EXPORT_COLUMN_MAPPING dictionary.
The second dataframe sample_from_additional_fields_df shows fields generated afterwards (which didn't exist in the original excel file)

EXPORT_COLUMN_MAPPING = {
    "Date": "DATE",
    "Month": "MONTH",
    "Heading": "HEADING",
    "Sub_Heading": "HS_CODE",
    "Destination_Country": "COUNTRY_DESTINATION",
    "Gross_Weight": "GROSS_WEIGHT",
    "Net_Weight": "NET_WEIGHT",
    "Item_Statistical_Value_USD": "FOB_VALUE_IN_USD",
    "Item_Customs_Value_TZS": "FOB_VALUE_IN_TZS",
    "Unit": "UNIT",
    "Quantity": "QUANTITY",
    "HS_Code_Description": "HS_CODE_DESCRIPTION",
    "Product_Description": "COMMERCIAL_DESCRIPTION",
    "Customs_Office_Code": "OFFICE_CODE",
    "Exporter_Code": "EXPORTER_CODE",
    "Exporter": "EXPORTER_NAME",
    "Declarant_Code": "DECLARANT_CODE",
    "Declarant_Name": "DECLARANT_NAME",
    "Buyer": "BUYER_NAME",
    "Port_Name": "PORT_NAME",
    "HS_Code": "HS_CODE_ORIGINAL",
    "Chapter": "CHAPTER",
    "TANSAD_SERIAL_NO": "TANSAD_SERIAL_NUMBER",
    "TANSAD_NO": "TANSAD_NUMBER",
    "S_N": "SERIAL_NUMBER",
    "Processing_Status_Name": "PROCESSING_STATUS_NAME",
    "Origin_county": "ORIGIN_COUNTRY",
    "Consignment_Country": "CONSIGNMENT_COUNTRY",
    "Trading_country": "TRADING_COUNTRY",
    "Bill_No": "BILL_NUMBER",
    "Container_YN": "CONTAINER_YN",
    "Total_Container_No": "TOTAL_CONTAINER_NO",
    "Transport_Means_ID": "TRANSPORT_MEANS_ID",
    "Transport_Nationality_Code": "TRANSPORT_NATIONALITY_CODE",
    "Arrival_Day": "ARRIVAL_DAY",
    "Discharge_Day": "DISCHARGE_DAY",
    "Entry_Office_Code": "ENTRY_OFFICE_CODE",
    "Transport_Mode_Code": "TRANSPORT_MODE_CODE",
    "Exchange_Rate": "EXCHANGE_RATE",
    "Item_Invoice_Amount_in_FC": "ITEM_INVOICE_AMOUNT_IN_FC",
    "Invoice_Currency": "INVOICE_CURRENCY",
    "Declared_Customs_Value_TZS": "DECLARED_CUSTOMS_VALUE_TZS",
    "Assessed_Customs_Value_TZS": "ASSESSED_CUSTOMS_VALUE_TZS",
}
data_mapped = []
data_unmapped = []
unmapped_fields = [field for field in export_data_gold.columns if field not in EXPORT_COLUMN_MAPPING.values()]
for origin_field, destination_field in EXPORT_COLUMN_MAPPING.items():
        field_type = export_data_gold[destination_field].dtype
        # Get 3 sample values, convert to string to handle any data type gracefully
        sample_values = export_data_gold[destination_field].sample(3, random_state=1).astype(str).tolist()
        data_mapped.append([origin_field, destination_field, field_type, ', '.join(sample_values)])
sample_from_original_df = pd.DataFrame(data_mapped, columns=['Origin Field', 'Destination Field', 'Field Type', '3 Sample Values'])

# Iterate through the unmapped fields
for field in unmapped_fields:
    field_type = str(export_data_gold[field].dtype)
    sample_values = export_data_gold[field].sample(3, random_state=1).astype(str).tolist()
    data_unmapped.append([field, field_type, ', '.join(sample_values)])
sample_from_additional_fields_df = pd.DataFrame(data_unmapped, columns=['Additional Field Name', 'Field Type', '3 Sample Values'])

sample_from_original_df

	Origin Field	Destination Field	Field Type	3 Sample Values
0	Date	DATE	datetime64[ns]	2020-11-01, 2020-11-01, 2020-02-05
1	Month	MONTH	Int64	11, 11, 2
2	Heading	HEADING	Int64	901, 901, 901
3	Sub_Heading	HS_CODE	object	090111, 090111, 090111
4	Destination_Country	COUNTRY_DESTINATION	object	GERMANY, JAPAN, ISRAEL
5	Gross_Weight	GROSS_WEIGHT	Float64	12264.0, 39104.0, 38400.0
6	Net_Weight	NET_WEIGHT	Float64	12264.0, 38400.0, 38400.0
7	Item_Statistical_Value_USD	FOB_VALUE_IN_USD	Float64	39965.92, 106659.84, 105216.0
8	Item_Customs_Value_TZS	FOB_VALUE_IN_TZS	Float64	91372380.35, 243850792.55, 239687256.19
9	Unit	UNIT	string	KGM, KGM, KGM
10	Quantity	QUANTITY	Float64	12264.0, 38400.0, 38400.0
11	HS_Code_Description	HS_CODE_DESCRIPTION	string	Not decaffeinated, Not decaffeinated, Not deca...
12	Product_Description	COMMERCIAL_DESCRIPTION	string	6 BULK BAGS OF ARABICA COFFEE AA/AB RA CERTIFI...
13	Customs_Office_Code	OFFICE_CODE	string	TZDL, TZDL, TZDL
14	Exporter_Code	EXPORTER_CODE	Int64	140277002, 110669011, 100127776
15	Exporter	EXPORTER_NAME	object	UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
16	Declarant_Code	DECLARANT_CODE	Int64	134787716, 100146487, 100324148
17	Declarant_Name	DECLARANT_NAME	string	WOTTA LOGISTIC COMPANY LIMITED, UNITED YOUTH S...
18	Buyer	BUYER_NAME	string	TCHIBO GMBH,UEBERSEERING 18,, MC AGRI ALLIANCE...
19	Port_Name	PORT_NAME	string	DAR CUSTOMS SERVICE CENTRE, DAR CUSTOMS SERVIC...
20	HS_Code	HS_CODE_ORIGINAL	Int64	90111000000, 90111000000, 90111000000
21	Chapter	CHAPTER	Int64	9, 9, 9
22	TANSAD_SERIAL_NO	TANSAD_SERIAL_NUMBER	Int64	1385847, 1389073, 1046254
23	TANSAD_NO	TANSAD_NUMBER	string	TZDL-20-1385847, TZDL-20-1389073, TZDL-20-1046254
24	S_N	SERIAL_NUMBER	Int64	216040, 216845, 206999
25	Processing_Status_Name	PROCESSING_STATUS_NAME	string	Released, Released, Exited
26	Origin_county	ORIGIN_COUNTRY	string	United Republic of Tanzania, United Republic o...
27	Consignment_Country	CONSIGNMENT_COUNTRY	string	United Republic of Tanzania, Japan, United Rep...
28	Trading_country	TRADING_COUNTRY	string	United Republic of Tanzania, Japan, United Rep...
29	Bill_No	BILL_NUMBER	object	574990, 206137777, nan
30	Container_YN	CONTAINER_YN	string	N, Y, Y
31	Total_Container_No	TOTAL_CONTAINER_NO	Int64	0, 2, 2
32	Transport_Means_ID	TRANSPORT_MEANS_ID	string	<NA>, <NA>, <NA>
33	Transport_Nationality_Code	TRANSPORT_NATIONALITY_CODE	string	TZ, JP, TZ
34	Arrival_Day	ARRIVAL_DAY	Int64	<NA>, <NA>, <NA>
35	Discharge_Day	DISCHARGE_DAY	Int64	<NA>, <NA>, <NA>
36	Entry_Office_Code	ENTRY_OFFICE_CODE	string	TZDW, TZDW, TZDW
37	Transport_Mode_Code	TRANSPORT_MODE_CODE	Int64	1, 1, 1
38	Exchange_Rate	EXCHANGE_RATE	Float64	2286.2574, 2286.2475, 2278.0495
39	Item_Invoice_Amount_in_FC	ITEM_INVOICE_AMOUNT_IN_FC	Float64	39965.92, 106659.84, 105216.0
40	Invoice_Currency	INVOICE_CURRENCY	string	USD, USD, USD
41	Declared_Customs_Value_TZS	DECLARED_CUSTOMS_VALUE_TZS	Float64	91372380.348, 243850792.55, 239687256.192
42	Assessed_Customs_Value_TZS	ASSESSED_CUSTOMS_VALUE_TZS	Float64	91372380.348, 243850792.55, 239687256.192

sample_from_additional_fields_df

	Additional Field Name	Field Type	3 Sample Values
0	YEAR	int64	2020, 2020, 2020
1	DAY	int64	1, 1, 5
2	EXPORTER_NAME_ORIGINAL	string	UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
3	EXPORTER_ID	int64	15596538, 15596352, 15596349
4	EXPORTER_GROUP	object	UMOJA ULULU AGRICULTURAL AND MARKETING COOPERA...
5	trase_id	object	TZ-TRADER-140277002, TZ-TRADER-110669011, TZ-T...
6	tzs_per_usd	Float64	2286.2574000548466, 2286.24749999625, 2278.049...
7	exchangerate_z_score	Float64	4.998933574280233e-06, -0.6744982694724116, -1...
8	usd_per_kg	Float64	3.2587997390737113, 2.7776, 2.74

Checking TZS - USD exchange rate outliers

23 outliers where identified through a modified z-score using as reference the exchange rate values of a given month.
The value for those outliers was replaced by the mean of the exchange rate of that month, excluding those outliers from the calculation of the mean.

# Calculate the exchange rates in TZS per USD
export_data["tzs_per_usd"] = (
    export_data["FOB_VALUE_IN_TZS"] / export_data["FOB_VALUE_IN_USD"]
)

record_count = len(export_data)

# get the modified z score within each month
for month in export_data["MONTH"].unique():
    export_data.loc[
        export_data["MONTH"] == month, "exchangerate_z_score"
    ] = modified_z_score(export_data[export_data["MONTH"] == month]["tzs_per_usd"])

# Get the mean of the exchange rate for each month, excluding the outliers
# Defining outliers as z-scores greater than 5
exchange_rate_mean = {}
for month in export_data["MONTH"].unique():
    exchange_rate_mean[month] = export_data[
        (export_data["MONTH"] == month)
        & (export_data["exchangerate_z_score"].abs() <= 5)
    ]["tzs_per_usd"].mean()

# Create a new column for color-coding the data
export_data['Outlier'] = 'Normal'  # Creates a new column and sets all values to 'Normal'
export_data.loc[export_data['exchangerate_z_score'] > 5, 'Outlier'] = 'Outlier'  # Only modifies rows where the condition is True
outlier_count = len(export_data[export_data['exchangerate_z_score'] > 5])

plt.figure(figsize=(10, 6))
scatter_plot = sns.scatterplot(x='DATE', y='tzs_per_usd', hue='Outlier', 
                               palette={'Normal': 'blue', 'Outlier': 'red'}, 
                               data=export_data)
scatter_plot.set(xlabel='Date', ylabel='Exchange Rate (TZS per USD)')
plt.suptitle(f"Exchange Rate Outliers by Date. {outlier_count} outliers out of {record_count} records", y=0.95)
plt.title(f"Outliers are defined against the exchange rate values of a given month")
plt.legend(title='Record type')

plt.show()

export_data.drop('Outlier', axis=1, inplace=True)

png

Checking cost per kg outliers

There is 1 clear outlier

# Calculate the cost per net weight in USD, and do a boxplot
export_data["usd_per_kg"] = export_data["FOB_VALUE_IN_USD"] / export_data["NET_WEIGHT"]

# Do a boxplot of usd_per_kg
plt.figure(figsize=(10, 6))
sns.boxplot(x='HS_CODE', y='usd_per_kg', data=export_data)
plt.title("There is one outstanding outlier")
plt.show()

png

print("Outlier details:")
export_data[export_data['usd_per_kg'] > 50]

Outlier details:

	DATE	MONTH	HEADING	HS_CODE	COUNTRY_DESTINATION	GROSS_WEIGHT	NET_WEIGHT	FOB_VALUE_IN_USD	FOB_VALUE_IN_TZS	UNIT	...	DECLARED_CUSTOMS_VALUE_TZS	ASSESSED_CUSTOMS_VALUE_TZS	YEAR	DAY	EXPORTER_NAME_ORIGINAL	EXPORTER_ID	EXPORTER_GROUP	tzs_per_usd	exchangerate_z_score	usd_per_kg
1055	2020-10-01	10	901	090111	CHINA (MAINLAND)	3600.0	3600.0	670680.0	1533320554.1	KGM	...	1533320554.1	1533320554.1	2020	1	TAYLOR WINCH (TANZANIA) LTD	15596376	TAYLOR WINCH (TANZANIA) LTD	2286.2178	0.674503	186.3

1 rows × 51 columns

We replace the outlier by the mean of non-outliers going to China, and zoom again in the values, by HS_CODE

# Replacing this outlier by the mean of the non-outliers going to China
china_non_outliers_mean = export_data[
        (export_data["usd_per_kg"] < 50) & (export_data["COUNTRY_DESTINATION"] == "CHINA")
    ]["usd_per_kg"].mean()
export_data.loc[export_data["usd_per_kg"] > 50, "usd_per_kg"] = china_non_outliers_mean

# Show a boxplot of HS_CODE 090111, by month; and another one of 090190
hs_code_1_count = len(export_data[export_data["HS_CODE"] == "090111"])
plt.figure(figsize=(10, 6))
sns.boxplot(x='MONTH', y='usd_per_kg', data=export_data[export_data["HS_CODE"] == "090111"])
plt.title(f"HS_CODE 090111: {hs_code_1_count} records")
plt.show()

plt.figure(figsize=(10, 6))
hs_code_2_count = len(export_data[export_data["HS_CODE"] == "090190"])
sns.boxplot(x='MONTH', y='usd_per_kg', data=export_data[export_data["HS_CODE"] == "090190"])
plt.title(f"HS_CODE 090190: {hs_code_2_count} records")
plt.show()

png

For HS_CODE 090190 there doesn't seem to be much to do about the data.

For 090111 , will take a look at the records that are above 10 usd_per_kg (about double of the maximum in the boxplot)

outlier_review = export_data[(export_data["HS_CODE"] == "090111") & (export_data['usd_per_kg'] > 10)]
outlier_review

	DATE	MONTH	HEADING	HS_CODE	COUNTRY_DESTINATION	GROSS_WEIGHT	NET_WEIGHT	FOB_VALUE_IN_USD	FOB_VALUE_IN_TZS	UNIT	...	DECLARED_CUSTOMS_VALUE_TZS	ASSESSED_CUSTOMS_VALUE_TZS	YEAR	DAY	EXPORTER_NAME_ORIGINAL	EXPORTER_ID	EXPORTER_GROUP	tzs_per_usd	exchangerate_z_score	usd_per_kg
425	2020-03-21	3	901	090111	BELGIUM	194670.0	194400.0	2818800.0	6421003151.04	KGM	...	6421003151.04	6421003151.04	2020	21	DRK GENERAL MERCHANTS LTD	15596424	DRK GENERAL MERCHANTS LTD	2277.9208	-0.0	14.5
593	2020-04-23	4	901	090111	JAPAN	18330.0	1800.0	56880.0	129650918.26	KGM	...	129650918.256	129650918.256	2020	23	DORMAN (TANZANIA) LIMITED	15596349	DORMAN (TANZANIA) LIMITED	2279.3762	0.879783	31.6
838	2020-09-01	9	901	090111	GERMANY	122.0	122.0	2400.0	5486856.0	KGM	...	5486856.0	5486856.0	2020	1	NGILA ESTATE LIMITED	15596439	NGILA ESTATE LIMITED	2286.19	0.803803	19.672131
967	2020-10-01	10	901	090111	GERMANY	248.0	248.0	2640.0	6035562.72	KGM	...	6035562.72	6035562.72	2020	1	BLUE MOUNTAIN COFFEE FARMS LIMITED	15596517	BLUE MOUNTAIN COFFEE FARMS LIMITED	2286.198	0.0	10.645161
1357	2020-11-24	11	901	090111	MOZAMBIQUE	13464.0	13464.0	169803.48	388211100.59	KGM	...	388211100.59	388211100.59	2020	24	AFRI TEA AND COFFEE BLENDERS (1963) LIMITED	15596415	AFRI TEA AND COFFEE BLENDERS (1963) LIMITED	2286.2376	-1.348996	12.611667

5 rows × 51 columns

They seem to be special coffees, mainly exported through airport, so they might not be an issue. Will continue check with Comtrade.

Review against Comtrade

# Check the cost per kg in comtrade
comtrade_data["usd_per_kg"] = (
    comtrade_data["comtrade_usd"] / comtrade_data["comtrade_kg"]
)

# For export_data, group by 'HS_CODE' and sum the 'NET_WEIGHT' and 'FOB_VALUE_IN_USD'
export_data_aggregated = export_data.groupby("HS_CODE").agg(
    tot_kg=("NET_WEIGHT", "sum"), tot_usd=("FOB_VALUE_IN_USD", "sum")
)

hs_codes = export_data_aggregated.index.to_series()
# For comtrade_data, group by the 'HS_CODE's from export_data_aggregated and sum the 'comtrade_kg' and 'comtrade_usd'
comtrade_data_aggregated = (
    comtrade_data[comtrade_data["HS_CODE"].isin(hs_codes)]
    .groupby("HS_CODE")
    .agg(comtrade_kg=("comtrade_kg", "sum"), comtrade_usd=("comtrade_usd", "sum"))
)

# Join the aggregated comtrade data to the export data
comtrade_export_comparison = export_data_aggregated.join(comtrade_data_aggregated)
comtrade_export_comparison.reset_index(inplace=True)

# Add a 'ratio_kg' and 'ratio_us' to compare the total kg and usd between the two datasets
comtrade_export_comparison["ratio_kg"] = (
    comtrade_export_comparison["tot_kg"] / comtrade_export_comparison["comtrade_kg"]
)
comtrade_export_comparison["ratio_usd"] = (
    comtrade_export_comparison["tot_usd"] / comtrade_export_comparison["comtrade_usd"]
)

comtrade_export_comparison

	HS_CODE	tot_kg	tot_usd	comtrade_kg	comtrade_usd	ratio_kg	ratio_usd
0	090111	60689267.0	138377667.72	64000237.8	145208327.54	0.948266	0.95296
1	090190	1665690.15	4268799.49	5343800.992	4518418.0	0.311705	0.944755

The 'ratio_kg' for '090190' seems off.. checking the data per country

export_data_per_country = export_data.groupby(["HS_CODE", "COUNTRY_DESTINATION"]).agg(
    tot_kg=("NET_WEIGHT", "sum"), tot_usd=("FOB_VALUE_IN_USD", "sum")
)

comtrade_data_per_country = (
    comtrade_data[comtrade_data["HS_CODE"].isin(hs_codes)]
    .groupby(["HS_CODE", "COUNTRY_DESTINATION"])
    .agg(comtrade_kg=("comtrade_kg", "sum"), comtrade_usd=("comtrade_usd", "sum"))
)

# Join the aggregated per country comtrade data to the export data
comtrade_export_comparison_per_country = export_data_per_country.join(
    comtrade_data_per_country
)
comtrade_export_comparison_per_country.reset_index(inplace=True)
# Add a 'ratio_kg' and 'ratio_us' to compare the total kg and usd between the two datasets
comtrade_export_comparison_per_country["ratio_kg"] = (
    comtrade_export_comparison_per_country["tot_kg"]
    / comtrade_export_comparison_per_country["comtrade_kg"]
)
comtrade_export_comparison_per_country["ratio_usd"] = (
    comtrade_export_comparison_per_country["tot_usd"]
    / comtrade_export_comparison_per_country["comtrade_usd"]
)

Ratios for HS_CODE 090111 seem ok

# Graph the top 10 countries with lower ratio_kg
# For HS_CODE '090111'
top_10_countries_low_ratio_kg = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090111"
    ]
    .sort_values(by="ratio_kg")
    .head(10)
)

top_10_countries_low_ratio_usd = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090111"
    ]
    .sort_values(by="ratio_usd")
    .head(10)
)
plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_kg, x="COUNTRY_DESTINATION", y="ratio_kg")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export weight for HS_CODE 090111")
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_usd, x="COUNTRY_DESTINATION", y="ratio_usd")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export costs for HS_CODE 090111")
plt.show()

png

The values for Australia and Uganda for HS CODE 090190 seem off

# For HS_CODE '090190'
top_10_countries_low_ratio_kg2 = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090190"
    ]
    .sort_values(by="ratio_kg")
    .head(10)
)

# For HS_CODE '090111'
top_10_countries_low_ratio_usd2 = (
    comtrade_export_comparison_per_country[
        comtrade_export_comparison_per_country["HS_CODE"] == "090190"
    ]
    .sort_values(by="ratio_usd")
    .head(10)
)

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_kg2, x="COUNTRY_DESTINATION", y="ratio_kg")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export weights for HS_CODE 090190")
plt.show()

plt.figure(figsize=(10, 6))
sns.barplot(data=top_10_countries_low_ratio_usd2, x="COUNTRY_DESTINATION", y="ratio_usd")
plt.xticks(rotation=45)  # Rotate labels to avoid overlap
plt.title(f"Countries with lowest ratio between Comtrade and export costs for HS_CODE 090190")
plt.show()

png

When checking the records, we note: * the only record for '090190' from AUSTRALIA is a 1kg shipment with a cost of 5 USD vs. the corresponding 19,457 kg and 60,707 usd from Comtrade * for UGANDA, there are only 2 records for '090190' totalling 43,020kg and 18,939usd vs. 3,449,020kg and 88,172usd from Comtrade

Leaving these records (previously I removed them, but decided to keep them after talking with the team)

# export_data with COUNTRY_DESTINATION 'AUSTRALIA' or 'UGANDA', and with 'HS_CODE' '090190'
export_data_australia_uganda = export_data[
    ((export_data["COUNTRY_DESTINATION"] == "AUSTRALIA")
    | (export_data["COUNTRY_DESTINATION"] == "UGANDA"))
    & (export_data["HS_CODE"] == "090190")
]

export_data_australia_uganda

	DATE	MONTH	HEADING	HS_CODE	COUNTRY_DESTINATION	GROSS_WEIGHT	NET_WEIGHT	FOB_VALUE_IN_USD	FOB_VALUE_IN_TZS	UNIT	...	DECLARED_CUSTOMS_VALUE_TZS	ASSESSED_CUSTOMS_VALUE_TZS	YEAR	DAY	EXPORTER_NAME_ORIGINAL	EXPORTER_ID	EXPORTER_GROUP	tzs_per_usd	exchangerate_z_score	usd_per_kg
1353	2020-11-01	11	901	090190	UGANDA	16020.6	16020.6	7689.6	17580404.9	KGM	...	17580404.903	17580404.903	2020	1	NGARA FARMERS COOPERATIVE SOCIETY LTD	15596487	NGARA FARMERS COOPERATIVE SOCIETY LTD	2286.2574	-0.000026	0.479982
1355	2020-11-17	11	901	090190	UGANDA	27000.0	27000.0	11250.0	25720507.12	KGM	...	25720507.125	25720507.125	2020	17	KARAGWE DISTRICT CO-OPERATIVE UNION LIMITED	15596400	KARAGWE DISTRICT CO-OPERATIVE UNION LIMITED	2286.2673	0.67447	0.416667
1559	2020-12-28	12	901	090190	AUSTRALIA	1.0	1.0	5.0	11435.5	KGM	...	11435.495	11435.495	2020	28	NASRA OTHMAN	15596781	NASRA OTHMAN	2287.1	58.081759	5.0

3 rows × 51 columns

comtrade_data[
    ((comtrade_data['COUNTRY_DESTINATION'] == 'UGANDA') |
     (comtrade_data['COUNTRY_DESTINATION'] == 'AUSTRALIA')) &
    (comtrade_data['HS_CODE'] == '090190')]

	COUNTRY_DESTINATION	HS_CODE	comtrade_kg	comtrade_usd	usd_per_kg
63	AUSTRALIA	090190	19457.0	60707.71	3.120096
82	UGANDA	090190	3449020.6	88172.98	0.025565

Note that additionally, 'JORDAN', 'NAMIBIA' and 'RWANDA' are in Comtrade but not in the export data

comtrade_countries = comtrade_data["COUNTRY_DESTINATION"].unique()
export_countries = export_data["COUNTRY_DESTINATION"].unique()
comtrade_countries_not_in_export = [
    country for country in comtrade_countries if country not in export_countries
]
print(f"\n Note that the following countries are in Comtrade but not in the export data: {comtrade_countries_not_in_export}")

 Note that the following countries are in Comtrade but not in the export data: ['JORDAN', 'NAMIBIA', 'RWANDA']