View or edit on GitHub
This page is synchronized from trase/data/brazil/coffee/trade/y2020/qa_brazil_coffee_trade_bol_2020.ipynb. Last modified on 2025-12-15 23:00 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
2020 Brazil coffee - comparison between BOL and COMTRADE
The total volumes, and the distribution between HS Codes and recipient countries are largely the same, with differences below 1%.
Note however that BOL doesn't has FOB, and so prices checks can't be done with it. If needed, the values of COMTRADE could be used as reference.
General comparison
The sum of the BOL volume for Brazil is 0.45% above the related COMTRADE data.
* The cleaned data in s3://trase-storage/brazil/trade/bol/2020/BRAZIL_BOL_2020.csv was filtered to take only data for Coffee (HS codes 0901*), and placed in s3://trase-storage/brazil/coffee/trade/2020/bronze/bronze_brazil_bol_coffee_2020.parquet
* The COMTRADE Data for Brazil coffee 2020 was downloaded disaggregated by partner country, in case the average FOB are useful, as the FOB is not available in the BOL.
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import pycountry
# Configure duckdb and connect to data
conn = duckdb.connect()
conn.execute("""
SET s3_region='eu-west-1';
INSTALL httpfs; LOAD httpfs;
CALL load_aws_credentials('default', set_region= true);
""")
bol_clean = conn.sql(f"""
SELECT
hs6,
vol::int64 AS vol,
\"exporter.label\" AS exporter_name,
\"exporter.cnpj\" AS exporter_taxn,
\"country_of_destination.trase_id\" AS country_iso2
FROM 's3://trase-storage/brazil/coffee/trade/2020/bronze/bronze_brazil_bol_coffee_2020.parquet'
""").df()
bol_clean['country_iso3'] = bol_clean['country_iso2'].apply(lambda x: pycountry.countries.get(alpha_2=x).alpha_3)
comtrade = conn.sql("""
SELECT
CmdCode AS hs6,
NetWgt::int64 AS vol,
PartnerIso AS country_iso3
FROM 's3://trase-storage/brazil/coffee/trade/2020/bronze/brazil_coffee_comtrade_2020.csv'
WHERE PartnerDesc != 'World'
""").df()
# Compare the volumes
comtrade_tot_vol = comtrade['vol'].sum()
bol_tot_vol = bol_clean['vol'].sum()
# Plot the volumes as two bars, and show as a title the percentage of bol against comtrade
plt.bar(['BOL', 'COMTRADE'], [bol_tot_vol, comtrade_tot_vol], color=['blue', 'red'])
percentage = bol_tot_vol / comtrade_tot_vol * 100
plt.title(f'BOL Volume is {percentage:.2f}% of COMTRADE Volume')
plt.text(0, bol_tot_vol, f'{int(bol_tot_vol):,} kg', ha='center', va='bottom')
plt.text(1, comtrade_tot_vol, f'{int(comtrade_tot_vol):,} kg', ha='center', va='bottom')
plt.show()

# Note that the total volume of the cleaned BOL data is the same as the original BOL data.
bol_original_brazil_vol = conn.sql("SELECT SUM(vol) AS vol FROM 's3://trase-storage/brazil/trade/bol/2020/BRAZIL_BOL_2020.csv' WHERE hs6 LIKE '0901%';").df()
print(f"The total sum of exported volumes for Brazil Coffee based on the source files are:")
print(f"Original BOL file before cleaning: {int(bol_original_brazil_vol['vol']):,} kg")
print(f"Clean BOL file: {int(bol_tot_vol):,} kg")
The total sum of exported volumes for Brazil Coffee based on the source files are:
Original BOL file before cleaning: 2,389,701,777 kg
Clean BOL file: 2,389,701,778 kg
Comparison by HS Code
The great majority of the exports are from the HS Code 09011 - Coffee; not roasted or decaffeinated (99.12% for the BOL and 99.73% for COMTRADE), and some HS6 in the BOL's are undetermined, so a comparison desaggregated by HS Code doesn't seem so relevant.
hs6_bol_summary_df = bol_clean.groupby('hs6').sum('vol').sort_values('vol', ascending=False).reset_index()
hs6_comtrade_summary_df = comtrade.groupby('hs6').sum('vol').sort_values('vol', ascending=False).reset_index()
hs6_merged_df = pd.merge(hs6_bol_summary_df, hs6_comtrade_summary_df, on='hs6', how='outer', suffixes=('_BOL', '_COMTRADE'))
hs6_merged_df.fillna(0, inplace=True)
hs6_merged_df['vol_BOL_perc'] = hs6_merged_df['vol_BOL'] / hs6_merged_df['vol_BOL'].sum() * 100
hs6_merged_df['vol_COMTRADE_perc'] = hs6_merged_df['vol_COMTRADE'] / hs6_merged_df['vol_COMTRADE'].sum() * 100
hs6_merged_df['vol_BOL'] = hs6_merged_df['vol_BOL'].apply(lambda x: f'{int(x):,}')
hs6_merged_df['vol_COMTRADE'] = hs6_merged_df['vol_COMTRADE'].apply(lambda x: f'{int(x):,}')
hs6_merged_df['vol_BOL_perc'] = hs6_merged_df['vol_BOL_perc'].apply(lambda x: f'{x:.2f}%')
hs6_merged_df['vol_COMTRADE_perc'] = hs6_merged_df['vol_COMTRADE_perc'].apply(lambda x: f'{x:.2f}%')
print("Comparison of BOL and COMTRADE volumes by HS6:")
hs6_merged_df
Comparison of BOL and COMTRADE volumes by HS6:
| hs6 | vol_BOL | vol_COMTRADE | vol_BOL_perc | vol_COMTRADE_perc | |
|---|---|---|---|---|---|
| 0 | 090111 | 2,368,733,710 | 2,372,629,975 | 99.12% | 99.73% |
| 1 | 0901XX | 18,211,988 | 0 | 0.76% | 0.00% |
| 2 | 090110X | 1,594,164 | 0 | 0.07% | 0.00% |
| 3 | 090121 | 1,141,926 | 6,426,992 | 0.05% | 0.27% |
| 4 | 090190 | 19,360 | 23,302 | 0.00% | 0.00% |
| 5 | 090112 | 630 | 4,431 | 0.00% | 0.00% |
| 6 | 090122 | 0 | 15,875 | 0.00% | 0.00% |
Comparison by country
The volumes and percentages of coffee sent to different countries are very similar between BOL's and COMTRADE.
In all cases, differences are less than 0.5% of the total.
country_bol_summary_df = bol_clean.groupby('country_iso3').sum('vol').sort_values('vol', ascending=False).reset_index()
country_comtrade_summary_df = comtrade.groupby('country_iso3').sum('vol').sort_values('vol', ascending=False).reset_index()
country_merged_df = pd.merge(country_bol_summary_df, country_comtrade_summary_df, on='country_iso3', how='outer', suffixes=('_BOL', '_COMTRADE'))
country_merged_df.fillna(0, inplace=True)
country_merged_df['vol_BOL_perc'] = country_merged_df['vol_BOL'] / country_merged_df['vol_BOL'].sum() * 100
country_merged_df['vol_COMTRADE_perc'] = country_merged_df['vol_COMTRADE'] / country_merged_df['vol_COMTRADE'].sum() * 100
country_merged_df['vol_BOL'] = country_merged_df['vol_BOL'].apply(lambda x: f'{int(x):,}')
country_merged_df['vol_COMTRADE'] = country_merged_df['vol_COMTRADE'].apply(lambda x: f'{int(x):,}')
country_merged_df['vol_BOL_perc'] = country_merged_df['vol_BOL_perc'].apply(lambda x: f'{x:.2f}%')
country_merged_df['vol_COMTRADE_perc'] = country_merged_df['vol_COMTRADE_perc'].apply(lambda x: f'{x:.2f}%')
print("Comparison of BOL and COMTRADE volumes by Country of the top 10 countries:")
country_merged_df.head(10)
Comparison of BOL and COMTRADE volumes by Country of the top 10 countries:
| country_iso3 | vol_BOL | vol_COMTRADE | vol_BOL_perc | vol_COMTRADE_perc | |
|---|---|---|---|---|---|
| 0 | USA | 442,291,605 | 430,291,397 | 18.51% | 18.09% |
| 1 | DEU | 442,100,631 | 441,015,447 | 18.50% | 18.54% |
| 2 | BEL | 230,174,206 | 224,548,377 | 9.63% | 9.44% |
| 3 | ITA | 178,703,115 | 181,077,906 | 7.48% | 7.61% |
| 4 | JPN | 127,070,615 | 121,085,479 | 5.32% | 5.09% |
| 5 | TUR | 83,082,313 | 81,320,783 | 3.48% | 3.42% |
| 6 | MEX | 63,019,229 | 60,127,079 | 2.64% | 2.53% |
| 7 | ESP | 53,394,308 | 51,716,574 | 2.23% | 2.17% |
| 8 | FRA | 50,833,753 | 49,844,209 | 2.13% | 2.10% |
| 9 | RUS | 47,863,280 | 54,478,675 | 2.00% | 2.29% |