2020BR SOY BOL COMTRADE MDIC
View or edit on GitHub
This page is synchronized from trase/models/brazil/qa_bol/2020BR_SOY_BOL_COMTRADE_MDIC.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
from trase.tools.aws.aws_helpers_cached import get_pandas_df_once
from trase.tools.aws.aws_helpers import read_json
import pandas as pd
import numpy as np
def load_data():
"""Load BOL, MDIC, and Comtrade data"""
df_bol = get_pandas_df_once(
"brazil/trade/bol/2020/BRAZIL_BOL_2020.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_bol["vol"] = df_bol["vol"].astype(float)
df_comtrade = get_pandas_df_once(
"world/trade/statistical_data/comtrade/COMTRADE_2020.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
columns = {
"reporter.label": "country_of_export.name",
"volume": "vol",
}
df_comtrade = df_comtrade.rename(columns=columns, errors="raise")
df_comtrade = df_comtrade[df_comtrade["country_of_export.name"] == "BRAZIL"]
df_comtrade = df_comtrade[df_comtrade["vol"] != ""]
df_comtrade["vol"] = df_comtrade["vol"].astype(float)
df_comtrade["hs4"] = df_comtrade["hs6"].str[:4]
df_mdic = get_pandas_df_once(
"brazil/trade/mdic/port/archive/brazil_mdic_port_2020.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_mdic = df_mdic[
["month", "hs6", "country_of_destination.name", "vol", "fob", "via"]
]
df_mdic["vol"] = df_mdic["vol"].astype(float)
df_mdic["hs4"] = df_mdic["hs6"].str[:4]
return df_bol, df_comtrade, df_mdic
df_bol, df_comtrade, df_mdic = load_data()
Compare vol per hs4 between Comtrade, Mdic, and BOL
from trase.tools import uses_database
@uses_database
def get_hs4_codes(cnx=None):
hs4_codes = pd.read_sql(
"SELECT DISTINCT substr(hs_code, 0, 5) as hs4, commodity FROM views.commodities",
con=cnx.cnx,
)
return hs4_codes
hs4_list = list(get_hs4_codes()["hs4"].drop_duplicates())
import plotly.express as px
df_bol_hs4 = df_bol[df_bol["hs4"].isin(hs4_list)].copy()
df_bol_hs4 = df_bol_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_bol_hs4["source"] = "BOL"
# filter and focus on maritime data in the three datasets
df_comtrade_hs4 = df_comtrade[df_comtrade["hs4"].isin(hs4_list)].copy()
df_comtrade_hs4 = df_comtrade_hs4[df_comtrade_hs4["transportation_method"] == "Sea"]
df_comtrade_hs4 = df_comtrade_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_comtrade_hs4["source"] = "COMTRADE"
df_mdic_hs4 = df_mdic[df_mdic["hs4"].isin(hs4_list)].copy()
df_mdic_hs4 = df_mdic_hs4[df_mdic_hs4["via"] == "01"]
df_mdic_hs4 = df_mdic_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_mdic_hs4["source"] = "MDIC"
df_comparison_hs4 = pd.concat([df_bol_hs4, df_comtrade_hs4, df_mdic_hs4])
# # plot
fig = px.bar(
df_comparison_hs4,
x="vol",
y="hs4",
color="source",
barmode="group",
orientation="h",
title="<b>Volume per HS4</b><br />Comtrade versus MDIC versus BOL",
height=1200,
)
fig.show("png")

interested_commodities = ["SOY", "BEEF", "CORN", "COCOA", "COFFEE", "COTTON"]
df_hs4 = get_hs4_codes()
for c in interested_commodities:
df_hs4_c = df_hs4[df_hs4["commodity"] == c]
list_hs4_c = list(df_hs4_c["hs4"])
df_comparison_hs4_c = df_comparison_hs4[df_comparison_hs4["hs4"].isin(list_hs4_c)]
# plot
fig = px.bar(
df_comparison_hs4_c,
x="vol",
y="hs4",
color="source",
barmode="group",
orientation="h",
title=f"<b>Volume per {c} HS4</b><br />Comtrade versus MDIC versus BOL",
height=400,
)
fig.show("png")
/tmp/ipykernel_6947/3064137518.py:4: UserWarning:
pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.






From the above graphs, we can see in most commodities, BOL shows a similar volume per hs4 to COMTRADE and MDIC, except for CORN. In addition, missing hs4 codes in BEEF and COCOA might need extra attention when we work on those contexts.
Compare exporter trend in BOL per year
def load_bols():
"""Load bol data in previous years"""
df_list = []
for year in [2017, 2018, 2019, 2020]:
df_bol = get_pandas_df_once(
f"brazil/trade/bol/{year}/BRAZIL_BOL_{year}.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
usecols=["hs4", "exporter.label", "vol"],
)
df_bol["vol"] = df_bol["vol"].astype(float)
df_bol["year"] = year
df_list.append(df_bol)
df_bols = pd.concat(df_list)
return df_bols
df_bols = load_bols()
df_hs4 = get_hs4_codes()
for c in interested_commodities:
df_hs4_c = df_hs4[df_hs4["commodity"] == c]
list_hs4_c = list(df_hs4_c["hs4"])
df_bols_c = df_bols[df_bols["hs4"].isin(list_hs4_c)]
df_bols_plot = (
df_bols_c.groupby(["year", "exporter.label"])["vol"].sum().reset_index()
)
df_bols_plot["percentage"] = (
df_bols_plot.groupby("year")["vol"].transform(lambda x: x / x.sum() * 100)
).round(3)
df_bols_plot = df_bols_plot.sort_values(by="percentage")
fig = px.bar(
df_bols_plot,
x="year",
y="percentage",
color="exporter.label",
title=f"Volume percentage per exporter per year in {c}",
text="exporter.label",
height=600,
width=1200,
)
fig.update_layout(yaxis={"categoryorder": "total ascending"})
fig.show("png")
/tmp/ipykernel_6947/3064137518.py:4: UserWarning:
pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.

Overall, the trader volume per exporter in 2021 BOL