Skip to content

2020BR SOY BOL COMTRADE MDIC

View or edit on GitHub

This page is synchronized from trase/models/brazil/qa_bol/2020BR_SOY_BOL_COMTRADE_MDIC.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

from trase.tools.aws.aws_helpers_cached import get_pandas_df_once
from trase.tools.aws.aws_helpers import read_json
import pandas as pd
import numpy as np
def load_data():
    """Load BOL, MDIC, and Comtrade data"""
    df_bol = get_pandas_df_once(
        "brazil/trade/bol/2020/BRAZIL_BOL_2020.csv",
        encoding="utf8",
        sep=";",
        dtype=str,
        keep_default_na=False,
    )
    df_bol["vol"] = df_bol["vol"].astype(float)

    df_comtrade = get_pandas_df_once(
        "world/trade/statistical_data/comtrade/COMTRADE_2020.csv",
        encoding="utf8",
        sep=";",
        dtype=str,
        keep_default_na=False,
    )

    columns = {
        "reporter.label": "country_of_export.name",
        "volume": "vol",
    }
    df_comtrade = df_comtrade.rename(columns=columns, errors="raise")
    df_comtrade = df_comtrade[df_comtrade["country_of_export.name"] == "BRAZIL"]
    df_comtrade = df_comtrade[df_comtrade["vol"] != ""]
    df_comtrade["vol"] = df_comtrade["vol"].astype(float)
    df_comtrade["hs4"] = df_comtrade["hs6"].str[:4]

    df_mdic = get_pandas_df_once(
        "brazil/trade/mdic/port/archive/brazil_mdic_port_2020.csv",
        encoding="utf8",
        sep=";",
        dtype=str,
        keep_default_na=False,
    )
    df_mdic = df_mdic[
        ["month", "hs6", "country_of_destination.name", "vol", "fob", "via"]
    ]
    df_mdic["vol"] = df_mdic["vol"].astype(float)
    df_mdic["hs4"] = df_mdic["hs6"].str[:4]
    return df_bol, df_comtrade, df_mdic
df_bol, df_comtrade, df_mdic = load_data()

Compare vol per hs4 between Comtrade, Mdic, and BOL

from trase.tools import uses_database


@uses_database
def get_hs4_codes(cnx=None):
    hs4_codes = pd.read_sql(
        "SELECT DISTINCT substr(hs_code, 0, 5) as hs4, commodity FROM views.commodities",
        con=cnx.cnx,
    )
    return hs4_codes


hs4_list = list(get_hs4_codes()["hs4"].drop_duplicates())
import plotly.express as px

df_bol_hs4 = df_bol[df_bol["hs4"].isin(hs4_list)].copy()
df_bol_hs4 = df_bol_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_bol_hs4["source"] = "BOL"

# filter and focus on maritime data in the three datasets
df_comtrade_hs4 = df_comtrade[df_comtrade["hs4"].isin(hs4_list)].copy()
df_comtrade_hs4 = df_comtrade_hs4[df_comtrade_hs4["transportation_method"] == "Sea"]
df_comtrade_hs4 = df_comtrade_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_comtrade_hs4["source"] = "COMTRADE"

df_mdic_hs4 = df_mdic[df_mdic["hs4"].isin(hs4_list)].copy()
df_mdic_hs4 = df_mdic_hs4[df_mdic_hs4["via"] == "01"]
df_mdic_hs4 = df_mdic_hs4.groupby("hs4")["vol"].sum().reset_index(name="vol")
df_mdic_hs4["source"] = "MDIC"

df_comparison_hs4 = pd.concat([df_bol_hs4, df_comtrade_hs4, df_mdic_hs4])

# # plot
fig = px.bar(
    df_comparison_hs4,
    x="vol",
    y="hs4",
    color="source",
    barmode="group",
    orientation="h",
    title="<b>Volume per HS4</b><br />Comtrade versus MDIC versus BOL",
    height=1200,
)
fig.show("png")

png

interested_commodities = ["SOY", "BEEF", "CORN", "COCOA", "COFFEE", "COTTON"]
df_hs4 = get_hs4_codes()
for c in interested_commodities:
    df_hs4_c = df_hs4[df_hs4["commodity"] == c]
    list_hs4_c = list(df_hs4_c["hs4"])
    df_comparison_hs4_c = df_comparison_hs4[df_comparison_hs4["hs4"].isin(list_hs4_c)]
    # plot
    fig = px.bar(
        df_comparison_hs4_c,
        x="vol",
        y="hs4",
        color="source",
        barmode="group",
        orientation="h",
        title=f"<b>Volume per {c} HS4</b><br />Comtrade versus MDIC versus BOL",
        height=400,
    )
    fig.show("png")
/tmp/ipykernel_6947/3064137518.py:4: UserWarning:

pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.

png

png

png

png

png

png

From the above graphs, we can see in most commodities, BOL shows a similar volume per hs4 to COMTRADE and MDIC, except for CORN. In addition, missing hs4 codes in BEEF and COCOA might need extra attention when we work on those contexts.

Compare exporter trend in BOL per year

def load_bols():
    """Load bol data in previous years"""
    df_list = []
    for year in [2017, 2018, 2019, 2020]:
        df_bol = get_pandas_df_once(
            f"brazil/trade/bol/{year}/BRAZIL_BOL_{year}.csv",
            encoding="utf8",
            sep=";",
            dtype=str,
            keep_default_na=False,
            usecols=["hs4", "exporter.label", "vol"],
        )
        df_bol["vol"] = df_bol["vol"].astype(float)
        df_bol["year"] = year
        df_list.append(df_bol)
    df_bols = pd.concat(df_list)
    return df_bols
df_bols = load_bols()
df_hs4 = get_hs4_codes()
for c in interested_commodities:
    df_hs4_c = df_hs4[df_hs4["commodity"] == c]
    list_hs4_c = list(df_hs4_c["hs4"])
    df_bols_c = df_bols[df_bols["hs4"].isin(list_hs4_c)]
    df_bols_plot = (
        df_bols_c.groupby(["year", "exporter.label"])["vol"].sum().reset_index()
    )
    df_bols_plot["percentage"] = (
        df_bols_plot.groupby("year")["vol"].transform(lambda x: x / x.sum() * 100)
    ).round(3)
    df_bols_plot = df_bols_plot.sort_values(by="percentage")
    fig = px.bar(
        df_bols_plot,
        x="year",
        y="percentage",
        color="exporter.label",
        title=f"Volume percentage per exporter per year in {c}",
        text="exporter.label",
        height=600,
        width=1200,
    )
    fig.update_layout(yaxis={"categoryorder": "total ascending"})
    fig.show("png")
/tmp/ipykernel_6947/3064137518.py:4: UserWarning:

pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.

png

Overall, the trader volume per exporter in 2021 BOL