Skip to content

View or edit on GitHub

This page is synchronized from trase/models/brazil/soy/brazil_soy_sei_pcs_vs_comtrade_and_mdic.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

Brazil Soy v2.6.1: Comparing to Comtrade and MDIC

We use the following data sources:

  • SEI-PCS results from S3, with 2017-18 coming from v2.6.0 and 2019-22 coming from v2.6.1
  • MDIC from S3
  • Comtrade from S3
from trase.tools import sps
import plotly.express as px
import pandas as pd

########################################################################################
# Read SEI-PCS results
########################################################################################
dfs = [
    sps.get_pandas_df_once(
        f"brazil/soy/sei_pcs/v{version}/SEIPCS_BRAZIL_SOY_{year}.csv",
        dtype=str,
        na_filter=False,
        print_version_id=True,
    )
    for version, year in [
        ("2.6.0", 2017),
        ("2.6.0", 2018),
        ("2.6.1", 2019),
        ("2.6.1", 2020),
        ("2.6.1", 2021),
        ("2.6.1", 2022),
    ]
]
common_columns = set.intersection(*[set(df.columns) for df in dfs])
df = sps.concat(df[list(common_columns)] for df in dfs)
df = df.astype({"VOLUME_RAW": float, "YEAR": int, "FOB": float})

########################################################################################
# read MDIC data
########################################################################################
SOY_HS4 = [
    "2304",
    "1208",
    "1507",
    "1201",
]  # not include SOYBEAN RESIDUAL because of no eq factor
df_mdic = sps.concat(
    sps.get_pandas_df_once(
        f"brazil/trade/mdic/port/brazil_mdic_port_{year}.csv",
        dtype=str,
        na_filter=False,
    )
    for year in [2017, 2018, 2019, 2020, 2021, 2022]
)
df_mdic = df_mdic[df_mdic["hs4"].isin(SOY_HS4)]
df_mdic = df_mdic.astype({"vol": float, "year": int, "fob": float}).rename(
    columns={"vol": "VOLUME_RAW", "year": "YEAR", "fob": "FOB"}
)

########################################################################################
# read BOL data
########################################################################################
df_bols = [
    sps.get_pandas_df_once(
        f"brazil/trade/bol/{year}/BRAZIL_BOL_{year}.csv",
        encoding="utf8",
        sep=";",
        dtype=str,
        keep_default_na=False,
        print_version_id=True,
    )
    for year in [2020, 2021, 2022]
]
common_columns = list(set.intersection(*[set(df.columns) for df in df_bols]))
df_bol = sps.concat(df[common_columns] for df in df_bols)
df_bol = df_bol[df_bol["hs4"].isin(SOY_HS4)]
df_bol = df_bol.astype({"vol": float, "year": int}).rename(
    columns={"vol": "VOLUME_RAW", "year": "YEAR"}
)

########################################################################################
# read comtrade data
########################################################################################
df_comtrade = sps.concat(
    sps.get_pandas_df_once(
        f"world/trade/statistical_data/comtrade/COMTRADE_{year}.csv",
        dtype=str,
        na_filter=False,
    )
    for year in [2020, 2021, 2022]
)
df_comtrade = df_comtrade[df_comtrade["volume"] != ""]
df_comtrade = df_comtrade[df_comtrade["transportation_method"] == "TOTAL MOT"]
df_comtrade = df_comtrade.astype({"year": int, "volume": float, "fob": float})
df_comtrade = df_comtrade.rename(
    columns={"year": "YEAR", "volume": "VOLUME_RAW", "fob": "FOB"}
)
df_comtrade = df_comtrade[df_comtrade["reporter.iso3"] == "BRA"]
df_comtrade = df_comtrade[df_comtrade["hs6"].str.slice(0, 4).isin(SOY_HS4)]
/Users/harrybiddle/dev/TRASE/trase/tools/sei_pcs/pandas_utilities.py:18: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  from pandas.util.testing import assert_frame_equal


Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2017.csv version_id=qKtDR2xK7EEjeI_6eRsVmJa.9A1qfH_b
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2018.csv version_id=j4zTGNLsjTIts0AAg8J.LGP4ycc6KEt9
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2019.csv version_id=a1fTkYc0FghpLMTtpD3_wAsHFE1U6FBi
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2020.csv version_id=nuArlOqWRXyEzXZHeIcaHEs0QNEH8i_c
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2021.csv version_id=ClAQ9M4tm7u4teAyw3ChcpEdjxOFxgef
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2022.csv version_id=Pr5.DDGjAGVkZo8pVrz0qO3kkLhqsX_I
Accessing s3://trase-storage/brazil/trade/bol/2020/BRAZIL_BOL_2020.csv version_id=k8XDQDmnnx5UiuXOdFvRJK.q0Igrl5KY
Accessing s3://trase-storage/brazil/trade/bol/2021/BRAZIL_BOL_2021.csv version_id=jQeZBzBLgQx.YUtDFfOPfBbW4O1FcBMI
Accessing s3://trase-storage/brazil/trade/bol/2022/BRAZIL_BOL_2022.csv version_id=cUVKG_.jj_GzgRNzmtJCvD.KID3pbEYG
df_bol.columns
df_export = df[df["BRANCH"] != "DOMESTIC"]
df_totals = sps.concat(
    sps.consolidate(_df, [metric], ["YEAR"])
    .assign(source=source, metric=metric)
    .rename(columns={metric: "value"})[["YEAR", "value", "source", "metric"]]
    for metric in ["VOLUME_RAW", "FOB"]
    for _df, source in [
        (df_export, "SEI-PCS"),
        (df_bol, "BOL"),
        (df_mdic, "SECEX / MDIC"),
        (df_comtrade, "Comtrade"),
    ]
    if metric in _df.columns
)
fig = px.line(
    df_totals,
    x="YEAR",
    y="value",
    color="source",
    facet_col="metric",
    facet_col_wrap=1,
    title=f"Comparing SEI-PCS, Comtrade and MDIC",
)

# make the facets have independent axes
fig.update_yaxes(matches=None)
fig.show("png")

png

df_ = df[(df["YEAR"] == 2021) & (df["BRANCH"] != "DOMESTIC")]