View or edit on GitHub
This page is synchronized from trase/models/brazil/soy/brazil_soy_sei_pcs_vs_comtrade_and_mdic.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
Brazil Soy v2.6.1: Comparing to Comtrade and MDIC
We use the following data sources:
- SEI-PCS results from S3, with 2017-18 coming from v2.6.0 and 2019-22 coming from v2.6.1
- MDIC from S3
- Comtrade from S3
from trase.tools import sps
import plotly.express as px
import pandas as pd
########################################################################################
# Read SEI-PCS results
########################################################################################
dfs = [
sps.get_pandas_df_once(
f"brazil/soy/sei_pcs/v{version}/SEIPCS_BRAZIL_SOY_{year}.csv",
dtype=str,
na_filter=False,
print_version_id=True,
)
for version, year in [
("2.6.0", 2017),
("2.6.0", 2018),
("2.6.1", 2019),
("2.6.1", 2020),
("2.6.1", 2021),
("2.6.1", 2022),
]
]
common_columns = set.intersection(*[set(df.columns) for df in dfs])
df = sps.concat(df[list(common_columns)] for df in dfs)
df = df.astype({"VOLUME_RAW": float, "YEAR": int, "FOB": float})
########################################################################################
# read MDIC data
########################################################################################
SOY_HS4 = [
"2304",
"1208",
"1507",
"1201",
] # not include SOYBEAN RESIDUAL because of no eq factor
df_mdic = sps.concat(
sps.get_pandas_df_once(
f"brazil/trade/mdic/port/brazil_mdic_port_{year}.csv",
dtype=str,
na_filter=False,
)
for year in [2017, 2018, 2019, 2020, 2021, 2022]
)
df_mdic = df_mdic[df_mdic["hs4"].isin(SOY_HS4)]
df_mdic = df_mdic.astype({"vol": float, "year": int, "fob": float}).rename(
columns={"vol": "VOLUME_RAW", "year": "YEAR", "fob": "FOB"}
)
########################################################################################
# read BOL data
########################################################################################
df_bols = [
sps.get_pandas_df_once(
f"brazil/trade/bol/{year}/BRAZIL_BOL_{year}.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
print_version_id=True,
)
for year in [2020, 2021, 2022]
]
common_columns = list(set.intersection(*[set(df.columns) for df in df_bols]))
df_bol = sps.concat(df[common_columns] for df in df_bols)
df_bol = df_bol[df_bol["hs4"].isin(SOY_HS4)]
df_bol = df_bol.astype({"vol": float, "year": int}).rename(
columns={"vol": "VOLUME_RAW", "year": "YEAR"}
)
########################################################################################
# read comtrade data
########################################################################################
df_comtrade = sps.concat(
sps.get_pandas_df_once(
f"world/trade/statistical_data/comtrade/COMTRADE_{year}.csv",
dtype=str,
na_filter=False,
)
for year in [2020, 2021, 2022]
)
df_comtrade = df_comtrade[df_comtrade["volume"] != ""]
df_comtrade = df_comtrade[df_comtrade["transportation_method"] == "TOTAL MOT"]
df_comtrade = df_comtrade.astype({"year": int, "volume": float, "fob": float})
df_comtrade = df_comtrade.rename(
columns={"year": "YEAR", "volume": "VOLUME_RAW", "fob": "FOB"}
)
df_comtrade = df_comtrade[df_comtrade["reporter.iso3"] == "BRA"]
df_comtrade = df_comtrade[df_comtrade["hs6"].str.slice(0, 4).isin(SOY_HS4)]
/Users/harrybiddle/dev/TRASE/trase/tools/sei_pcs/pandas_utilities.py:18: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
from pandas.util.testing import assert_frame_equal
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2017.csv version_id=qKtDR2xK7EEjeI_6eRsVmJa.9A1qfH_b
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2018.csv version_id=j4zTGNLsjTIts0AAg8J.LGP4ycc6KEt9
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2019.csv version_id=a1fTkYc0FghpLMTtpD3_wAsHFE1U6FBi
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2020.csv version_id=nuArlOqWRXyEzXZHeIcaHEs0QNEH8i_c
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2021.csv version_id=ClAQ9M4tm7u4teAyw3ChcpEdjxOFxgef
Accessing s3://trase-storage/brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2022.csv version_id=Pr5.DDGjAGVkZo8pVrz0qO3kkLhqsX_I
Accessing s3://trase-storage/brazil/trade/bol/2020/BRAZIL_BOL_2020.csv version_id=k8XDQDmnnx5UiuXOdFvRJK.q0Igrl5KY
Accessing s3://trase-storage/brazil/trade/bol/2021/BRAZIL_BOL_2021.csv version_id=jQeZBzBLgQx.YUtDFfOPfBbW4O1FcBMI
Accessing s3://trase-storage/brazil/trade/bol/2022/BRAZIL_BOL_2022.csv version_id=cUVKG_.jj_GzgRNzmtJCvD.KID3pbEYG
df_bol.columns
df_export = df[df["BRANCH"] != "DOMESTIC"]
df_totals = sps.concat(
sps.consolidate(_df, [metric], ["YEAR"])
.assign(source=source, metric=metric)
.rename(columns={metric: "value"})[["YEAR", "value", "source", "metric"]]
for metric in ["VOLUME_RAW", "FOB"]
for _df, source in [
(df_export, "SEI-PCS"),
(df_bol, "BOL"),
(df_mdic, "SECEX / MDIC"),
(df_comtrade, "Comtrade"),
]
if metric in _df.columns
)
fig = px.line(
df_totals,
x="YEAR",
y="value",
color="source",
facet_col="metric",
facet_col_wrap=1,
title=f"Comparing SEI-PCS, Comtrade and MDIC",
)
# make the facets have independent axes
fig.update_yaxes(matches=None)
fig.show("png")

df_ = df[(df["YEAR"] == 2021) & (df["BRANCH"] != "DOMESTIC")]