Analysis of variants
View or edit on GitHub
This page is synchronized from trase/models/brazil/customs_2019/analysis_of_variants.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after the midnight build (CET),
or after manually triggering a build with a GitHub action (link).
from trase.tools.aws.aws_helpers_cached import *
import plotly.express as px
# MDIC export records by port, 2017 file.
# Every column is requested as text (dtype=str, keep_default_na=False —
# presumably forwarded to pandas.read_csv; verify against the helper), and
# only `vol` and `fob` are then cast to integers.
df_port = get_pandas_df_once(
    "brazil/trade/mdic/port/brazil_mdic_port_2017.csv",
    dtype=str,
    keep_default_na=False,
).astype({"vol": int, "fob": int})
# Price of each record: `fob` divided by `vol`.
# NOTE(review): a record with vol == 0 would yield inf/NaN here — confirm
# whether such records exist in the source file.
df_port["price"] = df_port["fob"].div(df_port["vol"])
# MDIC export records by municipality, 2017 file.
# Every column is requested as text (dtype=str, keep_default_na=False —
# presumably forwarded to pandas.read_csv; verify against the helper), and
# only `vol` and `fob` are then cast to integers.
df_mun = get_pandas_df_once(
    "brazil/trade/mdic/municipality/brazil_mdic_municipality_2017.csv",
    dtype=str,
    keep_default_na=False,
).astype({"vol": int, "fob": int})
# Price of each record: `fob` divided by `vol`.
df_mun["price"] = df_mun["fob"].div(df_mun["vol"])
# Coarser geographic keys are taken as prefixes of the exporter municipality
# id: its first 7 characters become the microregion id and its first 5
# characters the state id.
df_mun["exporter.microregion.trase_id"] = (
    df_mun["exporter.municipality.trase_id"].str[:7]
)
df_mun["exporter.state.trase_id"] = (
    df_mun["exporter.municipality.trase_id"].str[:5]
)
# HS4 heading(s) to analyse. Uncomment the row(s) of interest; exactly one
# ("2304") is active at the moment. Rows are grouped by commodity, and a
# heading that serves several commodities (e.g. "1602") appears once under
# each of them. Product labels sharing one heading are joined with " / ".
hs4s = [
    # ---- BEEF ----
    # "0102",  # CATTLE
    # "0201",  # BEEF / BEEF BONELESS
    # "0202",  # BEEF / BEEF BONELESS
    # "0206",  # BEEF OFFALS
    # "0210",  # BEEF DRIED SALTED SMOKED
    # "0504",  # BEEF OFFALS
    # "1602",  # MEAT PREPARATIONS
    # ---- CHICKEN ----
    # "0105",  # CHICKEN LIVE
    # "0207",  # CHICKEN MEAT
    # "1602",  # CHICKEN PREPARATIONS
    # ---- COCOA ----
    # "1801",  # COCOA BEANS
    # "1802",  # COCOA WASTE
    # "1803",  # COCOA PASTE
    # "1804",  # COCOA BUTTER
    # "1805",  # COCOA POWDER
    # ---- COFFEE ----
    # "0901",  # COFFEE BEAN / COFFEE PROCESSED / COFFEE ROASTED
    # "2101",  # COFFEE PROCESSED
    # ---- COPPER ORE ----
    # "2603",  # COPPER ORE
    # ---- CORN ----
    # "1005",  # CORN GRAINS
    # "1102",  # CORN FLOUR
    # "1103",  # CORN MEAL
    # "1104",  # CORN GRAINS, WORKED
    # "1108",  # CORN STARCH
    # "1515",  # CORN OIL
    # "2302",  # CORN RESIDUES
    # ---- COTTON ----
    # "1207",  # COTTON SEED
    # "1404",  # COTTON LINTERS
    # "1512",  # COTTON OIL
    # "2306",  # COTTON CAKE
    # "5201",  # COTTON WASTE
    # "5202",  # COTTON WASTE
    # ---- PORK ----
    # "0103",  # PORK LIVE
    # "0203",  # PORK MEAT
    # "0206",  # PORK OFFALS
    # "0210",  # PORK MEAT SALTED
    # "1602",  # PORK PREPARATIONS
    # ---- SOY ----
    # "1201",  # SOYBEANS
    # "1208",  # SOYBEAN CAKE
    # "1507",  # SOYBEAN OIL
    # "2302",  # SOYBEAN RESIDUE
    "2304",  # SOYBEAN CAKE
]
# df_port = df_port[df_port["hs4"].isin(hs4s)]
# df_mun = df_mun[df_mun["hs4"].isin(hs4s)]
# # df_port =
# #df_port = df_port[df_port["hs6"] == "0210990"]
# # plots
# px.box(
# df_port,
# x="hs6",
# y="price",
# points="all",
# title="Price per HS6"
# ).show()
# px.box(
# df_port,
# x="hs8",
# y="price",
# points="all",
# title="Price per HS8"
# ).show()
# px.box(
# df_mun,
# x="exporter.municipality.trase_id",
# y="price",
# points=False, # hide outliers
# title="Price per municipality of taxation"
# ).show()
# px.box(
# df_mun,
# x="exporter.microregion.trase_id",
# y="price",
# points=False, # hide outliers
# title="Price per microregion of taxation"
# ).show()
# px.box(
# df_mun,
# x="exporter.state.trase_id",
# y="price",
# points=False,
# title="Price per state of taxation"
# ).show()
# px.box(
# df_mun,
# x="port.name",
# y="price",
# points=False,
# title="Price per Port"
# ).show()
# px.box(
# df_mun,
# x="state.trase_id",
# y="price",
# points=False,
# title="Price per State of production"
# ).show()
# px.box(
# df_port,
# x="via",
# y="price",
# points="all",
# title="Price per via (sea/air/etc)"
# ).show()
# Lookup from destination country name to the coarser group used in the
# analysis. Only two groups are defined: "EU" and "CHINA".
# NOTE(review): the "EU" list holds 13 names and is not the full membership
# (e.g. NETHERLANDS and PORTUGAL are absent) — confirm this is intentional.
COUNTRY_GROUPS = {
    **dict.fromkeys(
        (
            "AUSTRIA",
            "BELGIUM",
            "DENMARK",
            "FINLAND",
            "FRANCE",
            "GERMANY",
            "ITALY",
            "LUXEMBOURG",
            "POLAND",
            "ROMANIA",
            "SPAIN",
            "SWEDEN",
            "UNITED KINGDOM",
        ),
        "EU",
    ),
    **dict.fromkeys(
        (
            "CHINA (HONG KONG)",
            "CHINA (MAINLAND)",
        ),
        "CHINA",
    ),
}
# Label every port record with its destination group. Names that have no
# entry in COUNTRY_GROUPS come back from the lookup as missing values and are
# then labelled "OTHER".
_destination_group = df_port["country_of_destination.name"].map(COUNTRY_GROUPS)
df_port["country_of_destination.group"] = _destination_group.fillna("OTHER")