Analysis of variants

View or edit on GitHub

This page is synchronized from trase/models/brazil/customs_2019/analysis_of_variants.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes will be reflected here after the midnight build (CET), or after manually triggering a build with a GitHub action (link).

from trase.tools.aws.aws_helpers_cached import *

import plotly.express as px

# MDIC port-level trade records for 2017. Every column is read as a string
# (empty cells stay as "" rather than NaN), then the two numeric columns are
# cast to integers and a unit price (fob / vol) is derived from them.
# NOTE(review): rows with vol == 0 would yield inf/NaN prices — confirm whether
# such rows occur in this dataset.
df_port = get_pandas_df_once(
    "brazil/trade/mdic/port/brazil_mdic_port_2017.csv",
    dtype=str,
    keep_default_na=False,
)
df_port = df_port.astype({"vol": int, "fob": int}).assign(
    price=lambda frame: frame["fob"] / frame["vol"]
)

# MDIC municipality-level trade records for 2017, loaded the same way as the
# port data: all columns as strings, then vol/fob cast to integers.
df_mun = get_pandas_df_once(
    "brazil/trade/mdic/municipality/brazil_mdic_municipality_2017.csv",
    dtype=str,
    keep_default_na=False,
)
# Derived columns:
#   * price: fob divided by vol
#   * microregion / state ids: the first 7 / first 5 characters of the
#     exporter municipality trase_id (assumes the trase_id carries the
#     geographic hierarchy as a prefix — TODO confirm)
df_mun = df_mun.astype({"vol": int, "fob": int}).assign(
    **{
        "price": lambda frame: frame["fob"] / frame["vol"],
        "exporter.microregion.trase_id": lambda frame: frame[
            "exporter.municipality.trase_id"
        ].str[0:7],
        "exporter.state.trase_id": lambda frame: frame[
            "exporter.municipality.trase_id"
        ].str[0:5],
    }
)

# HS4 codes to analyse: uncomment the entry of interest (the intended use is
# one code at a time). Each entry is annotated as "COMMODITY - PRODUCT"; a code
# is listed more than once when it is associated with several products.
hs4s = [
    # "0102",  # BEEF        - CATTLE
    # "0201",  # BEEF        - BEEF
    # "0201",  # BEEF        - BEEF BONELESS
    # "0202",  # BEEF        - BEEF
    # "0202",  # BEEF        - BEEF BONELESS
    # "0206",  # BEEF        - BEEF OFFALS
    # "0210",  # BEEF        - BEEF DRIED SALTED SMOKED
    # "0504",  # BEEF        - BEEF OFFALS
    # "1602",  # BEEF        - MEAT PREPARATIONS
    # "0105",  # CHICKEN     - CHICKEN LIVE
    # "0207",  # CHICKEN     - CHICKEN MEAT
    # "1602",  # CHICKEN     - CHICKEN PREPARATIONS
    # "1801",  # COCOA       - COCOA BEANS
    # "1802",  # COCOA       - COCOA WASTE
    # "1803",  # COCOA       - COCOA PASTE
    # "1804",  # COCOA       - COCOA BUTTER
    # "1805",  # COCOA       - COCOA POWDER
    # "0901",  # COFFEE      - COFFEE BEAN
    # "0901",  # COFFEE      - COFFEE PROCESSED
    # "0901",  # COFFEE      - COFFEE ROASTED
    # "2101",  # COFFEE      - COFFEE PROCESSED
    # "2603",  # COPPER ORE  - COPPER ORE
    # "1005",  # CORN        - CORN GRAINS
    # "1102",  # CORN        - CORN FLOUR
    # "1103",  # CORN        - CORN MEAL
    # "1104",  # CORN        - CORN GRAINS, WORKED
    # "1108",  # CORN        - CORN STARCH
    # "1515",  # CORN        - CORN OIL
    # "2302",  # CORN        - CORN RESIDUES
    # "1207",  # COTTON      - COTTON SEED
    # "1404",  # COTTON      - COTTON LINTERS
    # "1512",  # COTTON      - COTTON OIL
    # "2306",  # COTTON      - COTTON CAKE
    # "5201",  # COTTON      - COTTON WASTE
    # "5202",  # COTTON      - COTTON WASTE
    # "0103",  # PORK        - PORK LIVE
    # "0203",  # PORK        - PORK MEAT
    # "0206",  # PORK        - PORK OFFALS
    # "0210",  # PORK        - PORK MEAT SALTED
    # "1602",  # PORK        - PORK PREPARATIONS
    # "1201",  # SOY         - SOYBEANS
    # "1208",  # SOY         - SOYBEAN CAKE
    # "1507",  # SOY         - SOYBEAN OIL
    # "2302",  # SOY         - SOYBEAN RESIDUE
    "2304",  # SOY         - SOYBEAN CAKE
]

# df_port = df_port[df_port["hs4"].isin(hs4s)]
# df_mun = df_mun[df_mun["hs4"].isin(hs4s)]

# # df_port =

# #df_port = df_port[df_port["hs6"] == "0210990"]

# # plots
# px.box(
#     df_port,
#     x="hs6",
#     y="price",
#     points="all",
#     title="Price per HS6"
# ).show()
# px.box(
#     df_port,
#     x="hs8",
#     y="price",
#     points="all",
#     title="Price per HS8"
# ).show()
# px.box(
#     df_mun,
#     x="exporter.municipality.trase_id",
#     y="price",
#     points=False,  # hide outliers
#     title="Price per municipality of taxation"
# ).show()
# px.box(
#     df_mun,
#     x="exporter.microregion.trase_id",
#     y="price",
#     points=False,  # hide outliers
#     title="Price per microregion of taxation"
# ).show()
# px.box(
#     df_mun,
#     x="exporter.state.trase_id",
#     y="price",
#     points=False,
#     title="Price per state of taxation"
# ).show()
# px.box(
#     df_mun,
#     x="port.name",
#     y="price",
#     points=False,
#     title="Price per Port"
# ).show()
# px.box(
#     df_mun,
#     x="state.trase_id",
#     y="price",
#     points=False,
#     title="Price per State of production"
# ).show()
# px.box(
#     df_port,
#     x="via",
#     y="price",
#     points="all",
#     title="Price per via (sea/air/etc)"
# ).show()
# Lookup from destination country name to a coarse destination group.
# Countries absent from this table have no group here.
# NOTE(review): the "EU" list is not the full EU membership (e.g. there is no
# NETHERLANDS or PORTUGAL entry) — confirm the omissions are intentional.
COUNTRY_GROUPS = {
    **dict.fromkeys(
        (
            "AUSTRIA",
            "BELGIUM",
            "DENMARK",
            "FINLAND",
            "FRANCE",
            "GERMANY",
            "ITALY",
            "LUXEMBOURG",
            "POLAND",
            "ROMANIA",
            "SPAIN",
            "SWEDEN",
            "UNITED KINGDOM",
        ),
        "EU",
    ),
    **dict.fromkeys(
        (
            "CHINA (HONG KONG)",
            "CHINA (MAINLAND)",
        ),
        "CHINA",
    ),
}
# Tag every port record with its destination group: first look the country
# name up in COUNTRY_GROUPS, then label everything without a match as "OTHER".
df_port["country_of_destination.group"] = df_port["country_of_destination.name"].map(
    COUNTRY_GROUPS
)
df_port["country_of_destination.group"] = df_port[
    "country_of_destination.group"
].fillna("OTHER")