comex bolivia soy 201X
View or edit on GitHub
This page is synchronized from trase/models/bolivia/soy/archive/comex_bolivia_soy_201X.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
import pandas as pd
from trase.tools.aws import get_pandas_df
from trase.tools.aws.metadata import (
write_csv_for_upload,
S3_OBJECTS_ACCESSED_IN_CURRENT_SESSION,
)
path = f"bolivia/trade/comex/originals/"
for year in [2018, 2019, 2020, 2021]:
S3_OBJECTS_ACCESSED_IN_CURRENT_SESSION.clear()
comex_df = get_pandas_df(
path + f"exp{year}.txt",
sep="|",
encoding="latin-1",
dtype={"CIIU3": str, "CODACT": str, "CUCIR3": str},
)
dep_df = get_pandas_df(path + "departamento.txt", sep="|", encoding="latin-1")
acteco_df = get_pandas_df(
path + "acteco.txt", sep="|", encoding="latin-1", dtype={"cod_acteco": str}
)
ciiu_df = get_pandas_df(
path + "ciiu3.txt", sep="|", encoding="latin-1", dtype={"cod_ciiu3": str}
)
cuci_df = get_pandas_df(path + "cuci3.txt", sep="|", encoding="latin-1", dtype=str)
tnt_df = get_pandas_df(path + "tnt.txt", sep="|", encoding="latin-1")
FLOUR = ["1208100000"]
SEED = ["1201100000"]
OIL = ["1507100000", "1507909000"]
CAKE = ["2304000000"]
clean_country_names = {
"VIET-NAM": "VIETNAM",
"BRASIL.": "BRAZIL",
"POLONIA": "POLAND",
"ECUADOR": "ECUADOR",
"ARGENTINA": "ARGENTINA",
"PERU": "PERU",
"CHILE": "CHILE",
"COLOMBIA": "COLOMBIA",
"PANAMA": "PANAMA",
"MYANMAR": "MYANMAR",
"AFGANISTAN": "AFGANISTAN",
"BANGLADESH": "BANGLADESH",
}
# Merge in metadata
original_length = len(comex_df)
comex_df = pd.merge(
comex_df, dep_df, left_on="Departamento", right_on="cod_departamento"
)
assert len(comex_df) == original_length
comex_df = pd.merge(comex_df, ciiu_df, left_on="CIIU3", right_on="cod_ciiu3")
assert len(comex_df) == original_length
comex_df = pd.merge(comex_df, cuci_df, left_on="CUCIR3", right_on="cod_cuci3")
assert len(comex_df) == original_length
comex_df = pd.merge(comex_df, tnt_df, left_on="TNT", right_on="cod_tnt")
assert len(comex_df) == original_length
comex_df = pd.merge(comex_df, acteco_df, left_on="CODACT", right_on="cod_acteco")
for c in comex_df.columns:
assert len(comex_df[comex_df[c].isna()]) == 0
# Filter to soy
soya = {"D5": "Soya y Productos de Soya", "A4": "Semillas y Habas de Soya"}
comex_df = comex_df[
comex_df.des_tnt == "SOYA"
] # same as comex_df.CODACT.isin(soya.keys())
# filter to ALL hscodes
comex_df["HS6"] = comex_df["NANDINA"].astype(str).str[0:6]
comex_df["COUNTRY_OF_DESTINATION"] = comex_df.des_Pais.map(clean_country_names)
# write_csv_for_upload(comex_df, f"bolivia/trade/comex/comex_bolivia_soy_{year}.csv"
comex_df.to_csv(f"comex_bolivia_soy_{year}.csv")