comex bolivia soy 201X

View or edit on GitHub

This page is synchronized from trase/models/bolivia/soy/archive/comex_bolivia_soy_201X.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

import pandas as pd
from trase.tools.aws import get_pandas_df
from trase.tools.aws.metadata import (
    write_csv_for_upload,
    S3_OBJECTS_ACCESSED_IN_CURRENT_SESSION,
)
path = f"bolivia/trade/comex/originals/"
for year in [2018, 2019, 2020, 2021]:
    S3_OBJECTS_ACCESSED_IN_CURRENT_SESSION.clear()
    comex_df = get_pandas_df(
        path + f"exp{year}.txt",
        sep="|",
        encoding="latin-1",
        dtype={"CIIU3": str, "CODACT": str, "CUCIR3": str},
    )
    dep_df = get_pandas_df(path + "departamento.txt", sep="|", encoding="latin-1")
    acteco_df = get_pandas_df(
        path + "acteco.txt", sep="|", encoding="latin-1", dtype={"cod_acteco": str}
    )
    ciiu_df = get_pandas_df(
        path + "ciiu3.txt", sep="|", encoding="latin-1", dtype={"cod_ciiu3": str}
    )
    cuci_df = get_pandas_df(path + "cuci3.txt", sep="|", encoding="latin-1", dtype=str)
    tnt_df = get_pandas_df(path + "tnt.txt", sep="|", encoding="latin-1")
    FLOUR = ["1208100000"]
    SEED = ["1201100000"]
    OIL = ["1507100000", "1507909000"]
    CAKE = ["2304000000"]
    clean_country_names = {
        "VIET-NAM": "VIETNAM",
        "BRASIL.": "BRAZIL",
        "POLONIA": "POLAND",
        "ECUADOR": "ECUADOR",
        "ARGENTINA": "ARGENTINA",
        "PERU": "PERU",
        "CHILE": "CHILE",
        "COLOMBIA": "COLOMBIA",
        "PANAMA": "PANAMA",
        "MYANMAR": "MYANMAR",
        "AFGANISTAN": "AFGANISTAN",
        "BANGLADESH": "BANGLADESH",
    }

    # Merge in metadata
    original_length = len(comex_df)
    comex_df = pd.merge(
        comex_df, dep_df, left_on="Departamento", right_on="cod_departamento"
    )
    assert len(comex_df) == original_length
    comex_df = pd.merge(comex_df, ciiu_df, left_on="CIIU3", right_on="cod_ciiu3")
    assert len(comex_df) == original_length
    comex_df = pd.merge(comex_df, cuci_df, left_on="CUCIR3", right_on="cod_cuci3")
    assert len(comex_df) == original_length
    comex_df = pd.merge(comex_df, tnt_df, left_on="TNT", right_on="cod_tnt")
    assert len(comex_df) == original_length
    comex_df = pd.merge(comex_df, acteco_df, left_on="CODACT", right_on="cod_acteco")
    for c in comex_df.columns:
        assert len(comex_df[comex_df[c].isna()]) == 0

    # Filter to soy
    soya = {"D5": "Soya y Productos de Soya", "A4": "Semillas y Habas de Soya"}
    comex_df = comex_df[
        comex_df.des_tnt == "SOYA"
    ]  # same as comex_df.CODACT.isin(soya.keys())

    # filter to ALL hscodes
    comex_df["HS6"] = comex_df["NANDINA"].astype(str).str[0:6]
    comex_df["COUNTRY_OF_DESTINATION"] = comex_df.des_Pais.map(clean_country_names)
    # write_csv_for_upload(comex_df, f"bolivia/trade/comex/comex_bolivia_soy_{year}.csv"
    comex_df.to_csv(f"comex_bolivia_soy_{year}.csv")