Brazil Beef Exporters

s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet

Dbt path: trase_production.main_brazil.brazil_beef_exporters

Explore on Metabase: Full table; summary statistics

Containing yaml file: trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/_schema_sei_pcs_v2_2_1.yml

Model file: trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py

Calls script: trase/data/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py

Dbt test runs & lineage: Test results · Lineage

Full dbt_docs page: Open in dbt docs (includes the lineage graph at the bottom right, tests, and downstream dependencies)

Tags: beef, brazil, sei_pcs


brazil_beef_exporters

Description

Exporter columns selected from the SEI-PCS data.


Details

Column         Type     Description
YEAR           INTEGER  Year of the SEI-PCS export record
EXPORTER_CNPJ  VARCHAR  Exporter CNPJ (Brazilian company identifier), kept as a string
EXPORTER       VARCHAR  Exporter name
VOLUME_RAW     FLOAT    Raw export volume, summed per YEAR / EXPORTER_CNPJ / EXPORTER
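
As a quick sanity check, the published parquet (path at the top of this page) can be read with polars and compared against the schema above. A minimal sketch, assuming a recent polars build with cloud storage support and read access to the trase-storage bucket:

import polars as pl

# Read the published output and inspect its schema and row count. The build script
# below casts YEAR to Int32 and VOLUME_RAW to Float32, which DuckDB reports as
# INTEGER and FLOAT.
df = pl.read_parquet(
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet"
)
print(df.schema)
print(df.height)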

Models / Seeds

  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2010
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2011
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2012
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2013
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2014
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2016
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2017
  • source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2018
  • model.trase_duckdb.seipcs_brazil_beef_2015
  • model.trase_duckdb.seipcs_brazil_beef_2019
  • model.trase_duckdb.seipcs_brazil_beef_2020
  • model.trase_duckdb.seipcs_brazil_beef_2021
  • model.trase_duckdb.seipcs_brazil_beef_2022
  • model.trase_duckdb.seipcs_brazil_beef_2023

Sources

  • ['trase-storage-raw', 'seipcs_brazil_beef_2010']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2011']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2012']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2013']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2014']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2016']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2017']
  • ['trase-storage-raw', 'seipcs_brazil_beef_2018']

Script: trase/data/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py

import polars as pl
from tqdm import tqdm

from trase.tools.aws.metadata import write_parquet_for_upload

S3_KEYS = [
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2010.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2011.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2012.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2013.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2014.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2015.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2016.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2017.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2018.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2019.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2020.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2021.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2022.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2023.csv",
]


def read_exporters(s3_key):
    # Read every column as a string (infer_schema=False) so EXPORTER_CNPJ is not
    # parsed as a number, then cast only the numeric columns.
    df = pl.read_csv(
        s3_key,
        separator=";",
        infer_schema=False,
        null_values=[],
        columns=["YEAR", "EXPORTER_CNPJ", "EXPORTER", "VOLUME_RAW"],
    )
    df = df.cast({"YEAR": pl.Int32, "VOLUME_RAW": pl.Float32})
    # One row per exporter per year, with VOLUME_RAW summed.
    return df.group_by(["YEAR", "EXPORTER_CNPJ", "EXPORTER"]).sum()


def process():
    df = pl.concat(read_exporters(s3_key) for s3_key in tqdm(S3_KEYS))

    assert (
        df.select(["YEAR", "EXPORTER_CNPJ", "EXPORTER"]).is_unique().all()
    ), "DataFrame is not unique on YEAR, EXPORTER_CNPJ, and EXPORTER"

    return df


if __name__ == "__main__":
    df = process()
    write_parquet_for_upload(
        df, "brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet", is_polars=True
    )

Model file: trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py

from trase.data.brazil.beef.sei_pcs.v2_2_1.brazil_beef_exporters import process


def model(dbt, cursor):
    dbt.config(materialized="external")

    # We include the upstream models and sources as references so that they appear
    # in the lineage graph. The actual processing code reads the CSV files directly
    # from S3 rather than through DuckDB, because EXPORTER_CNPJ has to be read as a
    # string and we have not found a way to do that through DuckDB.

    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2010")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2011")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2012")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2013")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2014")
    dbt.ref("seipcs_brazil_beef_2015")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2016")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2017")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2018")
    dbt.ref("seipcs_brazil_beef_2019")
    dbt.ref("seipcs_brazil_beef_2020")
    dbt.ref("seipcs_brazil_beef_2021")
    dbt.ref("seipcs_brazil_beef_2022")
    dbt.ref("seipcs_brazil_beef_2023")

    return process()
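
For reference, a downstream dbt-duckdb Python model could consume this table through dbt.ref, which both wires the dependency into the lineage graph and returns the data itself. A minimal, hypothetical sketch that is not part of the pipeline; it assumes dbt.ref in dbt-duckdb hands back a DuckDB relation with the usual .df() conversion:

def model(dbt, cursor):
    dbt.config(materialized="table")

    # Referencing the upstream model adds the edge to the lineage graph and, unlike
    # the lineage-only sources above, is also used for the actual read.
    exporters = dbt.ref("brazil_beef_exporters")

    # Assumption: the referenced relation behaves like a DuckDB relation, so .df()
    # converts it to a pandas DataFrame that dbt-duckdb can materialize.
    return exporters.df()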