Brazil Beef Exporters
s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet
Dbt path: trase_production.main_brazil.brazil_beef_exporters
Explore on Metabase: Full table; summary statistics
Containing yaml file link: trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/_schema_sei_pcs_v2_2_1.yml
Model file link: trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py
Calls script: trase/data/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py
Dbt test runs & lineage: Test results · Lineage
Full dbt_docs page: Open in dbt docs (includes the lineage graph at the bottom right, tests, and downstream dependencies)
Tags: beef, brazil, sei_pcs
brazil_beef_exporters
Description
Exporter columns (YEAR, EXPORTER_CNPJ, EXPORTER, VOLUME_RAW) selected from the SEI-PCS Brazil beef data, with raw volumes summed per exporter and year.
Details
| Column | Type | Description |
|---|---|---|
| YEAR | INTEGER | Export year. |
| EXPORTER_CNPJ | VARCHAR | CNPJ (Brazilian company registry number) of the exporter, stored as a string. |
| EXPORTER | VARCHAR | Exporter name as recorded in the SEI-PCS data. |
| VOLUME_RAW | FLOAT | Raw export volume, summed per year, CNPJ, and exporter name. |
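A quick way to sanity-check the published table is to read the parquet from the S3 path in the header. This is a minimal sketch, not part of the pipeline; it assumes read access to the trase-storage bucket and that polars can resolve your AWS credentials.

```python
import polars as pl

# Read the published output (path taken from the header of this page).
df = pl.read_parquet(
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet"
)

# Expected shape given the processing script below: one row per
# (YEAR, EXPORTER_CNPJ, EXPORTER) with the summed VOLUME_RAW.
print(df.schema)
print(df.group_by("YEAR").agg(pl.col("VOLUME_RAW").sum()).sort("YEAR"))
```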
Models / Seeds
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2010
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2011
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2012
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2013
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2014
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2016
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2017
source.trase_duckdb.trase-storage-raw.seipcs_brazil_beef_2018
model.trase_duckdb.seipcs_brazil_beef_2015
model.trase_duckdb.seipcs_brazil_beef_2019
model.trase_duckdb.seipcs_brazil_beef_2020
model.trase_duckdb.seipcs_brazil_beef_2021
model.trase_duckdb.seipcs_brazil_beef_2022
model.trase_duckdb.seipcs_brazil_beef_2023
Sources
['trase-storage-raw', 'seipcs_brazil_beef_2010']
['trase-storage-raw', 'seipcs_brazil_beef_2011']
['trase-storage-raw', 'seipcs_brazil_beef_2012']
['trase-storage-raw', 'seipcs_brazil_beef_2013']
['trase-storage-raw', 'seipcs_brazil_beef_2014']
['trase-storage-raw', 'seipcs_brazil_beef_2016']
['trase-storage-raw', 'seipcs_brazil_beef_2017']
['trase-storage-raw', 'seipcs_brazil_beef_2018']
Called script (trase/data/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py):

import polars as pl
from tqdm import tqdm

from trase.tools.aws.metadata import write_parquet_for_upload

S3_KEYS = [
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2010.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2011.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2012.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2013.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2014.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2015.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2016.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2017.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2018.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2019.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.0/SEIPCS_BRAZIL_BEEF_2020.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2021.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2022.csv",
    "s3://trase-storage/brazil/beef/sei_pcs/v2.2.1/SEIPCS_BRAZIL_BEEF_2023.csv",
]


def read_exporters(s3_key):
    # Read every column as a string (infer_schema=False) so identifiers such as
    # EXPORTER_CNPJ are not coerced to numbers, then cast only the numeric columns.
    df = pl.read_csv(
        s3_key,
        separator=";",
        infer_schema=False,
        null_values=[],
        columns=["YEAR", "EXPORTER_CNPJ", "EXPORTER", "VOLUME_RAW"],
    )
    df = df.cast({"YEAR": pl.Int32, "VOLUME_RAW": pl.Float32})
    # Sum VOLUME_RAW per exporter within the file.
    return df.group_by(["YEAR", "EXPORTER_CNPJ", "EXPORTER"]).sum()


def process():
    df = pl.concat(read_exporters(s3_key) for s3_key in tqdm(S3_KEYS))
    # Each input file covers a single year, so the concatenated frame must remain
    # unique on the grouping key.
    assert (
        df.select(["YEAR", "EXPORTER_CNPJ", "EXPORTER"]).is_unique().all()
    ), "DataFrame is not unique on YEAR, EXPORTER_CNPJ, and EXPORTER"
    return df


if __name__ == "__main__":
    df = process()
    write_parquet_for_upload(
        df, "brazil/beef/sei_pcs/v2.2.1/brazil_beef_exporters.parquet", is_polars=True
    )
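The model file below notes that the CSVs must be read as strings so that EXPORTER_CNPJ is preserved. The toy example here only illustrates that behaviour on in-memory data; the CNPJ value and rows are made up, and the leading-zero point is our assumption about why string parsing matters, not something stated in the pipeline.

```python
import io

import polars as pl

# Made-up CSV mimicking the layout of the SEI-PCS files.
csv = io.StringIO(
    "YEAR;EXPORTER_CNPJ;EXPORTER;VOLUME_RAW\n"
    "2023;01234567000189;EXPORTER A;10.5\n"
    "2023;01234567000189;EXPORTER A;2.0\n"
)

# infer_schema=False keeps every column as a string, so a CNPJ that starts with
# "0" is not silently turned into an integer; only known numeric columns are cast.
df = pl.read_csv(csv, separator=";", infer_schema=False)
df = df.cast({"YEAR": pl.Int32, "VOLUME_RAW": pl.Float32})
print(df.group_by(["YEAR", "EXPORTER_CNPJ", "EXPORTER"]).sum())
# EXPORTER_CNPJ stays "01234567000189"; an inferred integer column would have
# dropped the leading zero.
```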
Model file (trase/data_pipeline/models/brazil/beef/sei_pcs/v2_2_1/brazil_beef_exporters.py):

from trase.data.brazil.beef.sei_pcs.v2_2_1.brazil_beef_exporters import process


def model(dbt, cursor):
    dbt.config(materialized="external")

    # We include the upstream sources and models as references so they appear in
    # the lineage graph. The actual processing reads the CSV files directly from
    # S3 rather than through DuckDB, because the columns (in particular
    # EXPORTER_CNPJ) need to be read as strings and we have not found a way to do
    # that with DuckDB.
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2010")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2011")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2012")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2013")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2014")
    dbt.ref("seipcs_brazil_beef_2015")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2016")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2017")
    dbt.source("trase-storage-raw", "seipcs_brazil_beef_2018")
    dbt.ref("seipcs_brazil_beef_2019")
    dbt.ref("seipcs_brazil_beef_2020")
    dbt.ref("seipcs_brazil_beef_2021")
    dbt.ref("seipcs_brazil_beef_2022")
    dbt.ref("seipcs_brazil_beef_2023")

    return process()
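For orientation only: a downstream dbt-duckdb Python model could consume this table through ref(). This is a minimal sketch under the assumption that dbt.ref() returns a DuckDB relation in dbt-duckdb Python models; the downstream model name and aggregation are hypothetical, not part of this pipeline.

```python
# Hypothetical downstream model (e.g. brazil_beef_exporter_totals.py); not part
# of this pipeline.
def model(dbt, cursor):
    dbt.config(materialized="table")

    # Assumed: dbt.ref() hands back a DuckDB relation for the upstream model.
    exporters = dbt.ref("brazil_beef_exporters")

    # Total volume and number of distinct exporters per year.
    return exporters.aggregate(
        "YEAR, count(DISTINCT EXPORTER_CNPJ) AS n_exporters, "
        "sum(VOLUME_RAW) AS total_volume",
        "YEAR",
    )
```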