Sigsif Slaughter 2022
s3://trase-storage/brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.csv
Dbt path: trase_production.main_brazil.sigsif_slaughter_2022
Explore on Metabase: Full table; summary statistics
Containing yaml file link: trase/data_pipeline/models/brazil/production/statistics/sigsif/original/_schema.yml
Model file link: trase/data_pipeline/models/brazil/production/statistics/sigsif/original/sigsif_slaughter_2022.py
Calls script: trase/data/brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.py
Dbt test runs & lineage: Test results · Lineage
Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)
Tags: brazil, original, production, sigsif, statistics
sigsif_slaughter_2022
Description
No description
Details
| Column | Type | Description |
|---|---|---|
"""
Adquire SIGSIF data
Save a csv file in sigsif/original/
Last run: 07/03/2023
Author: Yan Prada Moro
"""
import unicodedata
import pandas as pd
from unidecode import unidecode
from trase.tools.aws.metadata import write_csv_for_upload
COMMODITY_DESCRIPTION_TRANSLATIONS = {
#
# Beef
#
"BOVINO": "CATTLE",
"BOVINO FEMEA": "CATTLE",
"BOVINO MACHO": "CATTLE",
"NOVILHA INTERMEDIARIA": "CATTLE",
"NOVILHA PRECOCE": "CATTLE",
"NOVILHAO": "CATTLE",
"NOVILHO INTERMEDIARIO": "CATTLE",
"NOVILHO PRECOCE": "CATTLE",
"NOVILHONA": "CATTLE",
"TOUROTOURUNO": "CATTLE",
"VACA": "CATTLE",
"VITELO": "CATTLE",
#
# Chicken
#
"FRANGO ESPECIAL": "CHICKEN",
"FRANGO": "CHICKEN",
#
# Pork
#
"LEITAO": "PORK",
"LEITOA": "PORK",
"PORCA MATRIZEIRA": "PORK",
"PORCO MATRIZEIRO": "PORK",
"PORCO NAO CLASSIFICADO": "PORK",
"SUINO": "PORK",
"SUINO FEMEA": "PORK",
"SUINO MACHO": "PORK",
"SUINO TIPO BANHA FEMEA": "PORK",
"SUINO TIPO BANHA MACHO": "PORK",
"SUINO TIPO CARNE FEMEA": "PORK",
"SUINO TIPO CARNE MACHO": "PORK",
#
# These animal products are not interesting to use
# Intentionally keeping this as an explicit list!
#
"ASININO": "OTHER",
"ASININO FEMEA": "OTHER",
"ASININO MACHO": "OTHER",
"AVES": "OTHER",
"AVES NAO DESTINADAS A PRODUCAO DE CARNE OU OVOS ORNAMENTAISSILVESTRES": "OTHER",
"AVESTRUZ": "OTHER",
"AVESTRUZ FEMEA": "OTHER",
"AVESTRUZ MACHO": "OTHER",
"BAGRE AFRICANO": "OTHER",
"BUBALINO": "OTHER",
"BUBALINO JOVEM": "OTHER",
"BUFALA": "OTHER",
"BUFALO": "OTHER",
"CABRITA": "OTHER",
"CABRITO": "OTHER",
"CAMARAO": "OTHER",
"CAPAO": "OTHER",
"CAPIVARA": "OTHER",
"CAPRINO": "OTHER",
"CAPRINO FEMEA": "OTHER",
"CAPRINO MACHO": "OTHER",
"CATETO": "OTHER",
"CODORNA": "OTHER",
"COELHA": "OTHER",
"COELHO": "OTHER",
"CORDEIRA": "OTHER",
"CORDEIRO": "OTHER",
"CUTIA FEMEA": "OTHER",
"CUTIA MACHO": "OTHER",
"EMA": "OTHER",
"EQUINO": "OTHER",
"EQUINO FEMEA": "OTHER",
"EQUINO MACHO": "OTHER",
"FAIZAO": "OTHER",
"GALINHA-D'ANGOLA": "OTHER",
"GALINHA-DANGOLA": "OTHER",
"GANSO": "OTHER",
"JACARE": "OTHER",
"JACARE DO PANTANAL": "OTHER",
"JACARE-DO-PAPO-AMARELO": "OTHER",
"JAVALI": "OTHER",
"JAVALI FEMEA": "OTHER",
"JAVALI MACHO": "OTHER",
"LAMBARI": "OTHER",
"MARRECO": "OTHER",
"MUAR": "OTHER",
"MUAR FEMEA": "OTHER",
"MUAR MACHO": "OTHER",
"NONATO": "OTHER",
"OUTROS ANFIBIOS": "OTHER",
"OVELHA": "OTHER",
"OVINO": "OTHER",
"OVINO FEMEA": "OTHER",
"OVINO MACHO": "OTHER",
"PACA FEMEA": "OTHER",
"PACA MACHO": "OTHER",
"PANGA PANGASIUS": "OTHER",
"PATO": "OTHER",
"PEIXE": "OTHER",
"PERDIZ": "OTHER",
"PERU": "OTHER",
"PIAUCU": "OTHER",
"PINTADO": "OTHER",
"PINTADO DA AMAZONIA": "OTHER",
"PIRAPITINGA": "OTHER",
"PIRARUCU": "OTHER",
"QUEIXADA": "OTHER",
"RA": "OTHER",
"RA TOURO": "OTHER",
"RA-TOURO": "OTHER",
"RATITAS": "OTHER",
"SAVELHA": "OTHER",
"TAMBAQUI": "OTHER",
"TAMBATINGA": "OTHER",
"TILAPIA": "OTHER",
"TILAPIA DO NILO": "OTHER",
"TILAPIA TILAPIA-DO-NILO": "OTHER",
"TRUTA TRUTA-ARCO-IRIS": "OTHER",
"UMA OU MAIS ESPECIES": "OTHER",
#
# These products are chicken, but they are intentionally excluded
# We look for broilers, meat
#
"GALINHA DANGOLA": "OTHER",
"GALINHA DE POSTURA": "OTHER",
"GALINHA": "OTHER",
"GALO": "OTHER",
"GALETO": "OTHER",
}
def clean_string(text, upper=True):
"""
Take a string and clean it!
- Remove double-whitespace
- Remove tab, newline, return, formfeed, etc.
- Replace accented characters (e.g. ö becomes o)
- Trim leading and trailing whitespace
- Convert to upper-case
"""
def keep(character):
category = unicodedata.category(character)
return (
category[0] != "C" # ignore control characters
and category != "Zl" # ignore line separator
and category != "Zp" # ignore paragraph separator
and category != "Pe" # ignore parentheses left
and category != "Ps" # ignore parentheses right
and category != "Po" # ignore asterisk
)
text = "".join(c for c in text if keep(c))
text = " ".join(text.split())
text = unidecode(text)
return text.upper() if upper else text
def main():
url = "https://dados.agricultura.gov.br/dataset/062166e3-b515-4274-8e7d-68aadd64b820/resource/239eaa90-35cd-4b67-8902-d34eda3dca53/download/sigsifquantitativoanimaisabatidoscategoriauf.csv"
df = pd.read_csv(url, sep=";", dtype=str, encoding="utf-8")
df.columns = [
"YEAR",
"MONTH",
"STATE_SLAUGHTER",
"MUNICIPALITY_LABEL",
"DESCRIPTION",
"QUANTITY",
]
# Clean strings (uppercase, ASCII, no accented characters)
df["DESCRIPTION"] = df["DESCRIPTION"].apply(clean_string)
df["MUNICIPALITY_LABEL"] = df["MUNICIPALITY_LABEL"].apply(clean_string)
# Parse portugese category to english commodity
df["TYPE"] = df["DESCRIPTION"].apply(COMMODITY_DESCRIPTION_TRANSLATIONS.__getitem__)
return df
if __name__ == "__main__":
df = main()
write_csv_for_upload(
df, "brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.csv"
)
from trase.data.brazil.production.statistics.sigsif.original.SIGSIF_SLAUGHTER_2022 import (
main,
)
def model(dbt, cursor):
return main()