Skip to content

Sigsif Slaughter 2022

s3://trase-storage/brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.csv

Dbt path: trase_production.main_brazil.sigsif_slaughter_2022

Explore on Metabase: Full table; summary statistics

Containing yaml file link: trase/data_pipeline/models/brazil/production/statistics/sigsif/original/_schema.yml

Model file link: trase/data_pipeline/models/brazil/production/statistics/sigsif/original/sigsif_slaughter_2022.py

Calls script: trase/data/brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.py

Dbt test runs & lineage: Test results · Lineage

Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)

Tags: brazil, original, production, sigsif, statistics


sigsif_slaughter_2022

Description

No description


Details

Column Type Description
"""
Adquire SIGSIF data
Save a csv file in sigsif/original/

Last run: 07/03/2023
Author: Yan Prada Moro
"""

import unicodedata
import pandas as pd

from unidecode import unidecode

from trase.tools.aws.metadata import write_csv_for_upload

COMMODITY_DESCRIPTION_TRANSLATIONS = {
    #
    # Beef
    #
    "BOVINO": "CATTLE",
    "BOVINO FEMEA": "CATTLE",
    "BOVINO MACHO": "CATTLE",
    "NOVILHA INTERMEDIARIA": "CATTLE",
    "NOVILHA PRECOCE": "CATTLE",
    "NOVILHAO": "CATTLE",
    "NOVILHO INTERMEDIARIO": "CATTLE",
    "NOVILHO PRECOCE": "CATTLE",
    "NOVILHONA": "CATTLE",
    "TOUROTOURUNO": "CATTLE",
    "VACA": "CATTLE",
    "VITELO": "CATTLE",
    #
    # Chicken
    #
    "FRANGO ESPECIAL": "CHICKEN",
    "FRANGO": "CHICKEN",
    #
    # Pork
    #
    "LEITAO": "PORK",
    "LEITOA": "PORK",
    "PORCA MATRIZEIRA": "PORK",
    "PORCO MATRIZEIRO": "PORK",
    "PORCO NAO CLASSIFICADO": "PORK",
    "SUINO": "PORK",
    "SUINO FEMEA": "PORK",
    "SUINO MACHO": "PORK",
    "SUINO TIPO BANHA FEMEA": "PORK",
    "SUINO TIPO BANHA MACHO": "PORK",
    "SUINO TIPO CARNE FEMEA": "PORK",
    "SUINO TIPO CARNE MACHO": "PORK",
    #
    # These animal products are not interesting to use
    # Intentionally keeping this as an explicit list!
    #
    "ASININO": "OTHER",
    "ASININO FEMEA": "OTHER",
    "ASININO MACHO": "OTHER",
    "AVES": "OTHER",
    "AVES NAO DESTINADAS A PRODUCAO DE CARNE OU OVOS ORNAMENTAISSILVESTRES": "OTHER",
    "AVESTRUZ": "OTHER",
    "AVESTRUZ FEMEA": "OTHER",
    "AVESTRUZ MACHO": "OTHER",
    "BAGRE AFRICANO": "OTHER",
    "BUBALINO": "OTHER",
    "BUBALINO JOVEM": "OTHER",
    "BUFALA": "OTHER",
    "BUFALO": "OTHER",
    "CABRITA": "OTHER",
    "CABRITO": "OTHER",
    "CAMARAO": "OTHER",
    "CAPAO": "OTHER",
    "CAPIVARA": "OTHER",
    "CAPRINO": "OTHER",
    "CAPRINO FEMEA": "OTHER",
    "CAPRINO MACHO": "OTHER",
    "CATETO": "OTHER",
    "CODORNA": "OTHER",
    "COELHA": "OTHER",
    "COELHO": "OTHER",
    "CORDEIRA": "OTHER",
    "CORDEIRO": "OTHER",
    "CUTIA FEMEA": "OTHER",
    "CUTIA MACHO": "OTHER",
    "EMA": "OTHER",
    "EQUINO": "OTHER",
    "EQUINO FEMEA": "OTHER",
    "EQUINO MACHO": "OTHER",
    "FAIZAO": "OTHER",
    "GALINHA-D'ANGOLA": "OTHER",
    "GALINHA-DANGOLA": "OTHER",
    "GANSO": "OTHER",
    "JACARE": "OTHER",
    "JACARE DO PANTANAL": "OTHER",
    "JACARE-DO-PAPO-AMARELO": "OTHER",
    "JAVALI": "OTHER",
    "JAVALI FEMEA": "OTHER",
    "JAVALI MACHO": "OTHER",
    "LAMBARI": "OTHER",
    "MARRECO": "OTHER",
    "MUAR": "OTHER",
    "MUAR FEMEA": "OTHER",
    "MUAR MACHO": "OTHER",
    "NONATO": "OTHER",
    "OUTROS ANFIBIOS": "OTHER",
    "OVELHA": "OTHER",
    "OVINO": "OTHER",
    "OVINO FEMEA": "OTHER",
    "OVINO MACHO": "OTHER",
    "PACA FEMEA": "OTHER",
    "PACA MACHO": "OTHER",
    "PANGA PANGASIUS": "OTHER",
    "PATO": "OTHER",
    "PEIXE": "OTHER",
    "PERDIZ": "OTHER",
    "PERU": "OTHER",
    "PIAUCU": "OTHER",
    "PINTADO": "OTHER",
    "PINTADO DA AMAZONIA": "OTHER",
    "PIRAPITINGA": "OTHER",
    "PIRARUCU": "OTHER",
    "QUEIXADA": "OTHER",
    "RA": "OTHER",
    "RA TOURO": "OTHER",
    "RA-TOURO": "OTHER",
    "RATITAS": "OTHER",
    "SAVELHA": "OTHER",
    "TAMBAQUI": "OTHER",
    "TAMBATINGA": "OTHER",
    "TILAPIA": "OTHER",
    "TILAPIA DO NILO": "OTHER",
    "TILAPIA TILAPIA-DO-NILO": "OTHER",
    "TRUTA TRUTA-ARCO-IRIS": "OTHER",
    "UMA OU MAIS ESPECIES": "OTHER",
    #
    # These products are chicken, but they are intentionally excluded
    # We look for broilers, meat
    #
    "GALINHA DANGOLA": "OTHER",
    "GALINHA DE POSTURA": "OTHER",
    "GALINHA": "OTHER",
    "GALO": "OTHER",
    "GALETO": "OTHER",
}


def clean_string(text, upper=True):
    """
    Take a string and clean it!

    - Remove double-whitespace
    - Remove tab, newline, return, formfeed, etc.
    - Replace accented characters (e.g. ö becomes o)
    - Trim leading and trailing whitespace
    - Convert to upper-case
    """

    def keep(character):
        category = unicodedata.category(character)
        return (
            category[0] != "C"  # ignore control characters
            and category != "Zl"  # ignore line separator
            and category != "Zp"  # ignore paragraph separator
            and category != "Pe"  # ignore parentheses left
            and category != "Ps"  # ignore parentheses right
            and category != "Po"  # ignore asterisk
        )

    text = "".join(c for c in text if keep(c))
    text = " ".join(text.split())
    text = unidecode(text)
    return text.upper() if upper else text


def main():
    url = "https://dados.agricultura.gov.br/dataset/062166e3-b515-4274-8e7d-68aadd64b820/resource/239eaa90-35cd-4b67-8902-d34eda3dca53/download/sigsifquantitativoanimaisabatidoscategoriauf.csv"
    df = pd.read_csv(url, sep=";", dtype=str, encoding="utf-8")
    df.columns = [
        "YEAR",
        "MONTH",
        "STATE_SLAUGHTER",
        "MUNICIPALITY_LABEL",
        "DESCRIPTION",
        "QUANTITY",
    ]

    # Clean strings (uppercase, ASCII, no accented characters)
    df["DESCRIPTION"] = df["DESCRIPTION"].apply(clean_string)
    df["MUNICIPALITY_LABEL"] = df["MUNICIPALITY_LABEL"].apply(clean_string)

    # Parse portugese category to english commodity
    df["TYPE"] = df["DESCRIPTION"].apply(COMMODITY_DESCRIPTION_TRANSLATIONS.__getitem__)

    return df


if __name__ == "__main__":
    df = main()
    write_csv_for_upload(
        df, "brazil/production/statistics/sigsif/original/SIGSIF_SLAUGHTER_2022.csv"
    )
from trase.data.brazil.production.statistics.sigsif.original.SIGSIF_SLAUGHTER_2022 import (
    main,
)


def model(dbt, cursor):
    return main()