Tac Signatories
s3://trase-storage/brazil/indicators/actors/zd_commitments/out/TAC_SIGNATORIES.csv
Dbt path: trase_production.main_brazil.tac_signatories
Explore on Metabase: Full table; summary statistics
Containing yaml file link: trase/data_pipeline/models/brazil/indicators/actors/zd_commitments/out/_schema.yml
Model file link: trase/data_pipeline/models/brazil/indicators/actors/zd_commitments/out/tac_signatories.py
Calls script: trase/tools/sei_pcs_old/brazil/beef/scripts/01_DATA_CLEANING/ZDC/CLEAN_TAC.R
Dbt test runs & lineage: Test results ยท Lineage
Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)
Tags: mock_model, actors, brazil, indicators, out, zd_commitments
tac_signatories
Description
No description
Details
| Column | Type | Description |
|---|---|---|
Models / Seeds
source.trase_duckdb.trase-storage-raw.ticiana_tac_list_july_2016_reformatted_rvsource.trase_duckdb.trase-storage-raw.slaughterhouses_brazil_2018_09_18_shpsource.trase_duckdb.trase-storage-raw.cnpj_all_20190102
Sources
['trase-storage-raw', 'ticiana_tac_list_july_2016_reformatted_rv']['trase-storage-raw', 'slaughterhouses_brazil_2018_09_18_shp']['trase-storage-raw', 'cnpj_all_20190102']
# Clean the TAC data, shared by Holly Gibbs
# Join to our slaughterhouse data
# !!! I may be able to get more CNPJs by checking against the pdf documents here:
# ... http://www.mpf.mp.br/mt/sala-de-imprensa/pecuaria-sustentavel
# ... e.g. Click on Burger King to get a pdf with CNPJs of slaughterhouses
# packages
library(tidyverse)
library(readxl)
library(stringdist)
library(fs)
library(sf)
library(stringi)
library(aws.s3)
# location code run
working <- "offline" # OR RstudioServer
# set AWS credentials
if (working == "RstudioServer") {
aws.signature::use_credentials(file = "/storepelican/erasmus/.aws/credentials")
setwd("/storepelican/erasmus/R/GTAs_on_cloud")
} else {
aws.signature::use_credentials()
}
# Load helper functions and data
source("scripts/USEFUL_SNIPPETS.R")
# Load TAC signatories
# ... nb these data are also online
if (fs::file_exists("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")) {
tac <- read_excel("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")
} else {
# excel files can't be read from raw, so save them locally before reading
bucket <- "trase-storage"
path <- "data/5-INDICATORS/ACTORS/BRAZIL/ZD_COMMITMENTS/IN/"
files <- map(get_bucket(bucket, prefix = path), "Key")
file_name <- files[str_detect(files, "Ticiana_TAC_list_July_2016_reformatted_RV")]
input_dir <- file.path(getwd(), "data/TAC/")
if (!dir.exists(input_dir)) {
dir.create(input_dir)
}
walk(file_name, ~ save_object(., bucket, file = str_c(input_dir, basename(.))))
tac <- read_excel("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")
rm(bucket, path, files, file_name, input_dir) # tidy up
}
# Clean column names
tac <-
tac %>%
select(
ID = X__1,
COMPANY = company,
YEAR_TAC_SIGNED = `TAC year`,
MONTH_TAC_SIGNED = `TAC month`,
STATE = `state that TAC was originally signed in`,
APPLIES_TO_LEGAL_AMAZON = `The TAC signed was the new model ( for the whole Legal Amazon )`
) %>%
mutate_if(is.character, str_to_upper) %>%
mutate(COMPANY = fn_clean_spaced_names(str_trans(COMPANY)))
# Load slaughterhouses
shs <- read_sf("shp/LOGISTICS_MAP/slaughterhouses_brazil_2018-09-18.shp")
# Try to match TACs to slaughterhouses by name and state
shs_to_join <- shs %>%
select(COMPA,
CNPJ,
INSPECTI_1,
SH_CNPJ = CNPJ,
SH_STATE = STATE,
GEOCODE = GEOCO,
OTHER
) %>%
mutate(COMPANY = fn_clean_spaced_names(str_trans(COMPA))) %>%
select(-COMPA) %>%
distinct()
rm(shs) # tidy up
# Load CNPJ database
cnpjs <- read_delim("data/CNPJs/OUT/CNPJ_ALL_20190102.csv", delim = ";")
colnames(cnpjs)[c(1:3, 8, 9, 13, 14)] <- c("CNPJ", "business_name", "facility_name", "MUNICIPALITY", "STATE", "primary_CNAE", "secondary_CNAE")
cnpjs <- cnpjs %>%
select(CNPJ, business_name, facility_name, GEOCODE, primary_CNAE)
# Matching ----------------------------------------------------------------
# (A) Join by matching 'COMPANY' and STATE
tac_cnpj <-
left_join(tac, shs_to_join, by = c("STATE" = "SH_STATE")) %>%
filter(stringdist(COMPANY.x, COMPANY.y) <= 4) %>%
filter(
!(COMPANY.x == "JBS" & COMPANY.y == "BRF"),
!(COMPANY.x == "RS" & COMPANY.y == "BRF"),
!(COMPANY.x == "BRF" & COMPANY.y == "JBS"),
!(COMPANY.x == "JBS" & COMPANY.y == "BRF"),
!(COMPANY.x == "B COMERCIO DE CARNES E FRIOS" & COMPANY.y == "JM COMERCIO DE CARNES E FRIOS"),
!(COMPANY.x == "BR COMERCIO DE CARNES" & COMPANY.y == "GM COMERCIO DE CARNES"),
!(COMPANY.x == "BRF" & COMPANY.y == "BRASFRI"),
!(COMPANY.x == "CASA DE CARNE MINEIRA" & COMPANY.y == "CASA DE CARNE VITORIA"),
!(COMPANY.x == "F I E L" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FAMA" & COMPANY.y == "JBS"),
!(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIBOA FRIGORIFICO"),
!(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIOURO FRIGORIFICO"),
!(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIGONE FRIGORIFICO"),
!(COMPANY.x == "FRICARNE" & COMPANY.y == "FRIBOMCARNE"),
!(COMPANY.x == "FRIG WEST FRIGORIFICO" & COMPANY.y == "FRIGONE FRIGORIFICO"),
!(COMPANY.x == "FRIGOBEL" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRIGOCARNE" & COMPANY.y == "FRIBOMCARNE"),
!(COMPANY.x == "FRIGOMAR" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRIGOMAR" & COMPANY.y == "FRIBOMCARNE"),
!(COMPANY.x == "FRIGOSAN" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRICARNE" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRIMON ALIMENTOS" & COMPANY.y == "UNIBOI ALIMENTOS"),
!(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRIGOSUL" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "BR COMERCIO DE CARNES"),
!(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "GM COMERCIO DE CARNES"),
!(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "F E L COMERCIO DE CARNES"),
!(COMPANY.x == "JBS" & COMPANY.y == "RAMOS"),
!(COMPANY.x == "MARFRIG" & COMPANY.y == "BRASFRI"),
!(COMPANY.x == "MARFRIG" & COMPANY.y == "MARGEN"),
!(COMPANY.x == "NATURAFRIG ALIMENTOS" & COMPANY.y == "NUTRIFRIGO ALIMENTOS"),
!(COMPANY.x == "PANTANEIRA INDUSTRIA E COMERCIO DE CARNES" & COMPANY.y == "PANTANAL INDUSTRIA E COMERCIO DE CARNES"),
!(COMPANY.x == "RIO MARIA" & COMPANY.y == "J R MAIA"),
!(COMPANY.x == "RS" & COMPANY.y == "JBS"),
!(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
!(COMPANY.x == "VITORIA" & COMPANY.y == "MINERVA")
)
# Check the non-exact matches
tac_cnpj %>%
filter(COMPANY.x != COMPANY.y) %>%
select(COMPANY.x, COMPANY.y) %>%
unique()
# (B) Exact matches with 'ALTERNATIVE_NAMES'
non_matched_tac <-
tac %>%
filter(!ID %in% tac_cnpj$ID)
shs_unnested <- shs_to_join %>%
mutate(
ALTERNATIVE_NAMES = str_split(OTHER, pattern = ","),
ALTERNATIVE_NAMES = map(ALTERNATIVE_NAMES, str_trans),
ALTERNATIVE_NAMES = map(ALTERNATIVE_NAMES, fn_clean_spaced_names)
) %>%
unnest() %>%
distinct()
tac_cnpj2 <-
left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
filter(COMPANY.x == ALTERNATIVE_NAMES)
# (C) Inexact matches with ALTERNATIVE_NAMES
non_matched_tac <-
tac %>%
filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID))
tac_cnpj3 <-
left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
filter(stringdist(COMPANY.x, ALTERNATIVE_NAMES) <= 4) %>%
filter(
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRINORTE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOI VERDE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOVINORTE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRIFORT"),
!(COMPANY.x == "JBS" & ALTERNATIVE_NAMES == "BRF"),
!(COMPANY.x == "RS" & ALTERNATIVE_NAMES == "BRF"),
!(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "JBS"),
!(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "FARM"),
!(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNES PIONEIRA"),
!(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNE VITORIA"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIMAR"),
!(COMPANY.x == "CENTAURO" & ALTERNATIVE_NAMES == "COAGRO"),
!(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL DANDI"),
!(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL MARANHAO"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIBAT"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRISA"),
!(COMPANY.x == "COOPERMEAT" & ALTERNATIVE_NAMES == "COOPERSAL"),
!(COMPANY.x == "DURLICOUROS" & ALTERNATIVE_NAMES == "F R COUROS"),
!(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRIGOL"),
!(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRICAL"),
!(COMPANY.x == "FAMA" & ALTERNATIVE_NAMES %in% c(
"PMTS", "JBS", "FRICAL", "FECAF", "GUARA", "COMAB", "FRICAM",
"JBL", "FBR", "SM", "PMA"
)),
!(COMPANY.x == "FRICARNE" & ALTERNATIVE_NAMES %in% c(
"FRINORTE", "FRIPAGO", "FRIPAGO", "FRICAL", "FRIGOCARNES", "LACARNE", "BB CARNES",
"FRICAM", "FRICABOMCARNES", "SO CARNE", "FRIBOMCARNES", "FRIFORT"
)),
!(COMPANY.x == "FRIG WEST FRIGORIFICO" & ALTERNATIVE_NAMES == "FRIGONE FRIGORIFICO"),
!(COMPANY.x == "FRIGOBEL" & ALTERNATIVE_NAMES %in% c(
"FRIGONORT", "FRIGOVAN", "FRINORTE", "MFRIGOL", "FRIGOL", "FRIBOI",
"FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN", "FRIGOMAR", "FRIGOSUL",
"FRIGOVETE", "FRIGOMAX", "FRIGOKAR", "FRIFORT"
)),
!(COMPANY.x == "FRIGOCARNE" & ALTERNATIVE_NAMES %in% c(
"COOPERSAL", "FRIGONORT", "FRIGONORTE", "FRIGOVAN", "FRIGOFRUT", "FRIGOTAI",
"FRIGOSAN", "FRIGOMAR", "FRINORTE", "FRIGOVETE", "FRIGOMAX", "FRIBOMCARNES",
"FRIGOKAR"
)),
!(COMPANY.x == "FRIGOMAR" & ALTERNATIVE_NAMES %in% c(
"MAFRIMAR", "FRINORT", "FRIGONORTE", "FRIGONORT", "FRIGOVAN", "FRINORTE",
"MFRIGOL", "FRIGOL", "FRIBOI", "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN",
"FRIGOSUL", "FRIGOCARNES", "FRIGOVETE", "FRIGOMAX", "FRIGOCAM", "FRIGOKAR", "FRIFORT"
)),
!(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("UNIBOI ALIMENTOS", "ATIVO ALIMENTOS", "FRIBAR ALIMENTOS")),
!(COMPANY.x == "J S COMERCIO DE CARNES" & ALTERNATIVE_NAMES %in% c("VP COMERCIO DE CARNE", "BR COMERCIO DE CARNES")),
!(COMPANY.x == "MARFRIG" & ALTERNATIVE_NAMES %in% c("SUIFRI")),
!(COMPANY.x == "RIBEIRO" & ALTERNATIVE_NAMES %in% c("FRIBOI")),
!(COMPANY.x == "RIO MARIA" & ALTERNATIVE_NAMES %in% c("COOMARCA")),
!(COMPANY.x == "RS" & ALTERNATIVE_NAMES %in% c("JBS", "", "F", "AGRA", "CORSAL", "ERPPA")),
!(COMPANY.x == "VITORIA" & ALTERNATIVE_NAMES %in% c("MINERVA"))
)
# Check the matches are genuine
tac_cnpj3 %>%
select(COMPANY.x, ALTERNATIVE_NAMES, STATE) %>%
filter(COMPANY.x != ALTERNATIVE_NAMES) %>%
distinct()
# (D) Try matching by the jw distance for ALTERNATIVE_NAMES
non_matched_tac <-
tac %>%
filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID))
tac_cnpj4 <-
left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
filter(stringdist(COMPANY.x, ALTERNATIVE_NAMES, method = "jw") <= 0.18) %>%
filter(
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRINORTE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOI VERDE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOVINORTE"),
!(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRIFORT"),
!(COMPANY.x == "JBS" & ALTERNATIVE_NAMES == "BRF"),
!(COMPANY.x == "RS" & ALTERNATIVE_NAMES == "BRF"),
!(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "JBS"),
!(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "FARM"),
!(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNES PIONEIRA"),
!(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNE VITORIA"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIMAR"),
!(COMPANY.x == "CENTAURO" & ALTERNATIVE_NAMES == "COAGRO"),
!(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL DANDI"),
!(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL MARANHAO"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIBAT"),
!(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRISA"),
!(COMPANY.x == "COOPERMEAT" & ALTERNATIVE_NAMES == "COOPERSAL"),
!(COMPANY.x == "DURLICOUROS" & ALTERNATIVE_NAMES == "F R COUROS"),
!(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRIGOL"),
!(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRICAL"),
!(COMPANY.x == "FAMA" & ALTERNATIVE_NAMES %in% c(
"PMTS", "JBS", "FRICAL", "FECAF", "GUARA", "COMAB", "FRICAM",
"JBL", "FBR", "SM", "PMA"
)),
!(COMPANY.x == "FRICARNE" & ALTERNATIVE_NAMES %in% c(
"FRINORTE", "FRIPAGO", "FRIPAGO", "FRICAL", "FRIGOCARNES", "LACARNE", "BB CARNES",
"FRICAM", "FRICABOMCARNES", "SO CARNE", "FRIBOMCARNES", "FRIFORT"
)),
!(COMPANY.x == "FRIG WEST FRIGORIFICO" & ALTERNATIVE_NAMES == "FRIGONE FRIGORIFICO"),
!(COMPANY.x == "FRIGOBEL" & ALTERNATIVE_NAMES %in% c(
"FRIGONORT", "FRIGOVAN", "FRINORTE", "MFRIGOL", "FRIGOL", "FRIBOI",
"FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN", "FRIGOMAR", "FRIGOSUL",
"FRIGOVETE", "FRIGOMAX", "FRIGOKAR", "FRIFORT"
)),
!(COMPANY.x == "FRIGOCARNE" & ALTERNATIVE_NAMES %in% c(
"COOPERSAL", "FRIGONORT", "FRIGONORTE", "FRIGOVAN", "FRIGOFRUT", "FRIGOTAI",
"FRIGOSAN", "FRIGOMAR", "FRINORTE", "FRIGOVETE", "FRIGOMAX", "FRIBOMCARNES",
"FRIGOKAR"
)),
!(COMPANY.x == "FRIGOMAR" & ALTERNATIVE_NAMES %in% c(
"MAFRIMAR", "FRINORT", "FRIGONORTE", "FRIGONORT", "FRIGOVAN", "FRINORTE",
"MFRIGOL", "FRIGOL", "FRIBOI", "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN",
"FRIGOSUL", "FRIGOCARNES", "FRIGOVETE", "FRIGOMAX", "FRIGOCAM", "FRIGOKAR", "FRIFORT"
)),
!(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("UNIBOI ALIMENTOS", "ATIVO ALIMENTOS", "FRIBAR ALIMENTOS")),
!(COMPANY.x == "J S COMERCIO DE CARNES" & ALTERNATIVE_NAMES %in% c("VP COMERCIO DE CARNE", "BR COMERCIO DE CARNES")),
!(COMPANY.x == "MARFRIG" & ALTERNATIVE_NAMES %in% c("SUIFRI")),
!(COMPANY.x == "RIBEIRO" & ALTERNATIVE_NAMES %in% c("FRIBOI")),
!(COMPANY.x == "RIO MARIA" & ALTERNATIVE_NAMES %in% c("COOMARCA")),
!(COMPANY.x == "RS" & ALTERNATIVE_NAMES %in% c("JBS", "", "F", "AGRA", "CORSAL", "ERPPA")),
!(COMPANY.x == "VITORIA" & ALTERNATIVE_NAMES %in% c("MINERVA")),
!(COMPANY.x == "CASA DE CARNES NOVILHO DE OURO" & ALTERNATIVE_NAMES %in% c("CASA DE CARNES PIONEIRA", "CASA DE CARNE MODELO")),
!(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES %in% c("COMERCIAL LIMA", "COMERCIAL FERREIRA", "COMERCIAL CARVALHO")),
!(COMPANY.x == "FRIGO FREITAS" & ALTERNATIVE_NAMES %in% c("FRIGOFRUT")),
!(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("AMAZONIA ALIMENTOS", "MERCURIO ALIMENTOS")),
!(COMPANY.x == "MARCHANTERIA POPULAR" & ALTERNATIVE_NAMES %in% c("MARCHANTARIA ELDORADO", "MARCHANTERIA GOMES", "MARCHANTERIA CRUZEIRO", "MARCHANTERIA CARAJAS", "MARCHANTERIA REAL")),
!(COMPANY.x == "NAVI CARNES INDUSTRIA E COMERCIO DE ALIMENTOS" & ALTERNATIVE_NAMES %in% c("VALE GRANDE INDUSTRIA E COMERCIO DE ALIMENTOS")),
!(COMPANY.x == "REI DA CARNE" & ALTERNATIVE_NAMES %in% c("IMPERIO DA CARNE"))
)
# Check the matches are genuine
tac_cnpj4 %>%
select(COMPANY.x, ALTERNATIVE_NAMES, STATE) %>%
filter(COMPANY.x != ALTERNATIVE_NAMES) %>%
distinct()
# (E) Look for where the the tac name is contained within slaughterhouse name
# ... and vice-versa
# ... Nb there are a few matadouros with names containing VITORIA... I link the TAC
# ... to the one with the official name "MATADOURO VITORIA"
# ... Nb2 there area also several synonyms of FRIGOVALE, which I filter
non_matched_tac <-
tac %>%
filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID))
tac_cnpj5 <-
left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
filter(
stri_detect_fixed(COMPANY.x, COMPANY.y) |
stri_detect_fixed(COMPANY.y, COMPANY.x) |
stri_detect_fixed(COMPANY.x, ALTERNATIVE_NAMES) |
stri_detect_fixed(ALTERNATIVE_NAMES, COMPANY.x)
) %>%
filter(
!ALTERNATIVE_NAMES %in% c("F", "SM"),
!(COMPANY.x == "RIBEIRO" & COMPANY.y == "RODRIGUES RIBEIRO COMERCIO"),
!(COMPANY.x == "RS" & COMPANY.y != "FRIGORIFICO RS"),
!(COMPANY.x == "VITORIA" & COMPANY.y != "MATADOURO VITORIA"),
!(COMPANY.x == "FRIGOVALE DO GUAPORE" & COMPANY.y == "OLAIR GONCALVES"),
!(COMPANY.x == "FRIGOVALE DO GUAPORE" & COMPANY.y == "FRIGORIFICO GL")
)
# Confirm the matches are genuine
tac_cnpj5 %>%
select(COMPANY.x, COMPANY.y, ALTERNATIVE_NAMES, STATE) %>%
distinct() %>%
as.data.frame()
# Select remaining non-matches
non_matched_tac <-
tac %>%
filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID, tac_cnpj5$ID))
# (F) Manual matches
# (Fi) Make df from slaughterhouse dataset, where names match the TAC data
shs_to_join2 <-
shs_unnested %>%
mutate(COMPANY = case_when(
COMPANY == "BOMBONATTO INDUSTRIA DE ALIMENTOS" ~ "BOMBONATTO INDUSTRIA E COMERCIO DE ALIMENTOS",
COMPANY == "CASA DE CARNE MINEIRA" |
ALTERNATIVE_NAMES == "CASA DE CARNE MINEIRA" ~ "CARNE MINEIRA",
COMPANY == "R R FREITAS OLIVEIRA" |
ALTERNATIVE_NAMES == "R R FREITAS OLIVEIRA" ~ "FRIGO FREITAS",
COMPANY == "FRIGORIFICO MERCURIO" |
ALTERNATIVE_NAMES == "FRIGORIFICO MERCURIO" ~ "MERCURIO FIGRORIFICO FABRIL E EXP DE ALIM",
COMPANY == "PINHEIRO MEDEIROS IND E COM DE CARNES E DERIVADOS E SERVICOS" |
ALTERNATIVE_NAMES == "PINHEIRO MEDEIROS IND E COM DE CARNES E DERIVADOS E SERVICOS" ~ "PINHEIRO MEDEIROS IND E COM DE CARNES R DERIVADOS E SERVICOS FRIGOVAVA",
COMPANY == "A M SANTOS" |
ALTERNATIVE_NAMES == "A M SANTOS" ~ "A SOMBRA DOS SANTOS FRIGORIFICO QUINDER",
COMPANY == "ABATEDOURO BOI FORTE" |
ALTERNATIVE_NAMES == "ABATEDOURO BOI FORTE" ~ "BOI FORTE",
COMPANY == "FRIGORIFICO REDENTOR" |
ALTERNATIVE_NAMES == "FRIGORIFICO REDENTOR" ~ "REDENTOR",
(COMPANY == "FRIGOSUL" |
ALTERNATIVE_NAMES == "FRIGORIFICO SUL") ~ "RODRIGO MORAES DA SILVA FRIGOSUL",
COMPANY == "FRIGORIFICO VALE DO BUGRES" |
ALTERNATIVE_NAMES == "FRIGORIFICO VALE DO BUGRES" ~ "VALE BUGRES",
COMPANY == "FRIGORIFICO REDENTOR" |
ALTERNATIVE_NAMES == "FRIGORIFICO REDENTOR" ~ "REDENTOR",
TRUE ~ COMPANY
))
# Add in manual matches
tac_cnpj6 <-
inner_join(non_matched_tac, shs_to_join2, by = c("COMPANY", "STATE" = "SH_STATE"))
# (Fii) Add in CNPJ (where known) for cases which don't appear in the slaughterhouse data
# ... some of these are slaughterhouses missing from our mapping
# ... some of these businesses may not be slaughterhouses (e.g. tannery, distributor, retailer)
tac_cnpj7 <-
non_matched_tac %>%
mutate(SH_CNPJ = case_when(
COMPANY == "CASA DE CARNES NOVILHO DE OURO" & STATE == "PA" ~ "09609271000113",
COMPANY == "CENTAURO" ~ "22973077000180",
COMPANY == "NAVI CARNES INDUSTRIA E COMERCIO DE ALIMENTOS" & STATE == "MT" ~ "02982267000661",
COMPANY == "OFERTAO DAS CARNES" & STATE == "PA" ~ "05982770000109",
COMPANY == "AGROPASTORIL ESTEVAM" & STATE == "AC" ~ "64611213000647",
COMPANY == "AGROPEL AGROPECUARIA COMERCIO REPRESENTACOES" ~ "05042346000176",
COMPANY == "AMAZON MEAT INDUSTRIA DE ALIMENTOS" & STATE == "AC" ~ "08467304000507",
COMPANY == "AMAZONIA SA INDUSTRIA ALIMENTICIA" & STATE == "PA" ~ "05057179000218",
COMPANY == "BOI BRANCO COMERCIAL EXPORTACAO E IMPORTACAO" ~ "15294317000155",
COMPANY == "BRULINE DISTRIBUIDORA" ~ "83755660000100",
COMPANY == "COURO DO NORTE" & STATE == "PA" ~ "04134342000155",
COMPANY == "CURTUME IDEAL IMPORTACAO E EXPORTACAO" ~ "04333952000188",
COMPANY == "DO SOCORRO S NASCIMENTO" ~ "21873909268",
COMPANY == "DONALDO GOMES" ~ "32024673791",
COMPANY == "DURLICOUROS" ~ "00105229000433",
COMPANY == "ELISMAR FIGUEIRA MENDES" ~ "06702570287",
COMPANY == "F O NASCIMENTO" ~ "05626152000118",
COMPANY == "FAMA" ~ "84144815000134",
COMPANY == "FRIGOBEL" ~ "05829958000103",
COMPANY == "FRIMON ALIMENTOS" ~ "02739700000128",
COMPANY == "J S COMERCIO DE CARNES" ~ "09024379000144",
COMPANY == "JOSE ARLINDO DA SILVA SANTOS" ~ "22875026291",
COMPANY == "KAIAPOS" ~ "63880884000136",
COMPANY == "MERCANTIL BENEVIDES" ~ "10659809000183",
COMPANY == "PORTAL DO BOI" ~ "09688838000194",
COMPANY == "REI DA CARNE" ~ "02861780000190",
COMPANY == "SUPERMERCADO GUANABARA" ~ "07919251000113",
COMPANY == "JOSE BONIFACIO" ~ "54964457000145",
COMPANY == "FRIG WEST FRIGORIFICO" ~ "01426902000148",
COMPANY == "PLENA ALIMENTOS" ~ "10198974000185",
COMPANY == "MARCELO SAMPAIO CORREA" ~ "12941444000100",
TRUE ~ NA_character_
)) %>%
filter(!is.na(SH_CNPJ))
# Look at the remaining non-matches
non_matched_tac <-
tac %>%
filter(!ID %in% c(
tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID,
tac_cnpj5$ID, tac_cnpj6$ID, tac_cnpj7$ID
))
non_matched_tac %>%
select(COMPANY, STATE) %>%
distinct()
# Join all the TAC-CNPJ data.frames together
fn_keep_cols <- function(df) {
if ("COMPANY.x" %in% names(df)) {
df %>%
select(
COMPANY = COMPANY.x,
YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON, SH_CNPJ
)
} else if ("SH_CNPJ" %in% names(df)) {
df %>%
select(COMPANY,
YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON, SH_CNPJ
)
} else {
df %>%
select(COMPANY,
YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON
) %>%
mutate(SH_CNPJ = NA_character_)
}
}
tac_cnpj_all <- list(
tac_cnpj, tac_cnpj2,
tac_cnpj3, tac_cnpj4,
tac_cnpj5, tac_cnpj6,
tac_cnpj7, non_matched_tac
)
tac_cnpj_all <- map_df(tac_cnpj_all, fn_keep_cols) %>%
distinct()
# Check no TACs were dropped
if (length(unique(tac_cnpj_all$COMPANY)) != length(unique(tac$COMPANY))) {
stop("Error in TAC matching - go back and check")
}
# Add some CNPJs where missing because of multiple CNPJs per business
# ... MERCURIO FIGRORIFICO FABRIL E EXP DE ALIM also operates slaughterhouses
# ... under Mercurio Alimentos (I've confirmed this by looking at the SIF numbers on their website)
# ... http://www.mercuriofabril.com.br/index.php?url=nav/sobrenos
new_row <-
tac_cnpj_all %>%
filter(grepl("MERCURIO", COMPANY)) %>%
mutate(SH_CNPJ = "11831785000241")
new_rows <-
tac_cnpj_all %>%
filter(grepl("MERCURIO", COMPANY)) %>%
mutate(SH_CNPJ = "11831785000322") %>%
bind_rows(new_row)
tac_cnpj_all <-
bind_rows(tac_cnpj_all, new_rows)
# tidy up
rm(
tac_cnpj, tac_cnpj2,
tac_cnpj3, tac_cnpj4,
tac_cnpj5, tac_cnpj6,
tac_cnpj7, non_matched_tac,
fn_keep_cols, shs_to_join, shs_to_join2,
shs_unnested, cnpjs, tac,
new_row, new_rows
)
# Export data -------------------------------------------------------------
# (A) Export to S3
# write to an in-memory raw connection
zz <- rawConnection(raw(0), "r+")
write_delim(tac_cnpj_all, zz, delim = ";")
# upload the object to S3
aws.s3::put_object(file = rawConnectionValue(zz), bucket = "trase-storage", object = "brazil/indicators/actors/zd_commitments/out/TAC_SIGNATORIES.csv")
# close the connection
close(zz)
# (B) Export to local folder
output_dir <- file.path(getwd(), "data/TAC/OUT")
if (!dir.exists(output_dir)) {
dir.create(output_dir)
} else {
print("data/TAC/OUT directory already exists")
}
# All TACs are linked to CNPJs
tac_cnpj_all %>%
write_delim("data/TAC/OUT/TAC_SIGNATORIES.csv",
delim = ";"
)
# tidy up
rm(tac_cnpj_all, zz, output_dir)
import pandas as pd
def model(dbt, cursor):
dbt.source("trase-storage-raw", "ticiana_tac_list_july_2016_reformatted_rv")
dbt.source("trase-storage-raw", "slaughterhouses_brazil_2018_09_18_shp")
dbt.source("trase-storage-raw", "cnpj_all_20190102")
raise NotImplementedError()
return pd.DataFrame({"hello": ["world"]})