Skip to content

Tac Signatories

s3://trase-storage/brazil/indicators/actors/zd_commitments/out/TAC_SIGNATORIES.csv

Dbt path: trase_production.main_brazil.tac_signatories

Explore on Metabase: Full table; summary statistics

Containing yaml file link: trase/data_pipeline/models/brazil/indicators/actors/zd_commitments/out/_schema.yml

Model file link: trase/data_pipeline/models/brazil/indicators/actors/zd_commitments/out/tac_signatories.py

Calls script: trase/tools/sei_pcs_old/brazil/beef/scripts/01_DATA_CLEANING/ZDC/CLEAN_TAC.R

Dbt test runs & lineage: Test results ยท Lineage

Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)

Tags: mock_model, actors, brazil, indicators, out, zd_commitments


tac_signatories

Description

No description


Details

Column Type Description

Models / Seeds

  • source.trase_duckdb.trase-storage-raw.ticiana_tac_list_july_2016_reformatted_rv
  • source.trase_duckdb.trase-storage-raw.slaughterhouses_brazil_2018_09_18_shp
  • source.trase_duckdb.trase-storage-raw.cnpj_all_20190102

Sources

  • ['trase-storage-raw', 'ticiana_tac_list_july_2016_reformatted_rv']
  • ['trase-storage-raw', 'slaughterhouses_brazil_2018_09_18_shp']
  • ['trase-storage-raw', 'cnpj_all_20190102']
# Clean the TAC data, shared by Holly Gibbs
# Join to our slaughterhouse data

# !!! I may be able to get more CNPJs by checking against the pdf documents here:
# ... http://www.mpf.mp.br/mt/sala-de-imprensa/pecuaria-sustentavel
# ... e.g. Click on Burger King to get a pdf with CNPJs of slaughterhouses


# packages
library(tidyverse)
library(readxl)
library(stringdist)
library(fs)
library(sf)
library(stringi)
library(aws.s3)


# location code run
working <- "offline" # OR RstudioServer


# set AWS credentials
if (working == "RstudioServer") {
  aws.signature::use_credentials(file = "/storepelican/erasmus/.aws/credentials")
  setwd("/storepelican/erasmus/R/GTAs_on_cloud")
} else {
  aws.signature::use_credentials()
}


# Load helper functions and data
source("scripts/USEFUL_SNIPPETS.R")


# Load TAC signatories
# ... nb these data are also online
if (fs::file_exists("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")) {
  tac <- read_excel("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")
} else {
  # excel files can't be read from raw, so save them locally before reading
  bucket <- "trase-storage"
  path <- "data/5-INDICATORS/ACTORS/BRAZIL/ZD_COMMITMENTS/IN/"
  files <- map(get_bucket(bucket, prefix = path), "Key")
  file_name <- files[str_detect(files, "Ticiana_TAC_list_July_2016_reformatted_RV")]
  input_dir <- file.path(getwd(), "data/TAC/")
  if (!dir.exists(input_dir)) {
    dir.create(input_dir)
  }
  walk(file_name, ~ save_object(., bucket, file = str_c(input_dir, basename(.))))
  tac <- read_excel("data/TAC/Ticiana_TAC_list_July_2016_reformatted_RV.xlsx")
  rm(bucket, path, files, file_name, input_dir) # tidy up
}


# Clean column names
tac <-
  tac %>%
  select(
    ID = X__1,
    COMPANY = company,
    YEAR_TAC_SIGNED = `TAC year`,
    MONTH_TAC_SIGNED = `TAC month`,
    STATE = `state that TAC was originally signed in`,
    APPLIES_TO_LEGAL_AMAZON = `The TAC signed was the new model ( for the whole Legal Amazon )`
  ) %>%
  mutate_if(is.character, str_to_upper) %>%
  mutate(COMPANY = fn_clean_spaced_names(str_trans(COMPANY)))


# Load slaughterhouses
shs <- read_sf("shp/LOGISTICS_MAP/slaughterhouses_brazil_2018-09-18.shp")


# Try to match TACs to slaughterhouses by name and state
shs_to_join <- shs %>%
  select(COMPA,
    CNPJ,
    INSPECTI_1,
    SH_CNPJ = CNPJ,
    SH_STATE = STATE,
    GEOCODE = GEOCO,
    OTHER
  ) %>%
  mutate(COMPANY = fn_clean_spaced_names(str_trans(COMPA))) %>%
  select(-COMPA) %>%
  distinct()
rm(shs) # tidy up


# Load CNPJ database
cnpjs <- read_delim("data/CNPJs/OUT/CNPJ_ALL_20190102.csv", delim = ";")
colnames(cnpjs)[c(1:3, 8, 9, 13, 14)] <- c("CNPJ", "business_name", "facility_name", "MUNICIPALITY", "STATE", "primary_CNAE", "secondary_CNAE")
cnpjs <- cnpjs %>%
  select(CNPJ, business_name, facility_name, GEOCODE, primary_CNAE)


# Matching ----------------------------------------------------------------


# (A) Join by matching 'COMPANY' and STATE
tac_cnpj <-
  left_join(tac, shs_to_join, by = c("STATE" = "SH_STATE")) %>%
  filter(stringdist(COMPANY.x, COMPANY.y) <= 4) %>%
  filter(
    !(COMPANY.x == "JBS" & COMPANY.y == "BRF"),
    !(COMPANY.x == "RS" & COMPANY.y == "BRF"),
    !(COMPANY.x == "BRF" & COMPANY.y == "JBS"),
    !(COMPANY.x == "JBS" & COMPANY.y == "BRF"),
    !(COMPANY.x == "B COMERCIO DE CARNES E FRIOS" & COMPANY.y == "JM COMERCIO DE CARNES E FRIOS"),
    !(COMPANY.x == "BR COMERCIO DE CARNES" & COMPANY.y == "GM COMERCIO DE CARNES"),
    !(COMPANY.x == "BRF" & COMPANY.y == "BRASFRI"),
    !(COMPANY.x == "CASA DE CARNE MINEIRA" & COMPANY.y == "CASA DE CARNE VITORIA"),
    !(COMPANY.x == "F I E L" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FAMA" & COMPANY.y == "JBS"),
    !(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIBOA FRIGORIFICO"),
    !(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIOURO FRIGORIFICO"),
    !(COMPANY.x == "FRICAL FRIGORIFICO" & COMPANY.y == "FRIGONE FRIGORIFICO"),
    !(COMPANY.x == "FRICARNE" & COMPANY.y == "FRIBOMCARNE"),
    !(COMPANY.x == "FRIG WEST FRIGORIFICO" & COMPANY.y == "FRIGONE FRIGORIFICO"),
    !(COMPANY.x == "FRIGOBEL" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRIGOCARNE" & COMPANY.y == "FRIBOMCARNE"),
    !(COMPANY.x == "FRIGOMAR" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRIGOMAR" & COMPANY.y == "FRIBOMCARNE"),
    !(COMPANY.x == "FRIGOSAN" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRICARNE" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRIMON ALIMENTOS" & COMPANY.y == "UNIBOI ALIMENTOS"),
    !(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRIGOSUL" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "BR COMERCIO DE CARNES"),
    !(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "GM COMERCIO DE CARNES"),
    !(COMPANY.x == "J S COMERCIO DE CARNES" & COMPANY.y == "F E L COMERCIO DE CARNES"),
    !(COMPANY.x == "JBS" & COMPANY.y == "RAMOS"),
    !(COMPANY.x == "MARFRIG" & COMPANY.y == "BRASFRI"),
    !(COMPANY.x == "MARFRIG" & COMPANY.y == "MARGEN"),
    !(COMPANY.x == "NATURAFRIG ALIMENTOS" & COMPANY.y == "NUTRIFRIGO ALIMENTOS"),
    !(COMPANY.x == "PANTANEIRA INDUSTRIA E COMERCIO DE CARNES" & COMPANY.y == "PANTANAL INDUSTRIA E COMERCIO DE CARNES"),
    !(COMPANY.x == "RIO MARIA" & COMPANY.y == "J R MAIA"),
    !(COMPANY.x == "RS" & COMPANY.y == "JBS"),
    !(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "FRIPAGO" & COMPANY.y == "FRIGOL"),
    !(COMPANY.x == "VITORIA" & COMPANY.y == "MINERVA")
  )


# Check the non-exact matches
tac_cnpj %>%
  filter(COMPANY.x != COMPANY.y) %>%
  select(COMPANY.x, COMPANY.y) %>%
  unique()


# (B) Exact matches with 'ALTERNATIVE_NAMES'
non_matched_tac <-
  tac %>%
  filter(!ID %in% tac_cnpj$ID)
shs_unnested <- shs_to_join %>%
  mutate(
    ALTERNATIVE_NAMES = str_split(OTHER, pattern = ","),
    ALTERNATIVE_NAMES = map(ALTERNATIVE_NAMES, str_trans),
    ALTERNATIVE_NAMES = map(ALTERNATIVE_NAMES, fn_clean_spaced_names)
  ) %>%
  unnest() %>%
  distinct()
tac_cnpj2 <-
  left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
  filter(COMPANY.x == ALTERNATIVE_NAMES)


# (C) Inexact matches with ALTERNATIVE_NAMES
non_matched_tac <-
  tac %>%
  filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID))
tac_cnpj3 <-
  left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
  filter(stringdist(COMPANY.x, ALTERNATIVE_NAMES) <= 4) %>%
  filter(
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRINORTE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOI VERDE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOVINORTE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRIFORT"),
    !(COMPANY.x == "JBS" & ALTERNATIVE_NAMES == "BRF"),
    !(COMPANY.x == "RS" & ALTERNATIVE_NAMES == "BRF"),
    !(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "JBS"),
    !(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "FARM"),
    !(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNES PIONEIRA"),
    !(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNE VITORIA"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIMAR"),
    !(COMPANY.x == "CENTAURO" & ALTERNATIVE_NAMES == "COAGRO"),
    !(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL DANDI"),
    !(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL MARANHAO"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIBAT"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRISA"),
    !(COMPANY.x == "COOPERMEAT" & ALTERNATIVE_NAMES == "COOPERSAL"),
    !(COMPANY.x == "DURLICOUROS" & ALTERNATIVE_NAMES == "F R COUROS"),
    !(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRIGOL"),
    !(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRICAL"),
    !(COMPANY.x == "FAMA" & ALTERNATIVE_NAMES %in% c(
      "PMTS", "JBS", "FRICAL", "FECAF", "GUARA", "COMAB", "FRICAM",
      "JBL", "FBR", "SM", "PMA"
    )),
    !(COMPANY.x == "FRICARNE" & ALTERNATIVE_NAMES %in% c(
      "FRINORTE", "FRIPAGO", "FRIPAGO", "FRICAL", "FRIGOCARNES", "LACARNE", "BB CARNES",
      "FRICAM", "FRICABOMCARNES", "SO CARNE", "FRIBOMCARNES", "FRIFORT"
    )),
    !(COMPANY.x == "FRIG WEST FRIGORIFICO" & ALTERNATIVE_NAMES == "FRIGONE FRIGORIFICO"),
    !(COMPANY.x == "FRIGOBEL" & ALTERNATIVE_NAMES %in% c(
      "FRIGONORT", "FRIGOVAN", "FRINORTE", "MFRIGOL", "FRIGOL", "FRIBOI",
      "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN", "FRIGOMAR", "FRIGOSUL",
      "FRIGOVETE", "FRIGOMAX", "FRIGOKAR", "FRIFORT"
    )),
    !(COMPANY.x == "FRIGOCARNE" & ALTERNATIVE_NAMES %in% c(
      "COOPERSAL", "FRIGONORT", "FRIGONORTE", "FRIGOVAN", "FRIGOFRUT", "FRIGOTAI",
      "FRIGOSAN", "FRIGOMAR", "FRINORTE", "FRIGOVETE", "FRIGOMAX", "FRIBOMCARNES",
      "FRIGOKAR"
    )),
    !(COMPANY.x == "FRIGOMAR" & ALTERNATIVE_NAMES %in% c(
      "MAFRIMAR", "FRINORT", "FRIGONORTE", "FRIGONORT", "FRIGOVAN", "FRINORTE",
      "MFRIGOL", "FRIGOL", "FRIBOI", "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN",
      "FRIGOSUL", "FRIGOCARNES", "FRIGOVETE", "FRIGOMAX", "FRIGOCAM", "FRIGOKAR", "FRIFORT"
    )),
    !(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("UNIBOI ALIMENTOS", "ATIVO ALIMENTOS", "FRIBAR ALIMENTOS")),
    !(COMPANY.x == "J S COMERCIO DE CARNES" & ALTERNATIVE_NAMES %in% c("VP COMERCIO DE CARNE", "BR COMERCIO DE CARNES")),
    !(COMPANY.x == "MARFRIG" & ALTERNATIVE_NAMES %in% c("SUIFRI")),
    !(COMPANY.x == "RIBEIRO" & ALTERNATIVE_NAMES %in% c("FRIBOI")),
    !(COMPANY.x == "RIO MARIA" & ALTERNATIVE_NAMES %in% c("COOMARCA")),
    !(COMPANY.x == "RS" & ALTERNATIVE_NAMES %in% c("JBS", "", "F", "AGRA", "CORSAL", "ERPPA")),
    !(COMPANY.x == "VITORIA" & ALTERNATIVE_NAMES %in% c("MINERVA"))
  )


# Check the matches are genuine
tac_cnpj3 %>%
  select(COMPANY.x, ALTERNATIVE_NAMES, STATE) %>%
  filter(COMPANY.x != ALTERNATIVE_NAMES) %>%
  distinct()


# (D) Try matching by the jw distance for ALTERNATIVE_NAMES
non_matched_tac <-
  tac %>%
  filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID))
tac_cnpj4 <-
  left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
  filter(stringdist(COMPANY.x, ALTERNATIVE_NAMES, method = "jw") <= 0.18) %>%
  filter(
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRINORTE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOI VERDE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "BOVINORTE"),
    !(COMPANY.x == "BOI FORTE" & ALTERNATIVE_NAMES == "FRIFORT"),
    !(COMPANY.x == "JBS" & ALTERNATIVE_NAMES == "BRF"),
    !(COMPANY.x == "RS" & ALTERNATIVE_NAMES == "BRF"),
    !(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "JBS"),
    !(COMPANY.x == "BRF" & ALTERNATIVE_NAMES == "FARM"),
    !(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNES PIONEIRA"),
    !(COMPANY.x == "CASA DE CARNE MINEIRA" & ALTERNATIVE_NAMES == "CASA DE CARNE VITORIA"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIMAR"),
    !(COMPANY.x == "CENTAURO" & ALTERNATIVE_NAMES == "COAGRO"),
    !(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL DANDI"),
    !(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES == "COMERCIAL MARANHAO"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRIBAT"),
    !(COMPANY.x == "CASFRISA" & ALTERNATIVE_NAMES == "MAFRISA"),
    !(COMPANY.x == "COOPERMEAT" & ALTERNATIVE_NAMES == "COOPERSAL"),
    !(COMPANY.x == "DURLICOUROS" & ALTERNATIVE_NAMES == "F R COUROS"),
    !(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRIGOL"),
    !(COMPANY.x == "F I E L" & ALTERNATIVE_NAMES == "FRICAL"),
    !(COMPANY.x == "FAMA" & ALTERNATIVE_NAMES %in% c(
      "PMTS", "JBS", "FRICAL", "FECAF", "GUARA", "COMAB", "FRICAM",
      "JBL", "FBR", "SM", "PMA"
    )),
    !(COMPANY.x == "FRICARNE" & ALTERNATIVE_NAMES %in% c(
      "FRINORTE", "FRIPAGO", "FRIPAGO", "FRICAL", "FRIGOCARNES", "LACARNE", "BB CARNES",
      "FRICAM", "FRICABOMCARNES", "SO CARNE", "FRIBOMCARNES", "FRIFORT"
    )),
    !(COMPANY.x == "FRIG WEST FRIGORIFICO" & ALTERNATIVE_NAMES == "FRIGONE FRIGORIFICO"),
    !(COMPANY.x == "FRIGOBEL" & ALTERNATIVE_NAMES %in% c(
      "FRIGONORT", "FRIGOVAN", "FRINORTE", "MFRIGOL", "FRIGOL", "FRIBOI",
      "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN", "FRIGOMAR", "FRIGOSUL",
      "FRIGOVETE", "FRIGOMAX", "FRIGOKAR", "FRIFORT"
    )),
    !(COMPANY.x == "FRIGOCARNE" & ALTERNATIVE_NAMES %in% c(
      "COOPERSAL", "FRIGONORT", "FRIGONORTE", "FRIGOVAN", "FRIGOFRUT", "FRIGOTAI",
      "FRIGOSAN", "FRIGOMAR", "FRINORTE", "FRIGOVETE", "FRIGOMAX", "FRIBOMCARNES",
      "FRIGOKAR"
    )),
    !(COMPANY.x == "FRIGOMAR" & ALTERNATIVE_NAMES %in% c(
      "MAFRIMAR", "FRINORT", "FRIGONORTE", "FRIGONORT", "FRIGOVAN", "FRINORTE",
      "MFRIGOL", "FRIGOL", "FRIBOI", "FRICAL", "FRIGOFRUT", "FRIGOTAI", "FRIGOSAN",
      "FRIGOSUL", "FRIGOCARNES", "FRIGOVETE", "FRIGOMAX", "FRIGOCAM", "FRIGOKAR", "FRIFORT"
    )),
    !(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("UNIBOI ALIMENTOS", "ATIVO ALIMENTOS", "FRIBAR ALIMENTOS")),
    !(COMPANY.x == "J S COMERCIO DE CARNES" & ALTERNATIVE_NAMES %in% c("VP COMERCIO DE CARNE", "BR COMERCIO DE CARNES")),
    !(COMPANY.x == "MARFRIG" & ALTERNATIVE_NAMES %in% c("SUIFRI")),
    !(COMPANY.x == "RIBEIRO" & ALTERNATIVE_NAMES %in% c("FRIBOI")),
    !(COMPANY.x == "RIO MARIA" & ALTERNATIVE_NAMES %in% c("COOMARCA")),
    !(COMPANY.x == "RS" & ALTERNATIVE_NAMES %in% c("JBS", "", "F", "AGRA", "CORSAL", "ERPPA")),
    !(COMPANY.x == "VITORIA" & ALTERNATIVE_NAMES %in% c("MINERVA")),
    !(COMPANY.x == "CASA DE CARNES NOVILHO DE OURO" & ALTERNATIVE_NAMES %in% c("CASA DE CARNES PIONEIRA", "CASA DE CARNE MODELO")),
    !(COMPANY.x == "COMERCIAL FRANCO" & ALTERNATIVE_NAMES %in% c("COMERCIAL LIMA", "COMERCIAL FERREIRA", "COMERCIAL CARVALHO")),
    !(COMPANY.x == "FRIGO FREITAS" & ALTERNATIVE_NAMES %in% c("FRIGOFRUT")),
    !(COMPANY.x == "FRIMON ALIMENTOS" & ALTERNATIVE_NAMES %in% c("AMAZONIA ALIMENTOS", "MERCURIO ALIMENTOS")),
    !(COMPANY.x == "MARCHANTERIA POPULAR" & ALTERNATIVE_NAMES %in% c("MARCHANTARIA ELDORADO", "MARCHANTERIA GOMES", "MARCHANTERIA CRUZEIRO", "MARCHANTERIA CARAJAS", "MARCHANTERIA REAL")),
    !(COMPANY.x == "NAVI CARNES INDUSTRIA E COMERCIO DE ALIMENTOS" & ALTERNATIVE_NAMES %in% c("VALE GRANDE INDUSTRIA E COMERCIO DE ALIMENTOS")),
    !(COMPANY.x == "REI DA CARNE" & ALTERNATIVE_NAMES %in% c("IMPERIO DA CARNE"))
  )


# Check the matches are genuine
tac_cnpj4 %>%
  select(COMPANY.x, ALTERNATIVE_NAMES, STATE) %>%
  filter(COMPANY.x != ALTERNATIVE_NAMES) %>%
  distinct()


# (E) Look for where the the tac name is contained within slaughterhouse name
# ... and vice-versa
# ... Nb there are a few matadouros with names containing VITORIA... I link the TAC
# ... to the one with the official name "MATADOURO VITORIA"
# ... Nb2 there area also several synonyms of FRIGOVALE, which I filter
non_matched_tac <-
  tac %>%
  filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID))
tac_cnpj5 <-
  left_join(non_matched_tac, shs_unnested, by = c("STATE" = "SH_STATE")) %>%
  filter(
    stri_detect_fixed(COMPANY.x, COMPANY.y) |
      stri_detect_fixed(COMPANY.y, COMPANY.x) |
      stri_detect_fixed(COMPANY.x, ALTERNATIVE_NAMES) |
      stri_detect_fixed(ALTERNATIVE_NAMES, COMPANY.x)
  ) %>%
  filter(
    !ALTERNATIVE_NAMES %in% c("F", "SM"),
    !(COMPANY.x == "RIBEIRO" & COMPANY.y == "RODRIGUES  RIBEIRO COMERCIO"),
    !(COMPANY.x == "RS" & COMPANY.y != "FRIGORIFICO RS"),
    !(COMPANY.x == "VITORIA" & COMPANY.y != "MATADOURO VITORIA"),
    !(COMPANY.x == "FRIGOVALE DO GUAPORE" & COMPANY.y == "OLAIR GONCALVES"),
    !(COMPANY.x == "FRIGOVALE DO GUAPORE" & COMPANY.y == "FRIGORIFICO GL")
  )


# Confirm the matches are genuine
tac_cnpj5 %>%
  select(COMPANY.x, COMPANY.y, ALTERNATIVE_NAMES, STATE) %>%
  distinct() %>%
  as.data.frame()


# Select remaining non-matches
non_matched_tac <-
  tac %>%
  filter(!ID %in% c(tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID, tac_cnpj5$ID))


# (F) Manual matches
# (Fi) Make df from slaughterhouse dataset, where names match the TAC data
shs_to_join2 <-
  shs_unnested %>%
  mutate(COMPANY = case_when(
    COMPANY == "BOMBONATTO INDUSTRIA DE ALIMENTOS" ~ "BOMBONATTO INDUSTRIA E COMERCIO DE ALIMENTOS",
    COMPANY == "CASA DE CARNE MINEIRA" |
      ALTERNATIVE_NAMES == "CASA DE CARNE MINEIRA" ~ "CARNE MINEIRA",
    COMPANY == "R R FREITAS OLIVEIRA" |
      ALTERNATIVE_NAMES == "R R FREITAS OLIVEIRA" ~ "FRIGO FREITAS",
    COMPANY == "FRIGORIFICO MERCURIO" |
      ALTERNATIVE_NAMES == "FRIGORIFICO MERCURIO" ~ "MERCURIO FIGRORIFICO FABRIL E EXP DE ALIM",
    COMPANY == "PINHEIRO  MEDEIROS IND E COM DE CARNES E DERIVADOS E SERVICOS" |
      ALTERNATIVE_NAMES == "PINHEIRO  MEDEIROS IND E COM DE CARNES E DERIVADOS E SERVICOS" ~ "PINHEIRO  MEDEIROS IND E COM DE CARNES R DERIVADOS E SERVICOS  FRIGOVAVA",
    COMPANY == "A M SANTOS" |
      ALTERNATIVE_NAMES == "A M SANTOS" ~ "A SOMBRA DOS SANTOS  FRIGORIFICO QUINDER",
    COMPANY == "ABATEDOURO BOI FORTE" |
      ALTERNATIVE_NAMES == "ABATEDOURO BOI FORTE" ~ "BOI FORTE",
    COMPANY == "FRIGORIFICO REDENTOR" |
      ALTERNATIVE_NAMES == "FRIGORIFICO REDENTOR" ~ "REDENTOR",
    (COMPANY == "FRIGOSUL" |
      ALTERNATIVE_NAMES == "FRIGORIFICO SUL") ~ "RODRIGO MORAES DA SILVA FRIGOSUL",
    COMPANY == "FRIGORIFICO VALE DO BUGRES" |
      ALTERNATIVE_NAMES == "FRIGORIFICO VALE DO BUGRES" ~ "VALE BUGRES",
    COMPANY == "FRIGORIFICO REDENTOR" |
      ALTERNATIVE_NAMES == "FRIGORIFICO REDENTOR" ~ "REDENTOR",
    TRUE ~ COMPANY
  ))


# Add in manual matches
tac_cnpj6 <-
  inner_join(non_matched_tac, shs_to_join2, by = c("COMPANY", "STATE" = "SH_STATE"))


# (Fii) Add in CNPJ (where known) for cases which don't appear in the slaughterhouse data
# ... some of these are slaughterhouses missing from our mapping
# ... some of these businesses may not be slaughterhouses (e.g. tannery, distributor, retailer)
tac_cnpj7 <-
  non_matched_tac %>%
  mutate(SH_CNPJ = case_when(
    COMPANY == "CASA DE CARNES NOVILHO DE OURO" & STATE == "PA" ~ "09609271000113",
    COMPANY == "CENTAURO" ~ "22973077000180",
    COMPANY == "NAVI CARNES INDUSTRIA E COMERCIO DE ALIMENTOS" & STATE == "MT" ~ "02982267000661",
    COMPANY == "OFERTAO DAS CARNES" & STATE == "PA" ~ "05982770000109",
    COMPANY == "AGROPASTORIL ESTEVAM" & STATE == "AC" ~ "64611213000647",
    COMPANY == "AGROPEL AGROPECUARIA COMERCIO REPRESENTACOES" ~ "05042346000176",
    COMPANY == "AMAZON MEAT INDUSTRIA DE ALIMENTOS" & STATE == "AC" ~ "08467304000507",
    COMPANY == "AMAZONIA SA INDUSTRIA ALIMENTICIA" & STATE == "PA" ~ "05057179000218",
    COMPANY == "BOI BRANCO COMERCIAL EXPORTACAO E IMPORTACAO" ~ "15294317000155",
    COMPANY == "BRULINE DISTRIBUIDORA" ~ "83755660000100",
    COMPANY == "COURO DO NORTE" & STATE == "PA" ~ "04134342000155",
    COMPANY == "CURTUME IDEAL IMPORTACAO E EXPORTACAO" ~ "04333952000188",
    COMPANY == "DO SOCORRO S NASCIMENTO" ~ "21873909268",
    COMPANY == "DONALDO GOMES" ~ "32024673791",
    COMPANY == "DURLICOUROS" ~ "00105229000433",
    COMPANY == "ELISMAR FIGUEIRA MENDES" ~ "06702570287",
    COMPANY == "F O NASCIMENTO" ~ "05626152000118",
    COMPANY == "FAMA" ~ "84144815000134",
    COMPANY == "FRIGOBEL" ~ "05829958000103",
    COMPANY == "FRIMON ALIMENTOS" ~ "02739700000128",
    COMPANY == "J S COMERCIO DE CARNES" ~ "09024379000144",
    COMPANY == "JOSE ARLINDO DA SILVA SANTOS" ~ "22875026291",
    COMPANY == "KAIAPOS" ~ "63880884000136",
    COMPANY == "MERCANTIL BENEVIDES" ~ "10659809000183",
    COMPANY == "PORTAL DO BOI" ~ "09688838000194",
    COMPANY == "REI DA CARNE" ~ "02861780000190",
    COMPANY == "SUPERMERCADO GUANABARA" ~ "07919251000113",
    COMPANY == "JOSE BONIFACIO" ~ "54964457000145",
    COMPANY == "FRIG WEST FRIGORIFICO" ~ "01426902000148",
    COMPANY == "PLENA ALIMENTOS" ~ "10198974000185",
    COMPANY == "MARCELO SAMPAIO CORREA" ~ "12941444000100",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(SH_CNPJ))


# Look at the remaining non-matches
non_matched_tac <-
  tac %>%
  filter(!ID %in% c(
    tac_cnpj$ID, tac_cnpj2$ID, tac_cnpj3$ID, tac_cnpj4$ID,
    tac_cnpj5$ID, tac_cnpj6$ID, tac_cnpj7$ID
  ))
non_matched_tac %>%
  select(COMPANY, STATE) %>%
  distinct()


# Join all the TAC-CNPJ data.frames together
fn_keep_cols <- function(df) {
  if ("COMPANY.x" %in% names(df)) {
    df %>%
      select(
        COMPANY = COMPANY.x,
        YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
        TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON, SH_CNPJ
      )
  } else if ("SH_CNPJ" %in% names(df)) {
    df %>%
      select(COMPANY,
        YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
        TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON, SH_CNPJ
      )
  } else {
    df %>%
      select(COMPANY,
        YEAR_TAC_SIGNED, MONTH_TAC_SIGNED,
        TAC_STATE = STATE, APPLIES_TO_LEGAL_AMAZON
      ) %>%
      mutate(SH_CNPJ = NA_character_)
  }
}
tac_cnpj_all <- list(
  tac_cnpj, tac_cnpj2,
  tac_cnpj3, tac_cnpj4,
  tac_cnpj5, tac_cnpj6,
  tac_cnpj7, non_matched_tac
)
tac_cnpj_all <- map_df(tac_cnpj_all, fn_keep_cols) %>%
  distinct()


# Check no TACs were dropped
if (length(unique(tac_cnpj_all$COMPANY)) != length(unique(tac$COMPANY))) {
  stop("Error in TAC matching - go back and check")
}


# Add some CNPJs where missing because of multiple CNPJs per business
# ... MERCURIO FIGRORIFICO FABRIL E EXP DE ALIM also operates slaughterhouses
# ... under Mercurio Alimentos (I've confirmed this by looking at the SIF numbers on their website)
# ... http://www.mercuriofabril.com.br/index.php?url=nav/sobrenos
new_row <-
  tac_cnpj_all %>%
  filter(grepl("MERCURIO", COMPANY)) %>%
  mutate(SH_CNPJ = "11831785000241")
new_rows <-
  tac_cnpj_all %>%
  filter(grepl("MERCURIO", COMPANY)) %>%
  mutate(SH_CNPJ = "11831785000322") %>%
  bind_rows(new_row)
tac_cnpj_all <-
  bind_rows(tac_cnpj_all, new_rows)


# tidy up
rm(
  tac_cnpj, tac_cnpj2,
  tac_cnpj3, tac_cnpj4,
  tac_cnpj5, tac_cnpj6,
  tac_cnpj7, non_matched_tac,
  fn_keep_cols, shs_to_join, shs_to_join2,
  shs_unnested, cnpjs, tac,
  new_row, new_rows
)


# Export data -------------------------------------------------------------



# (A) Export to S3


# write to an in-memory raw connection
zz <- rawConnection(raw(0), "r+")
write_delim(tac_cnpj_all, zz, delim = ";")


# upload the object to S3
aws.s3::put_object(file = rawConnectionValue(zz), bucket = "trase-storage", object = "brazil/indicators/actors/zd_commitments/out/TAC_SIGNATORIES.csv")


# close the connection
close(zz)


# (B) Export to local folder
output_dir <- file.path(getwd(), "data/TAC/OUT")
if (!dir.exists(output_dir)) {
  dir.create(output_dir)
} else {
  print("data/TAC/OUT directory already exists")
}


# All TACs are linked to CNPJs
tac_cnpj_all %>%
  write_delim("data/TAC/OUT/TAC_SIGNATORIES.csv",
    delim = ";"
  )


# tidy up
rm(tac_cnpj_all, zz, output_dir)
import pandas as pd


def model(dbt, cursor):
    dbt.source("trase-storage-raw", "ticiana_tac_list_july_2016_reformatted_rv")
    dbt.source("trase-storage-raw", "slaughterhouses_brazil_2018_09_18_shp")
    dbt.source("trase-storage-raw", "cnpj_all_20190102")

    raise NotImplementedError()
    return pd.DataFrame({"hello": ["world"]})