Compare volumes per exporting CNPJ

Look at large mis-matches

These facilities have very high volumes in 2019 BoL. They are slaughterhouses and processing facilities in the south.

# Which facilities have very large volumes in BoL?
# JBS in LINS and MOZARLANDIA
over_represented_bol <- 
  cnpj_vol_comparison %>% 
  filter(SUM_VOL > 75e06) %>%
  pull(cnpj)
sif %>%
  filter(CNPJ %in% c(over_represented_bol)) %>%
  distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
  kable()
SIF COMPANY_CLEAN STATE MUNICIPALITY LEVEL DATE_RESERVED DATE_REGISTERED
4507 JBS GO MOZARLANDIA 2 24/05/2000 17/07/2008
337 JBS SP LINS 2 14/04/1977 14/10/1996

These facilities have 5x higher volume in 2019 than 2017 data:

# Which facilities over-appear in BoL?
# ... i.e. BoL 5x CD volumne
over_represented_bol <- 
  full_join(bol_cnpj, cd_cnpj, by = "cnpj", suffix = c("_BOL", "_CD")) %>%
  filter(cnpj != "00000000000000") %>%
  mutate(BOL_VS_CD = SUM_VOL_BOL / SUM_VOL_CD) %>%
  filter(BOL_VS_CD > 5) %>%
  pull(cnpj)
sif %>%
  filter(CNPJ %in% over_represented_bol) %>%
  distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
   kable()
SIF COMPANY_CLEAN STATE MUNICIPALITY LEVEL DATE_RESERVED DATE_REGISTERED
2007 MARFRIG GLOBAL FOODS RS ALEGRETE 2 29/06/1976 23/02/2007
3448 FRIGORIFICO NORDESTE ALIMENTOS BA TEIXEIRA DE FREITAS 2 29/07/1992 30/07/2015
137 FBZ COMERCIO DE CARNES GO CIDADE OCIDENTAL 2 15/10/1990 31/07/2015
267 INDUSTRIA E COMERCIO DE ALIMENTOS SUPREMO MG CAMPO BELO 2 25/11/1948 25/11/1948
3215 PLENA ALIMENTOS TO PARAISO DO TOCANTINS 2 12/11/2002 06/08/2015
1438 FRIGORIFICO NOSSO SP BARUERI 3 02/02/2005 14/08/2015

These facilities have 5x higher volume in 2017 than 2019:

 # Which facilities over-appear in BoL?
# ... i.e. CD 5x BoL volume
over_represented_cd <- 
  full_join(bol_cnpj, cd_cnpj, by = "cnpj", suffix = c("_BOL", "_CD")) %>%
  filter(cnpj != "00000000000000") %>%
  mutate(BOL_VS_CD = SUM_VOL_BOL / SUM_VOL_CD) %>%
  filter(BOL_VS_CD < 0.2) %>%
  pull(cnpj)
sif %>%
  filter(CNPJ %in% over_represented_cd) %>%
  distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
   kable()
SIF COMPANY_CLEAN STATE MUNICIPALITY LEVEL DATE_RESERVED DATE_REGISTERED
1156 PAMPLONA ALIMENTOS SC RIO DO SUL 2 17/12/1974 13/08/2015
1163 FRIGORIFICO BIG BOI PR PAICANDU 2 05/12/1989 07/08/2015
4625 FRIGORIFICO PARAISO TO PARAISO DO TOCANTINS 2 26/01/2001 06/08/2015
4293 MAXI BEEF ALIMENTOS DO BRASIL MG CARLOS CHAGAS 2 10/07/1998 06/08/2015
1883 FRIGORIFICO VALE DO SAPUCAI MG ITAJUBA 2 10/12/1975 21/10/2013
2231 PREMIUM FOODS BRASIL NA NA 2 19/04/2017 19/04/2017
3557 CAJURU INDUSTRIA E COMERCIO DE ALIMENTOS SP CAJURU 3 04/03/1993 14/08/2015
1002 MELLORE ALIMENTOS MG BETIM 3 28/10/2015 28/10/2015

Geographic coverage of matched BoL

# Prepare summary tables
bol_2019_state <- 
  bol_2019 %>%
  mutate(hs6 = str_pad(hs6, side = "left", width = 6, pad = "0")) %>%
  mutate(MATCHED = ifelse(branch == 0.0, F, T)) %>%
  group_by(MATCHED, hs6) %>%
  mutate(TOTAL_VOL = sum(vol)) %>%
  group_by(MATCHED, hs6, state) %>%
  summarise(PERC_VOL = sum(vol) / unique(TOTAL_VOL) * 100) %>%
  ungroup() %>%
  filter(state %in% names(state_colors))
cd_2017_state <- 
  cd_2017 %>%
  mutate(ncm = str_pad(ncm, side = "left", width = 8, pad = "0")) %>%
  mutate(hs6 = str_sub(ncm, start = 1, end = 6)) %>%
  group_by(hs6) %>%
  mutate(TOTAL_VOL = sum(vol)) %>%
  group_by(state, hs6) %>%
  summarise(PERC_VOL = sum(vol) / unique(TOTAL_VOL) * 100) %>%
  ungroup() %>% 
  mutate(MATCHED = TRUE) %>%
  filter(state %in% names(state_colors))


# Plot
bind_rows(cd_2017_state, bol_2019_state, .id = "SOURCE") %>%
  mutate(SOURCE = ifelse(SOURCE == "1", "CD 2017", "BoL 2019" )) %>%
  ggplot(aes(SOURCE, PERC_VOL, fill = state)) + 
  geom_bar(stat = "identity") + 
  coord_flip() + 
  theme_bw() + 
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1), 
        axis.title.y = element_blank()) +
  facet_grid(cols = vars(hs6), 
             scales = "free_y") +
  scale_fill_manual(values = state_colors) +
  labs(title = "State of origin per HS6 code",
       subtitle = "Unmatched have HS6 code UNKNOWN")

Proportion per company

TBD.