These facilities have very high volumes in 2019 BoL. They are slaughterhouses and processing facilities in the south.
# Which facilities have very large volumes in BoL?
# JBS in LINS and MOZARLANDIA
over_represented_bol <-
cnpj_vol_comparison %>%
filter(SUM_VOL > 75e06) %>%
pull(cnpj)
sif %>%
filter(CNPJ %in% c(over_represented_bol)) %>%
distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
kable()
| SIF | COMPANY_CLEAN | STATE | MUNICIPALITY | LEVEL | DATE_RESERVED | DATE_REGISTERED |
|---|---|---|---|---|---|---|
| 4507 | JBS | GO | MOZARLANDIA | 2 | 24/05/2000 | 17/07/2008 |
| 337 | JBS | SP | LINS | 2 | 14/04/1977 | 14/10/1996 |
These facilities have 5x higher volume in 2019 than 2017 data:
# Which facilities over-appear in BoL?
# ... i.e. BoL 5x CD volumne
over_represented_bol <-
full_join(bol_cnpj, cd_cnpj, by = "cnpj", suffix = c("_BOL", "_CD")) %>%
filter(cnpj != "00000000000000") %>%
mutate(BOL_VS_CD = SUM_VOL_BOL / SUM_VOL_CD) %>%
filter(BOL_VS_CD > 5) %>%
pull(cnpj)
sif %>%
filter(CNPJ %in% over_represented_bol) %>%
distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
kable()
| SIF | COMPANY_CLEAN | STATE | MUNICIPALITY | LEVEL | DATE_RESERVED | DATE_REGISTERED |
|---|---|---|---|---|---|---|
| 2007 | MARFRIG GLOBAL FOODS | RS | ALEGRETE | 2 | 29/06/1976 | 23/02/2007 |
| 3448 | FRIGORIFICO NORDESTE ALIMENTOS | BA | TEIXEIRA DE FREITAS | 2 | 29/07/1992 | 30/07/2015 |
| 137 | FBZ COMERCIO DE CARNES | GO | CIDADE OCIDENTAL | 2 | 15/10/1990 | 31/07/2015 |
| 267 | INDUSTRIA E COMERCIO DE ALIMENTOS SUPREMO | MG | CAMPO BELO | 2 | 25/11/1948 | 25/11/1948 |
| 3215 | PLENA ALIMENTOS | TO | PARAISO DO TOCANTINS | 2 | 12/11/2002 | 06/08/2015 |
| 1438 | FRIGORIFICO NOSSO | SP | BARUERI | 3 | 02/02/2005 | 14/08/2015 |
These facilities have 5x higher volume in 2017 than 2019:
# Which facilities over-appear in BoL?
# ... i.e. CD 5x BoL volume
over_represented_cd <-
full_join(bol_cnpj, cd_cnpj, by = "cnpj", suffix = c("_BOL", "_CD")) %>%
filter(cnpj != "00000000000000") %>%
mutate(BOL_VS_CD = SUM_VOL_BOL / SUM_VOL_CD) %>%
filter(BOL_VS_CD < 0.2) %>%
pull(cnpj)
sif %>%
filter(CNPJ %in% over_represented_cd) %>%
distinct(SIF, COMPANY_CLEAN, STATE, MUNICIPALITY, LEVEL, DATE_RESERVED, DATE_REGISTERED) %>%
kable()
| SIF | COMPANY_CLEAN | STATE | MUNICIPALITY | LEVEL | DATE_RESERVED | DATE_REGISTERED |
|---|---|---|---|---|---|---|
| 1156 | PAMPLONA ALIMENTOS | SC | RIO DO SUL | 2 | 17/12/1974 | 13/08/2015 |
| 1163 | FRIGORIFICO BIG BOI | PR | PAICANDU | 2 | 05/12/1989 | 07/08/2015 |
| 4625 | FRIGORIFICO PARAISO | TO | PARAISO DO TOCANTINS | 2 | 26/01/2001 | 06/08/2015 |
| 4293 | MAXI BEEF ALIMENTOS DO BRASIL | MG | CARLOS CHAGAS | 2 | 10/07/1998 | 06/08/2015 |
| 1883 | FRIGORIFICO VALE DO SAPUCAI | MG | ITAJUBA | 2 | 10/12/1975 | 21/10/2013 |
| 2231 | PREMIUM FOODS BRASIL | NA | NA | 2 | 19/04/2017 | 19/04/2017 |
| 3557 | CAJURU INDUSTRIA E COMERCIO DE ALIMENTOS | SP | CAJURU | 3 | 04/03/1993 | 14/08/2015 |
| 1002 | MELLORE ALIMENTOS | MG | BETIM | 3 | 28/10/2015 | 28/10/2015 |
# Prepare summary tables
bol_2019_state <-
bol_2019 %>%
mutate(hs6 = str_pad(hs6, side = "left", width = 6, pad = "0")) %>%
mutate(MATCHED = ifelse(branch == 0.0, F, T)) %>%
group_by(MATCHED, hs6) %>%
mutate(TOTAL_VOL = sum(vol)) %>%
group_by(MATCHED, hs6, state) %>%
summarise(PERC_VOL = sum(vol) / unique(TOTAL_VOL) * 100) %>%
ungroup() %>%
filter(state %in% names(state_colors))
cd_2017_state <-
cd_2017 %>%
mutate(ncm = str_pad(ncm, side = "left", width = 8, pad = "0")) %>%
mutate(hs6 = str_sub(ncm, start = 1, end = 6)) %>%
group_by(hs6) %>%
mutate(TOTAL_VOL = sum(vol)) %>%
group_by(state, hs6) %>%
summarise(PERC_VOL = sum(vol) / unique(TOTAL_VOL) * 100) %>%
ungroup() %>%
mutate(MATCHED = TRUE) %>%
filter(state %in% names(state_colors))
# Plot
bind_rows(cd_2017_state, bol_2019_state, .id = "SOURCE") %>%
mutate(SOURCE = ifelse(SOURCE == "1", "CD 2017", "BoL 2019" )) %>%
ggplot(aes(SOURCE, PERC_VOL, fill = state)) +
geom_bar(stat = "identity") +
coord_flip() +
theme_bw() +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.y = element_blank()) +
facet_grid(cols = vars(hs6),
scales = "free_y") +
scale_fill_manual(values = state_colors) +
labs(title = "State of origin per HS6 code",
subtitle = "Unmatched have HS6 code UNKNOWN")
TBD.