Script to make a clean version of bovine SIGSIF data on the municipal origin of cattle slaughtered in SIF slaughterhouses per state.
The output has the columns: STATE_SLAUGHTER
GEOCODE PROP_FLOWS QUANTITY
BIOME
# [A] Load raw SIGSIF for 2015-2019
# Note: these data are semi_colon delimted
path <- "brazil/production/statistics/sigsif/out/"
keys <- map_chr(get_bucket(ts, prefix = path), "Key")
keys_semi_colon <- keys[grepl("SIF_2015_2017|SIF_2018|SIF_2019", keys)]
keys_semi_colon <- keys_semi_colon[grepl("csv", keys_semi_colon)]
sigsif_2015_2019 <-
map_df(keys_semi_colon,
~ s3read_using(
object = .x,
FUN = read_delim,
col_types = cols(GEOCODE = col_character()),
delim = ";",
bucket = ts,
opts = c("check_region" = T)
)
)
# [B] Load raw SIGSIF for 2020 & 2021
# Note: these data are comma delimted
keys_comma <- keys[grepl("SIF_2020|SIF_2021", keys)]
keys_comma <- keys_comma[grepl("csv", keys_comma)]
sigsif_2020_2021 <-
map_df(keys_comma,
~ s3read_using(
object = .x,
FUN = read_delim,
col_types = cols(GEOCODE = col_character()),
delim = ",",
bucket = ts,
opts = c("check_region" = T)
)
)
# [C] Join 2015-2021 data together
sigsif <- bind_rows(sigsif_2015_2019, sigsif_2020_2021)
# Fix missing GEOCODEs
sigsif <- sigsif %>%
mutate(GEOCODE = case_when(
is.na(GEOCODE) & MUNICIPALITY == "MIRASSOL D OESTE" ~ "5105622",
TRUE ~ GEOCODE
))
stopifnot(any(is.na(sigsif$GEOCODE)) == FALSE)
# [D] Load spatial data
spatial_path <- "brazil/spatial/BOUNDARIES/ibge/old/boundaries municipalities/"
munis <- get_object(paste0(spatial_path, "MUNICIPALITIES_BRA.geojson"), "trase-storage", check_region = T)
munis <- read_sf(rawToChar(munis))
brazil <- st_union(munis)
muni_centroids <- st_centroid(munis)
# muni/biome classifications
muni_biomes <-
read_delim(
get_object("brazil/spatial/BOUNDARIES/boundaries biomes/old/MUNIC_BIOME.csv",
"trase-storage",
check_region = T),
col_types = cols(GEOCODMU = col_character()),
delim = ";"
) %>%
select(GEOCODE = GEOCODMU,
BIOME)
# tidy up
rm(path, key, keys_comma, keys_semi_colon, sigsif_2015_2019, sigsif_2020_2021, spatial_path)
First, we drop the 2021 SIGSIF data - which is woefully incomplete! (Only having ca. 10% of the expected number of slaughtered head).
# Check for consistency in SIGSIF data across years
sigsif %>%
group_by(YEAR, TYPE) %>%
summarise(SUM_HEAD = sum(QUANTITY)) %>%
ungroup() %>%
drop_na(TYPE) %>%
ggplot(aes(YEAR, SUM_HEAD)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~TYPE, scales = "free_y") +
theme_classic() +
labs(title = "2021 data on slaughtered heads are incomplete",
subtitle = "We therefore drop these data to focus on 2015-2020")
# Drop extra years
sigsif <- sigsif %>%
drop_na(TYPE) %>%
filter(YEAR != 2021)
Here I check how consistent the data are between years, by evaluating
the R-sq of a model of PROP_FLOWS per GEOCODE
(i.e. any variation is because of changes in sourcing between
years).
# Identify the proportion per GEOCODE, per STATE_SLAUGHTER
sigsif_bov_annual <-
sigsif %>%
filter(TYPE == "CATTLE") %>%
group_by(YEAR, STATE_SLAUGHTER) %>%
mutate(TOTAL_SLAUGHTERED_HEADS = sum(QUANTITY)) %>%
group_by(YEAR, STATE_SLAUGHTER, GEOCODE) %>%
summarise(PROP_FLOWS = sum(QUANTITY) / unique(TOTAL_SLAUGHTERED_HEADS)) %>%
ungroup()
extract_sq_per_state <- function(state_name, data) {
data <- data %>% filter(STATE_SLAUGHTER == state_name)
if(length(unique(data$YEAR)) > 1 & length(unique(data$GEOCODE)) > 1) {
m1 <-
lm(data = data,
PROP_FLOWS ~ GEOCODE)
model_output <- summary(m1)
model_r_sq <- model_output$r.squared
tibble("STATE" = state_name,
"R_SQ" = model_r_sq)
} else {
NULL
}
}
res <-
map_df(
.x = unique(sigsif_bov_annual$STATE_SLAUGHTER),
.f = extract_sq_per_state,
data = sigsif_bov_annual
)
The sourcing is very consistent across years (R-sq > 0.9) for most states, except SE (0.75), SC (0.85), and SP (0.86).
# Visualise the consistency across years
ggplot(data = res,
aes(STATE, R_SQ)) +
geom_point() +
scale_y_continuous(limits = c(0, 1)) +
theme_bw() +
geom_hline(yintercept = 0.9, linetype = "dashed") +
labs(title = "R-sq of municipal-level sourcing of SIFs per state",
subtitle = "R-sq = 0.9 shown as dashed line")
An example of the variation between years is in SP, where sourcing from PA and RO appears in some years, but not others
chk <- inner_join(munis, sigsif_bov_annual %>% filter(STATE_SLAUGHTER == "SP"), by = "GEOCODE")
ggplot() +
geom_sf(data = brazil, fill = "lightgrey") +
geom_sf(data = chk, aes(fill = PROP_FLOWS), col = NA) +
facet_wrap(~YEAR) +
theme_minimal() +
coord_sf(datum = NA) +
scale_fill_viridis(option="magma",
name = "% SOURCING",
na.value="white",
direction = -1)
I choose to aggregate these data across years (i.e. have one municipal-level supply shed per state), because:
# Make cleaned output dataset
# Identify the proportion per GEOCODE, per STATE_SLAUGHTER
sigsif_bov_summary <-
sigsif %>%
filter(TYPE == "CATTLE") %>%
group_by(STATE_SLAUGHTER) %>%
mutate(TOTAL_SLAUGHTERED_HEADS = sum(QUANTITY)) %>%
group_by(STATE_SLAUGHTER, GEOCODE) %>%
summarise(SUM_QUANTITY = sum(QUANTITY),
PROP_FLOWS = SUM_QUANTITY / unique(TOTAL_SLAUGHTERED_HEADS)) %>%
ungroup()
Below I plot these supply sheds, as a visual check that the data are OK:
# Check it looks OK
plot_sourcing <- function(state_name, data){
munis_to_plot <-
inner_join(munis, sigsif_bov_summary %>% filter(STATE_SLAUGHTER == state_name), by = "GEOCODE")
p <- ggplot() +
geom_sf(data = brazil, fill = 'grey') +
geom_sf(data = munis_to_plot,
aes(fill = PROP_FLOWS * 100)) +
labs(title = paste("Municipal-level sourcing of SIFs in", state_name)) +
theme_minimal() +
coord_sf(datum = NA) +
scale_fill_viridis(option="magma",
name = "% SOURCING",
na.value="white",
direction = -1) +
rm(munis_to_plot)
return(p)
}
map(
.x = unique(sigsif_bov_summary$STATE_SLAUGHTER),
.f = plot_sourcing,
data = sigsif_bov_summary
)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
And then export the data.
Note: before I export the data, I add the BIOME because this
information is required in SEI-PCS when aggregating small flows (to
avoid losing information about deforestation rates across biomes). I
also rename the SUM_QUANTITY column to match the
br_bovine_slaughter_2015_2019_addQuantity.csv file S3.
# Add biome
sigsif_bov_summary <-
left_join(sigsif_bov_summary, muni_biomes, by = "GEOCODE") %>%
mutate(BIOME =
case_when(GEOCODE == "4212650" ~ "MATA ATLANTICA",
TRUE ~ BIOME)
)
stopifnot(!any(is.na(sigsif_bov_summary$BIOME)))
# export
sigsif_bov_summary %>%
rename(QUANTITY = SUM_QUANTITY) %>%
s3write_using(
x = .,
object = "brazil/production/statistics/sigsif/out/br_bovine_slaughter_2015_2020.csv",
FUN = write_delim,
delim = ";",
bucket = ts,
opts = c("check_region" = T)
)