This is an in progress workbook with insights on the global cocoa trade.

# Load customs data
# ... the 'main' specification
cd <- 
  s3read_using(
  object = "world/cocoa/trade/cd/export/global/2021-02-01-GLOBAL_EXPORT_DATA_MAIN.csv",
  FUN = read_delim, 
  col_types = cols(HS6 = col_character(),
                   YEAR = col_character()),
  delim = ";",
  bucket = "trase-storage",
  opts = c("check_region" = T)
)
## Warning: The following named parsers don't match the column names: HS6
# And the 'auxilliary' specification
# ... with a slightly different classification of the 'TRADER'
cd_aux <- 
  s3read_using(
  object = "world/cocoa/trade/cd/export/global/2021-02-01-GLOBAL_EXPORT_DATA_AUX.csv",
  FUN = read_delim, 
  col_types = cols(HS6 = col_character(),
                   YEAR = col_character()),
  delim = ";",
  bucket = "trase-storage",
  opts = c("check_region" = T)
)
## Warning: The following named parsers don't match the column names: HS6

Marketshare

The market share of the big ‘7’ traders differs between countries.

We find no exports for these companies in Colombia and Peru - we need to check if we’re not missing any subsidiaries in these cases.

# Arrange in order of size.
# ... check Colombia and Peru traders for subsidiaries
cd %>%
  mutate(TRADER_GROUP = ifelse(is.na(TRADER_GROUP), "UNKNOWN", TRADER_GROUP)) %>%
  group_by(COUNTRY_OF_ORIGIN) %>% 
  mutate(TOTAL_PRODUCT_TONS = sum(PRODUCT_TONS)) %>%
  group_by(COUNTRY_OF_ORIGIN, TRADER_GROUP) %>% 
  summarise(SUM_PRODUCT_TONS = sum(PRODUCT_TONS), 
            PERC_TONS = SUM_PRODUCT_TONS / unique(TOTAL_PRODUCT_TONS) * 100) %>%
  ungroup() %>%
  mutate(TRADER_GROUP = fct_lump(TRADER_GROUP, w = SUM_PRODUCT_TONS, n = 7)) %>%
  ggplot(aes(x = "", PERC_TONS, fill = TRADER_GROUP)) + 
  geom_bar(stat = "identity") +
  coord_polar("y", start=0) +
  theme_minimal() + 
  facet_wrap(~COUNTRY_OF_ORIGIN, nrow = 2) +
  scale_fill_manual(values = cbPalette) +
  theme(axis.title.y = element_blank(), 
        axis.title.x = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") 

Below the data are plotted for the ‘auxiliary’ trader classification. There are now some big traders in Colombia, but all importers are unknown for Cameroon (this suggests we should drop the ‘auxiliary’ trader classification for Cameroon - to be implemented).

# Arrange in order of size.
# ... check Colombia and Peru traders for subsidiaries
cd_aux %>%
  mutate(TRADER_GROUP = ifelse(is.na(TRADER_GROUP), "UNKNOWN", TRADER_GROUP)) %>%
  group_by(COUNTRY_OF_ORIGIN) %>% 
  mutate(TOTAL_PRODUCT_TONS = sum(PRODUCT_TONS)) %>%
  group_by(COUNTRY_OF_ORIGIN, TRADER_GROUP) %>% 
  summarise(SUM_PRODUCT_TONS = sum(PRODUCT_TONS), 
            PERC_TONS = SUM_PRODUCT_TONS / unique(TOTAL_PRODUCT_TONS) * 100) %>%
  ungroup() %>%
  mutate(TRADER_GROUP = fct_lump(TRADER_GROUP, w = SUM_PRODUCT_TONS, n = 7)) %>%
  ggplot(aes(x = "", PERC_TONS, fill = TRADER_GROUP)) + 
  geom_bar(stat = "identity") +
  coord_polar("y", start=0) +
  theme_minimal() + 
  facet_wrap(~COUNTRY_OF_ORIGIN, nrow = 2) +
  scale_fill_manual(values = cbPalette) +
  theme(axis.title.y = element_blank(), 
        axis.title.x = element_blank(),
        axis.text = element_blank(),
        legend.position = "bottom") 

To do: do web searches for the company relationships of the top 10 exporting companies from Colombia and Peru. These are listed below.

cd %>%
  filter(COUNTRY_OF_ORIGIN %in% c("COLOMBIA", "PERU")) %>%
  group_by(COUNTRY_OF_ORIGIN) %>% 
  mutate(TOTAL_PRODUCT_TONS = sum(PRODUCT_TONS)) %>%
  group_by(COUNTRY_OF_ORIGIN, TRADER_GROUP) %>% 
  summarise(SUM_PRODUCT_TONS = sum(PRODUCT_TONS), 
            PERC_TONS = SUM_PRODUCT_TONS / unique(TOTAL_PRODUCT_TONS) * 100) %>%
  ungroup() %>%
  group_by(COUNTRY_OF_ORIGIN) %>%
  arrange(desc(PERC_TONS)) %>%
  top_n(n = 10, wt = PERC_TONS) %>%
  kable()
COUNTRY_OF_ORIGIN TRADER_GROUP SUM_PRODUCT_TONS PERC_TONS
COLOMBIA SUCESORES DE JOSE JESUS RESTREPO Y CIA 4745.1027 35.845683
COLOMBIA GRUPO NUTRESA 2778.1573 20.986890
PERU AMAZONAS TRADING PERU SAC 12688.3360 17.321046
PERU MACHU PICCHU TRADING SAC 10979.5921 14.988413
PERU CAFETALERA AMAZONICA SAC 9565.3140 13.057760
COLOMBIA FED NAL DE CACAOTEROS 1701.3500 12.852420
COLOMBIA CIA COL AGROINDL 1598.5920 12.076161
PERU EXPORTADORA ROMEX 8357.6860 11.409208
PERU SUMAQAO 5050.8400 6.894981
COLOMBIA SURCACAO 620.5000 4.687411
COLOMBIA COLCOCOA 556.5734 4.204494
PERU COOPERATIVA AGRARIA CACAOTERA ACOPAGRO 3042.1860 4.152936
PERU CIA NACIONAL DE CHOCOLATES DE PERU 2200.0000 3.003255
PERU BLUE PACIFIC 2171.3452 2.964137
PERU ASOCIACION DE PRODUCTORES CACAO ALTO HU 1625.0890 2.218434
PERU COOPERATIVA DE SERVICIOS MULTIPLES NORAN 1203.8340 1.643373
COLOMBIA M Y M TRADING 213.0600 1.609508
COLOMBIA ASOC DE ORGANIZACIONES PRODUCTORAS DE CACAO DEL NUDO 212.5000 1.605278
COLOMBIA D ORIGENN 190.0000 1.435307
COLOMBIA MARIANA COCOA EXPORT 153.5000 1.159577

Prices

Compare prices per country for raw beans:

cd %>% 
  filter(TYPE == "COCOA BEANS") %>%
  mutate(FOB_PER_TON = FOB / PRODUCT_TONS) %>% 
  filter(FOB_PER_TON <= 6000) %>%
   mutate(COUNTRY_OF_ORIGIN = fct_reorder(COUNTRY_OF_ORIGIN, FOB_PER_TON, .fun='mean')) %>%
  ggplot(aes(COUNTRY_OF_ORIGIN, FOB_PER_TON)) + 
  # geom_violin() +
  geom_boxplot() +
  coord_flip() + 
  labs(title = "FOB prices for cocoa beans paid per company",
       subtitle = "Excludes outliers > 6000 USD/TON")

This shows that FOB prices in general: Brazil > Peru > Ghana > Colombia > Ecuador > Cote d’Ivoire > Cameroon.

And you can these different prices played out per company:

cd %>% 
  filter(TYPE == "COCOA BEANS") %>%
  mutate(TRADER_GROUP = fct_lump(TRADER_GROUP, w = PRODUCT_TONS, n = 6)) %>%
  filter(TRADER_GROUP != "Other") %>%
  mutate(FOB_PER_TON = FOB / PRODUCT_TONS) %>% 
  filter(FOB_PER_TON <= 6000) %>%
  ggplot(aes(TRADER_GROUP, FOB_PER_TON, fill = COUNTRY_OF_ORIGIN)) + 
  # geom_violin() +
  geom_boxplot() + 
  coord_flip() + 
  facet_wrap(~TRADER_GROUP, scale = "free_y") +
  labs(title = "FOB prices for cocoa beans paid per company",
       subtitle = "Excludes outliers > 6000 USD/TON")

And there some low value outliers in the Cote d’Ivoire data.