View or edit on GitHub
This page is synchronized from trase/models/brazil/soy_2021_qa/north_states.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
import pandas as pd
from trase.tools import CNX
from trase.tools.pandasdb.query import query_with_dataframe
from trase.tools.pandasdb import read
from trase.tools.aws.aws_helpers_cached import get_pandas_df_once
from trase.tools.aws.aws_helpers import read_json
from psycopg2 import sql
import numpy as np
import plotly.express as px
import numpy as np
import pandas as pd
import tabulate
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
import seaborn as sns
import seaborn.objects as so
1. Prepare data for analysis
# configure to display numbers as plain decimal numbers with no special formatting
pd.set_option("float_format", "{:f}".format)
# set variables
DICT_STATES_NUM = {
"BR-43": "RIO GRANDE DO SUL",
"BR-35": "SAO PAULO",
"BR-50": "MATO GROSSO DO SUL",
"BR-33": "RIO DE JANEIRO",
"BR-41": "PARANA",
"BR-15": "PARA",
"BR-31": "MINAS GERAIS",
"BR-11": "RONDONIA",
"BR-51": "MATO GROSSO",
"BR-52": "GOIAS",
"BR-42": "SANTA CATARINA",
"BR-21": "MARANHAO",
"BR-29": "BAHIA",
"BR-53": "DISTRITO FEDERAL",
"BR-32": "ESPIRITO SANTO",
"BR-12": "ACRE",
"BR-XX": "UNKNOWN",
"BR-23": "CEARA",
"BR-14": "RORAIMA",
"BR-16": "AMAPA",
"BR-25": "PARAIBA",
"BR-26": "PERNAMBUCO",
"BR-22": "PIAUI",
"BR-24": "RIO GRANDE DO NORTE",
"BR-17": "TOCANTINS",
"BR-28": "SERGIPE",
"BR-27": "ALAGOAS",
"BR-13": "AMAZONAS",
}
NORTH = ["ACRE", "AMAZONAS", "RORAIMA", "RONDONIA", "PARA", "TOCANTINS", "AMAPA"]
# load datasets
df_mun = pd.read_csv(
f"/usr/share/TRASE/trase/models/brazil/soy/2020/downloaded/municipality.csv",
dtype=str,
sep=";",
)
df_2016 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2016.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_2017 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2017.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_2018 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2018.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_2019 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2019.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_2020 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.0/SEIPCS_BRAZIL_SOY_2020.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_2021 = get_pandas_df_once(
"brazil/soy/sei_pcs/v2.6.1/SEIPCS_BRAZIL_SOY_2021.csv",
encoding="utf8",
sep=";",
dtype=str,
keep_default_na=False,
)
df_concat = pd.concat(
[df_2016, df_2017, df_2018, df_2019, df_2020, df_2021], ignore_index=True
)
df_concat["VOLUME_RAW"] = df_concat["VOLUME_RAW"].astype(float).round(4)
df_concat["VOLUME_PRODUCT"] = df_concat["VOLUME_PRODUCT"].astype(float).round(4)
# merge df_concat to df_mun
hist_data = pd.merge(
df_concat, df_mun, left_on="LVL6_TRASE_ID_PROD", right_on="trase_id", how="inner"
)
# map states
hist_data["state"] = hist_data["state_trase_id"].map(DICT_STATES_NUM)
# rename columns
hist_data = hist_data.rename(
columns={"YEAR": "base_year", "VOLUME_PRODUCT": "vol", "CNPJ8": "cnpj8"}
)
def plot_volume_per_year(df: pd.DataFrame, source: str = "s3") -> None:
"""
Plot the sum of 'VOLUME_RAW' column per year in 'YEAR' column of the DataFrame
Parameters:
- sei_pcs: DataFrame with 'YEAR' and 'VOLUME_RAW' columns
:return: None
"""
if source == "s3":
grouped_df = df.groupby(["YEAR"]).sum().reset_index()
else:
grouped_df = query_for_plots()
fig = px.bar(
grouped_df,
x="YEAR",
y="VOLUME_RAW",
title="Volume raw per year",
color_discrete_sequence=[MAIN_COLOR],
)
fig.update_layout(
xaxis_title="Year", yaxis_title="Volume(Tonnes)", showlegend=False
)
fig = add_anotations_to_plot(fig, "Source: SEI-PCS model from Splitgraph")
fig.show()
plot_volume_per_year(sei_pcs_s3)
2. Create dictionary for groups of exporters based on CNPJ8
# create cnpj8 column
hist_data["cnpj8"] = hist_data["CNPJ"].str[0:8]
# find distinct cnpj8 of controller companies (xxxxxxxx0001-xx)
parent = hist_data[hist_data["CNPJ"].str[8:12] == "0001"]
parent_cnpj8 = parent.copy()
# create subset
subset_parent_cnpj8 = parent_cnpj8[["EXPORTER", "cnpj8", "CNPJ", "vol"]]
# check unique values of cnpj8
subset_parent_cnpj8["cnpj8"].nunique()
# Resolve mispelling issues
# find amount of labels identified for each cnpj8
parent = (
subset_parent_cnpj8.groupby(["cnpj8"]).agg({"EXPORTER": "nunique"}).reset_index()
)
parent[parent["EXPORTER"] > 1]
# find names versions
subset_parent_cnpj8[subset_parent_cnpj8["cnpj8"] == "00012377"].EXPORTER.unique()
# find full cnpj and check it against cnpj official register
subset_parent_cnpj8[subset_parent_cnpj8["cnpj8"] == "00012377"].CNPJ.unique()
# result
MAIN_COMPANIES_DICT = {
"JOSE DINON & CIA LTDA": "JOSE DINON & CIA LTDA",
"JOSE DINON FILHOS & CIA LTDA": "JOSE DINON & CIA LTDA",
"BALDO SA COMERCIO INDUSTRIA E EXPORTACAO": "BALDO SA COMERCIO INDUSTRIA E EXPORTACAO",
"BALDO SA COMERCIO INDUSTRIA & EXPORTACAO": "BALDO SA COMERCIO INDUSTRIA E EXPORTACAO",
"OLFAR S/A - ALIMENTO E ENERGIA": "OLFAR S/A - ALIMENTO E ENERGIA",
"OLFAR SA ALIMENTO & ENERGIA": "OLFAR S/A - ALIMENTO E ENERGIA",
"INDUSTRIA VITORIA LTDA - ME": "INDUSTRIA VITORIA LTDA",
"INDUSTRIA VITORIA LTDA": "INDUSTRIA VITORIA LTDA",
"BUNGE ALIMENTOS S/A": "BUNGE ALIMENTOS S/A",
"BUNGE ALIMENTOS SA": "BUNGE ALIMENTOS S/A",
"BIANCHINI SA INDUSTRIA COMERCIO E AGRICULTURA": "BIANCHINI SA INDUSTRIA COMERCIO E AGRICULTURA",
"BIANCHINI SA INDUSTRIA COMERCIO & AGRICULTURA": "BIANCHINI SA INDUSTRIA COMERCIO E AGRICULTURA",
"COTRIJUC - COOPERATIVA AGROPECUARIA JULIO DE CASTILHOS": "COTRIJUC - COOPERATIVA AGROPECUARIA JULIO DE CASTILHOS",
"COOPERATIVA AGROPECUARIA DE JULIO DE CASTILHOS - COTRIJUC": "COTRIJUC - COOPERATIVA AGROPECUARIA JULIO DE CASTILHOS",
"COOPERATIVA TRITICOLA DE JULIO DE CASTILHOS LTDA": "COTRIJUC - COOPERATIVA AGROPECUARIA JULIO DE CASTILHOS",
"ACO MINERACAO LIMITADA": "ACO MINERACAO LIMITADA",
"ACO MINERACAO LTDA": "ACO MINERACAO LIMITADA",
"MOINHO IGUACU AGROINDUSTRIAL SA": "CAVALCA TRADING E LOGISTICA S/A.",
"CAVALCA TRADING & LOGISTICA": "CAVALCA TRADING E LOGISTICA S/A.",
"MOINHO IGUACU AGROINDUSTRIAL S.A.": "CAVALCA TRADING E LOGISTICA S/A.",
"I RIEDI & CIA LTDA": "I. RIEDI & CIA. LTDA.",
"I. RIEDI & CIA. LTDA.": "I. RIEDI & CIA. LTDA.",
"C.VALE - COOPERATIVA AGROINDUSTRIAL": "C.VALE - COOPERATIVA AGROINDUSTRIAL",
"C VALE COOPERATIVA AGROINDUSTRIAL": "C.VALE - COOPERATIVA AGROINDUSTRIAL",
"COCARI - COOPERATIVA AGROPECUARIA E INDUSTRIAL": "COCARI - COOPERATIVA AGROPECUARIA E INDUSTRIAL",
"COCARI COOPERATIVA AGROPECUARIA E INDUSTRIAL": "COCARI - COOPERATIVA AGROPECUARIA E INDUSTRIAL",
"BELAGRICOLA COMERCIO E REPRESENTACOES DE PRODUTOS AGRICOLAS S.A.": "BELAGRICOLA COMERCIO E REPRESENTACOES DE PRODUTOS AGRICOLAS S.A.",
"BELAGRICOLA COMERCIO & REPRESENTACOES DE PRODUTOS AGRIC": "BELAGRICOLA COMERCIO E REPRESENTACOES DE PRODUTOS AGRICOLAS S.A.",
"POLAR INDUSTRIA DE PLASTICOS LTDA": "POLAR INDUSTRIA DE PLASTICOS S/A",
"POLAR INDUSTRIA DE PLASTICOS S/A": "POLAR INDUSTRIA DE PLASTICOS S/A",
"LAURENTI EQUIPAMENTOS PARA PROCESSAMENTO DE DADOS EIRELI": "LAURENTI EQUIPAMENTOS PARA PROCESSAMENTO DE DADOS LTDA",
"LAURENTI EQUIPAMENTOS PARA PROCESSAMENTO DE DADOS LTDA": "LAURENTI EQUIPAMENTOS PARA PROCESSAMENTO DE DADOS LTDA",
"DINI TEXTIL INDUSTRIA E COMERCIO LTDA.": "DINI TEXTIL INDUSTRIA E COMERCIO LTDA.",
"DINI TEXTIL INDUSTRIA E COMERCIO LTDA": "DINI TEXTIL INDUSTRIA E COMERCIO LTDA.",
"VEGRANDE VEICULOS LTDA": "VEGRANDE VEICULOS LTDA",
"VEGRANDE VEICULOS CASAGRANDE SA": "VEGRANDE VEICULOS LTDA",
"COPACOL COOPERATIVA AGROINDUSTRIAL CONSOLATA": "COPACOL-COOPERATIVA AGROINDUSTRIAL CONSOLATA",
"COPACOL-COOPERATIVA AGROINDUSTRIAL CONSOLATA": "COPACOL-COOPERATIVA AGROINDUSTRIAL CONSOLATA",
"CSM-COMPONENTES SISTEMAS E MAQUINAS PARA CONSTRUCAO LTD": "CSM-COMPONENTES SISTEMAS E MAQUINAS PARA CONSTRUCAO LTD",
"CSM COMPONENTES SISTEMAS & MAQUINAS PARA CONSTRUCAO LTD": "CSM-COMPONENTES SISTEMAS E MAQUINAS PARA CONSTRUCAO LTD",
"TERRA COMERCIO E EXPORTACAO DE CEREAIS LTDA.": "TERRA COMERCIO E EXPORTACAO DE CEREAIS LTDA.",
"TERRA COMERCIO E EXPORTACAO DE CEREAIS LTDA": "TERRA COMERCIO E EXPORTACAO DE CEREAIS LTDA.",
"COAGRU COOPERATIVA AGROINDUSTRIAL UNIAO": "COAGRU COOPERATIVA AGROINDUSTRIAL UNIAO",
"COOPERATIVA AGROPECUARIA UNIAO LTDA COAGRU": "COAGRU COOPERATIVA AGROINDUSTRIAL UNIAO",
"AMAGGI EXPORTACAO & IMPORTACAO LTDA": "AMAGGI EXPORTACAO E IMPORTACAO LTDA",
"AMAGGI EXPORTACAO E IMPORTACAO LTDA": "AMAGGI EXPORTACAO E IMPORTACAO LTDA",
"USINA ACUCAREIRA S. MANOEL S/A.": "USINA ACUCAREIRA S. MANOEL S/A.",
"USINA ACUCAREIRA S MANOEL SA": "USINA ACUCAREIRA S. MANOEL S/A.",
"AESA AUTOMOLAS EQUIPAMENTOS LTDA.": "AESA AUTOMOLAS EQUIPAMENTOS LTDA.",
"AESA AUTOMOLAS EQUIPAMENTOS LTDA": "AESA AUTOMOLAS EQUIPAMENTOS LTDA.",
"CARGILL AGRICOLA SA": "CARGILL AGRICOLA S A",
"CARGILL AGRICOLA S A": "CARGILL AGRICOLA S A",
"DYTECH TECALON INDUSTRIA E COMERCIO DE AUTOPECAS S.A.": "S RIKO AUTOMOTIVE HOSE TECALON BRASIL S.A.",
"S RIKO AUTOMOTIVE HOSE TECALON BRASIL S.A.": "S RIKO AUTOMOTIVE HOSE TECALON BRASIL S.A.",
"MARUBENI BRASIL S A": "MARUBENI BRASIL S A",
"MARUBENI BRASIL SA": "MARUBENI BRASIL S A",
"USINA ACUCAREIRA ESTER SA": "USINA ACUCAREIRA ESTER S.A. ",
"USINA ACUCAREIRA ESTER S A": "USINA ACUCAREIRA ESTER S.A. ",
"LIOTECNICA TECNOLOGIA EM ALIMENTOS S.A.": "LIOTECNICA TECNOLOGIA EM ALIMENTOS S.A.",
"LIOTECNICA TECNOLOGIA EM ALIMENTOS LTDA": "LIOTECNICA TECNOLOGIA EM ALIMENTOS S.A.",
"PCO COMERCIO IMPORTACAO EXPORTACAO & AGROPECUARIA LTDA": "P C O- COMERCIO IMPORTACAO EXPORTACAO E AGROPECUARIA LTDA",
"P C O- COMERCIO IMPORTACAO EXPORTACAO E AGROPECUARIA LTDA": "P C O- COMERCIO IMPORTACAO EXPORTACAO E AGROPECUARIA LTDA",
"NEXA RECURSOS MINERAIS S.A.": "NEXA RECURSOS MINERAIS S.A.",
"NEXA RECURSOS MINERAIS SA": "NEXA RECURSOS MINERAIS S.A.",
"USINA SANTA FE SA": "USINA SANTA FE S/A.",
"USINA SANTA FE S/A.": "USINA SANTA FE S/A.",
"BIO SOJA INDUSTRIAS QUIMICAS E BIOLOGICAS LTDA.": "VITTIA S.A.",
"VITTIA FERTILIZANTES E BIOLOGICOS S.A.": "VITTIA S.A.",
"LOUIS DREYFUS COMPANY BRASIL SA": "LOUIS DREYFUS COMPANY BRASIL S.A.",
"LOUIS DREYFUS CO BRASIL SA": "LOUIS DREYFUS COMPANY BRASIL S.A.",
"LOUIS DREYFUS COMPANY BRASIL S.A.": "LOUIS DREYFUS COMPANY BRASIL S.A.",
"VIRGOLINO DE OLIVEIRA S/A - ACUCAR E ALCOOL": "VIRGOLINO DE OLIVEIRA S/A - ACUCAR E ALCOOL ",
"VIRGOLINO DE OLIVEIRA SA ACUCAR E ALCOOL": "VIRGOLINO DE OLIVEIRA S/A - ACUCAR E ALCOOL ",
"GRANOL INDUSTRIA COMERCIO E EXPORTACAO SA": "GRANOL INDUSTRIA COMERCIO E EXPORTACAO SA",
"GRANOL INDUSTRIA COMERCIO & EXPORTACAO SA": "GRANOL INDUSTRIA COMERCIO E EXPORTACAO SA",
"PRODUTOS ALIMENTICIOS ORLANDIA S A COMERCIO E INDUSTRIA": "PRODUTOS ALIMENTICIOS ORLANDIA S A COMERCIO E INDUSTRIA",
"PRODUTOS ALIMENTICIOS ORLANDIA SA COMERCIO E INDUSTRIA": "PRODUTOS ALIMENTICIOS ORLANDIA S A COMERCIO E INDUSTRIA",
"BLANVER FARMOQUIMICA E FARMACEUTICA S.A.": "BLANVER FARMOQUIMICA E FARMACEUTICA S.A.",
"BLANVER FARMOQUIMICA & FARMACEUTICA SA": "BLANVER FARMOQUIMICA E FARMACEUTICA S.A.",
"BUTILAMIL INDUSTRIAS REUNIDAS SA": "BUTILAMIL INDUSTRIAS REUNIDAS S/A",
"BUTILAMIL INDUSTRIAS REUNIDAS S/A": "BUTILAMIL INDUSTRIAS REUNIDAS S/A",
"J.N. DISTRIBUIDORA DE ALIMENTOS, MEDICAMENTOS E PERFUMARIA L": "J.N. DISTRIBUIDORA DE ALIMENTOS, MEDICAMENTOS E PERFUMARIA LTDA",
"JN AGRICULTURA MINERACAO COM & DIST DE ALIMENTOS & MEDICAMENTOS LTDA": "J.N. DISTRIBUIDORA DE ALIMENTOS, MEDICAMENTOS E PERFUMARIA LTDA",
"J.N. AGRICULTURA, MINERACAO, COMERCIO E DISTRIBUICAO DE ALIMENTOS E MEDICAMENTOS LTDA": "J.N. DISTRIBUIDORA DE ALIMENTOS, MEDICAMENTOS E PERFUMARIA LTDA",
"CUTRALE TRADING BRASIL LTDA.": "CUTRALE TRADING BRASIL LTDA.",
"CUTRALE TRADING BRASIL LTDA": "CUTRALE TRADING BRASIL LTDA.",
"EFRAIM AGRONEGOCIOS EIRELI": "EFRAIM V. A. S. BIRTCHE LTDA",
"EFRAIM AGRONEGOCIOS LTDA": "EFRAIM V. A. S. BIRTCHE LTDA",
"AGROMOND INDUSTRIA E COMERCIO INTERNACIONAL DE COMMODITIES L": "LEMARC AGROMOND INDUSTRIA E COMERCIO INTERNACIONAL LTDA",
"LEMARC AGROMOND INDUSTRIA & COMERCIO INTERNACIONAL LTDA": "LEMARC AGROMOND INDUSTRIA E COMERCIO INTERNACIONAL LTDA",
"CJ INTERNATIONAL BRASIL COMERCIAL AGRICOLA LTDA": "CJ INTERNATIONAL BRASIL COMERCIAL AGRICOLA LTDA.",
"CJ INTERNATIONAL BRASIL COMERCIAL AGRICOLA LTDA.": "CJ INTERNATIONAL BRASIL COMERCIAL AGRICOLA LTDA.",
"UBY AGROQUIMICA LTDA": "UBY AGROQUIMICA S.A",
"UBY AGROQUIMICA S.A": "UBY AGROQUIMICA S.A",
"DTI SEMENTES S.A.": "DTI SEMENTES S.A.",
"DTI SEMENTES SA": "DTI SEMENTES S.A.",
"GRANELES BRASIL COMERCIAL IMPORTADORA EXPORTADORA AGRICOLA L": "GRANELES BRASIL COMERCIAL IMPORTADORA EXPORTADORA AGRICOLA S.A",
"GRANELES BRASIL COMERCIAL IMP EXP AGRICOLA SA": "GRANELES BRASIL COMERCIAL IMPORTADORA EXPORTADORA AGRICOLA S.A",
"MARUBENI GRAOS BRASIL S.A.": "MARUBENI GRAOS BRASIL S.A.",
"MARUBENI GRAOS BRASIL SA": "MARUBENI GRAOS BRASIL S.A.",
"PERDUE COMERCIAL IMPORTADORA E EXPORTADORA LTDA.": "PERDUE COMERCIAL IMPORTADORA E EXPORTADORA LTDA.",
"PERDUE COMERCIAL IMPORTADORA & EXPORTADORA LTDA": "PERDUE COMERCIAL IMPORTADORA E EXPORTADORA LTDA.",
"COFCO INTERNATIONAL GRAINS LTDA": "COFCO INTERNATIONAL GRAINS LTDA.",
"COFCO INTERNATIONAL GRAINS LTDA.": "COFCO INTERNATIONAL GRAINS LTDA.",
"GLENCORE IMPORTADORA & EXPORTADORA SA": "VITERRA BRASIL S.A.",
"GLENCORE IMPORTADORA E EXPORTADORA S.A.": "VITERRA BRASIL S.A.",
"VITERRA BRASIL SA": "VITERRA BRASIL S.A.",
"IBF INDUSTRIA BRASILEIRA DE FILMES S/A.": "IBF INDUSTRIA BRASILEIRA DE FILMES S/A.",
"IBF INDUSTRIA BRASILEIRA DE FILMES SA": "IBF INDUSTRIA BRASILEIRA DE FILMES S/A.",
"COPALEM COOP. DE PRODUTORES RURAIS DE LUIS EDUARDO MAGALHAES": "COPALEM COOP. DE PRODUTORES RURAIS DE LUIS EDUARDO MAGALHAES LTDA",
"CARGILL AGRICOLA SA - TURKS AND CAICOS": "COPALEM COOP. DE PRODUTORES RURAIS DE LUIS EDUARDO MAGALHAES LTDA",
"COOPERATIVA DE PRODUTORE RURAIS DE LUIS EDUARDO MAGALHA": "COPALEM COOP. DE PRODUTORES RURAIS DE LUIS EDUARDO MAGALHAES LTDA",
"ENGELHART CTP (BRASIL) S.A.": "ENGELHART CTP (BRASIL) S.A.",
"ENGELHART CTP BRASIL SA": "ENGELHART CTP (BRASIL) S.A.",
"C M F PADOVESE CALCADOS EIRELI - EPP": "INDUSTRIA DE CALCADOS BIRIGUI LTDA",
"C M F PADOVESE CALCADOS EIRELI": "INDUSTRIA DE CALCADOS BIRIGUI LTDA",
"CMF PADOVESE CALCADOS EIRELI": "INDUSTRIA DE CALCADOS BIRIGUI LTDA",
"USINAS ITAMARATI S/A": "USINAS ITAMARATI S/A",
"USINAS ITAMARATI S.A.": "USINAS ITAMARATI S/A",
"BIOSEV SA": "RAIZEN CENTRO-SUL S.A",
"BIOSEV S.A.": "RAIZEN CENTRO-SUL S.A",
"TRESBOMM COMERCIO & EXPORTACAO DE GRAOS LTDA": "TRESBOMM COMERCIO E EXPORTACAO DE GRAOS LTDA",
"TRESBOMM COMERCIO E EXPORTACAO DE GRAOS LTDA": "TRESBOMM COMERCIO E EXPORTACAO DE GRAOS LTDA",
"AGROBOM COMERCIO & INDUSTRIA DE CEREAIS LTDA": "AGROBOM COMERCIO E INDUSTRIA DE CEREAIS LTDA",
"AGROBOM COMERCIO E INDUSTRIA DE CEREAIS LTDA": "AGROBOM COMERCIO E INDUSTRIA DE CEREAIS LTDA",
"ABC-INDUSTRIA E COMERCIO S/A-ABC-INCO": "ABC-INDUSTRIA E COMERCIO S/A-ABC-INCO",
"ABC INDUSTRIA E COMERCIO SA ABC INCO": "ABC-INDUSTRIA E COMERCIO S/A-ABC-INCO",
"AGROPECUARIA CATARATAS LTDA": "AGROPECUARIA CATARATAS LTDA.",
"AGROPECUARIA CATARATAS LTDA.": "AGROPECUARIA CATARATAS LTDA.",
"HUMBERG AGRIBRASIL COMERCIO & EXPORTACAO DE GRAOS LTDA": "HUMBERG AGRIBRASIL COMERCIO E EXPORTACAO DE GRAOS SA",
"HUMBERG AGRIBRASIL COMERCIO & EXPORTACAO DE GRAOS SA": "HUMBERG AGRIBRASIL COMERCIO E EXPORTACAO DE GRAOS SA",
"HUMBERG AGRIBRASIL COMERCIO E EXPORTACAO DE GRAOS LTDA": "HUMBERG AGRIBRASIL COMERCIO E EXPORTACAO DE GRAOS SA",
"BRAZSOY COMERCIO EXPORTACAO E TECNOLOGIA LTDA - EPP": "BRAZSOY COMERCIO EXPORTACAO E TECNOLOGIA LTDA",
"BRAZSOY COMERCIO EXPORTACAO E TECNOLOGIA LTDA": "BRAZSOY COMERCIO EXPORTACAO E TECNOLOGIA LTDA",
"AGRICOLA XINGU SA": "AGRICOLA XINGU S/A",
"AGRICOLA XINGU S/A": "AGRICOLA XINGU S/A",
"BSBIOS INDUSTRIA & COMERCIO DE BIODIESEL SUL BRASIL SA": "BE8 S.A.",
"BSBIOS INDUSTRIA E COMERCIO DE BIODIESEL SUL BRASIL S/A": "BE8 S.A.",
"AGROCONTATO COMERCIO & REPRESENTACOES DE PRODUTOS AGROPECUARIOS LTDA": "AGROCONTATO COMERCIO E REPRESENTACOES DE PRODUTOS AGROPECUARIOS S/A",
"AGROCONTATO COMERCIO E REPRESENTACOES DE PRODUTOS AGROPECUAR": "AGROCONTATO COMERCIO E REPRESENTACOES DE PRODUTOS AGROPECUARIOS S/A",
"AGROCONTATO COMERCIO E REPRESENTACOES DE PRODUTOS AGROPECUARIOS LTDA": "AGROCONTATO COMERCIO E REPRESENTACOES DE PRODUTOS AGROPECUARIOS S/A",
"AGRONOVA COMERCIO DE INSUMOS AGRICOLAS EIRELI - EPP": "AGRONOVA COMERCIO DE INSUMOS AGRICOLAS LTDA",
"AGRONOVA COMERCIO DE INSUMOS AGRICOLAS EIRELI": "AGRONOVA COMERCIO DE INSUMOS AGRICOLAS LTDA",
"AGRONOVA COMERCIO DE INSUMOS AGRICOLAS LTDA": "AGRONOVA COMERCIO DE INSUMOS AGRICOLAS LTDA",
"AGRINVEST BRASIL S.A.": "SIERENTZ AGRO BRASIL LTDA.",
"SIERENTZ AGRO BRASIL S.A.": "SIERENTZ AGRO BRASIL LTDA.",
"GUSA NORDESTE S/A": "ACO VERDE DO BRASIL S/A",
"ACO VERDE DO BRASIL SA": "ACO VERDE DO BRASIL S/A",
"MAQFRAN COMERCIAL AGRICOLA LTDA": "MAQFRAN COMERCIAL AGRICOLA LTDA",
"MAQFRAN LTDA": "MAQFRAN COMERCIAL AGRICOLA LTDA",
"COPASPE - COOPERATIVA DE PRODUTORES AGRICOLAS DA SERRA DA PETROVINA": "COPASPE - COOPERATIVA DE PRODUTORES AGRICOLAS DA SERRA DA PETROVINA",
"COPASPE COOP DE PROD AGRICOLAS DA SERRA DA PETROVINA": "COPASPE - COOPERATIVA DE PRODUTORES AGRICOLAS DA SERRA DA PETROVINA",
"COPASPE - COOPERATIVA DE PRODUTORES AGRICOLAS DA SERRA DA PE": "COPASPE - COOPERATIVA DE PRODUTORES AGRICOLAS DA SERRA DA PETROVINA",
"VALE DO TIJUCO ACUCAR E ALCOOL S.A.": "VALE DO TIJUCO ACUCAR E ALCOOL S.A.",
"VALE DO TIJUCO ACUCAR & ALCOOL LTDA": "VALE DO TIJUCO ACUCAR E ALCOOL S.A.",
"CAMANOR PRODUTOS MARINHOS LTDA": "CAMANOR PRODUTOS MARINHOS S.A.",
"CAMANOR PRODUTOS MARINHOS S.A": "CAMANOR PRODUTOS MARINHOS S.A.",
"BIORGANICA COMERCIO DE PRODUTOS ORGANICOS LTDA.": "BIORGANICA PRODUTOS ORGANICOS LTDA",
"BIORGANICA PRODUTOS ORGANICOS LTDA": "BIORGANICA PRODUTOS ORGANICOS LTDA",
"BIORGANICA COM DE PROD ORGANICOS LTDA": "BIORGANICA PRODUTOS ORGANICOS LTDA",
"USINA RIO PARDO S/A": "USINA RIO PARDO S.A.",
"USINA RIO PARDO S.A. EM RECUPERACAO JUDICIAL": "USINA RIO PARDO S.A.",
"COPAGRO COOPERATIVA DE PRODUTORES AGRICOLAS DE RODA V": "COPAGRO - COOPERATIVA DE PRODUTORES AGRICOLAS DE RODA VELHA",
"COPAGRO - COOPERATIVA DE PRODUTORES AGRICOLAS DE RODA VELHA": "COPAGRO - COOPERATIVA DE PRODUTORES AGRICOLAS DE RODA VELHA",
"UNGI CAFE COMERCIO INDUSTRIA E EXPORTACAO LTDA EPP": "UNGI CAFE COMERCIO, INDUSTRIA E EXPORTACAO LTDA",
"UNGI CAFE COMERCIO, INDUSTRIA E EXPORTACAO LTDA - EPP": "UNGI CAFE COMERCIO, INDUSTRIA E EXPORTACAO LTDA",
"UNGI CAFE COMERCIO, INDUSTRIA E EXPORTACAO LTDA": "UNGI CAFE COMERCIO, INDUSTRIA E EXPORTACAO LTDA",
"CCAB AGRO S.A.": "CCAB AGRO S.A.",
"CCAB AGRO S.A.": "CCAB AGRO S.A.",
"CCAB AGRO SA": "CCAB AGRO S.A.",
"RIO PARDO PROTEINA VEGETAL SA": "RIO PARDO PROTEINA VEGETAL S.A",
"RIO PARDO PROTEINA VEGETAL S.A": "RIO PARDO PROTEINA VEGETAL S.A",
"NOVAAGRI INFRA ESTRUTURA DE ARMAZENAGEM E ESCOAM AGR SA": "NOVAAGRI INFRA-ESTRUTURA DE ARMAZENAGEM E ESCOAMENTO AGRICOLA S.A.",
"NOVAAGRI INFRA ESTRUTURA DE ARMAZENAGEM & ESCOAM AGR SA": "NOVAAGRI INFRA-ESTRUTURA DE ARMAZENAGEM E ESCOAMENTO AGRICOLA S.A.",
"NOVAAGRI INFRA ESTRUTURA DE ARMAZENAGEM & ESCOAMENTO AGRICOLA SA": "NOVAAGRI INFRA-ESTRUTURA DE ARMAZENAGEM E ESCOAMENTO AGRICOLA S.A.",
"ADRIANA AGRICOLA LTDA": "ATTO AGRICOLA LTDA.",
"ATTO AGRICOLA LTDA.": "ATTO AGRICOLA LTDA.",
"USINA UNIAO E INDUSTRIA SA": "USINA UNIAO E INDUSTRIA SA",
"USINA UNIAO & INDUSTRIA SA": "USINA UNIAO E INDUSTRIA SA",
"HORITA EMPREENDIMENTOS AGRICOLAS S/A": "HORITA EMPREENDIMENTOS AGRICOLAS LTDA",
"HORITA EMPREENDIMENTOS AGRICOLAS SA": "HORITA EMPREENDIMENTOS AGRICOLAS LTDA",
"USINA SAO JOSE S/A": "USINA SAO JOSE S/A",
"USINA SAO JOSE S.A.": "USINA SAO JOSE S/A",
"AGREX DO BRASIL S.A.": "AGREX DO BRASIL LTDA.",
"AGREX DO BRASIL SA": "AGREX DO BRASIL LTDA.",
"VIAGRI IMPORTACAO E EXPORTACAO DE CEREAIS LTDA ME": "VIAGRI IMPORTACAO E EXPORTACAO DE CEREAIS LTDA",
"VIAGRI IMPORTACAO E EXPORTACAO DE CEREAIS LTDA - ME": "VIAGRI IMPORTACAO E EXPORTACAO DE CEREAIS LTDA",
"ABJ COMERCIO AGRICOLA LTDA": "ABJ COMERCIO AGRICOLA LTDA",
"ABJ COMERCIO AGRICOLA LTDA - EM RECUPERACAO JUDICIAL": "ABJ COMERCIO AGRICOLA LTDA",
"AMAGGI LOUIS DREYFUS ZEN NOH GRAOS SA": "AMAGGI LOUIS DREYFUS ZEN-NOH GRAOS S.A.",
"AMAGGI LOUIS DREYFUS ZEN-NOH GRAOS S.A.": "AMAGGI LOUIS DREYFUS ZEN-NOH GRAOS S.A.",
"AMAGGI & LD COMMODITIES S.A.": "AMAGGI LOUIS DREYFUS ZEN-NOH GRAOS S.A.",
"BOM SUCESSO AGROINDUSTRIA S.A.": "BOM SUCESSO AGROINDUSTRIA S.A.",
"BOM SUCESSO AGROINDUSTRIA LTDA": "BOM SUCESSO AGROINDUSTRIA S.A.",
"ALIANCA AGRICOLA DO CERRADO SA": "ALIANCA AGRICOLA DO CERRADO S.A.",
"ALIANCA AGRICOLA DO CERRADO S.A.": "ALIANCA AGRICOLA DO CERRADO S.A.",
"S A USINA CORURIPE ACUCAR E ALCOOL": "S A USINA CORURIPE ACUCAR E ALCOOL",
"SA USINA CORURIPE ACUCAR & ALCOOL": "S A USINA CORURIPE ACUCAR E ALCOOL",
"CANTAGALO GENERAL GRAINS SA": "CANTAGALO GENERAL GRAINS S.A.",
"CANTAGALO GENERAL GRAINS S.A.": "CANTAGALO GENERAL GRAINS S.A.",
"ACRINOR ACRILONITRILA DO NORDESTE S A": "ACRINOR ACRILONITRILA DO NORDESTE S A",
"ACRINOR ACRILONITRILA DO NORDESTE SA": "ACRINOR ACRILONITRILA DO NORDESTE S A",
"COPASUL COOPERATIVA AGRICOLA SUL MATOGROSSENSE": "COPASUL COOPERATIVA AGRICOLA SUL MATOGROSSENSE",
"COPASUL COOPERATIVA AGRICOLA SUL MATOGROSSENSE LTDA": "COPASUL COOPERATIVA AGRICOLA SUL MATOGROSSENSE",
"MARLON BONILHA EIRELI": "MARLON BONILHA LTDA",
"MARLON BONILHA": "MARLON BONILHA LTDA",
"COPAGRI - COMERCIAL PARANAENSE AGRICOLA LTDA": "SAFRAS AGROINDUSTRIA S/A",
"COPAGRI COMERCIAL PARANAENSE AGRICOLA LTDA": "SAFRAS AGROINDUSTRIA S/A",
"GAVILON DO BRASIL COMERCIO DE PRODUTOS AGRICOLAS LTDA.": "VITERRA AGRICULTURE BRASIL S.A.",
"GAVILON DO BRASIL COMERCIO DE PRODUTOS AGRICOLAS LTDA": "VITERRA AGRICULTURE BRASIL S.A.",
"COOL SEED IND E COM DE EQUIPAMENTOS DE AERACAO LTDA": "COOL SEED INDUSTRIA E COMERCIO DE EQUIPAMENTOS AGRICOLAS LTDA",
"COOL SEED INDUSTRIA E COMERCIO DE EQUIPAMENTOS AGRICOLAS LTDA": "COOL SEED INDUSTRIA E COMERCIO DE EQUIPAMENTOS AGRICOLAS LTDA",
"METALFRIO SOLUTIONS S.A.": "METALFRIO SOLUTIONS S.A.",
"METALFRIO SOLUTIONS SA": "METALFRIO SOLUTIONS S.A.",
"AGRICOLA ALVORADA LTDA": "AGRICOLA ALVORADA S.A.",
"AGRICOLA ALVORADA S.A.": "AGRICOLA ALVORADA S.A.",
"COMERCIO E EXPORTACAO DE CEREAIS RIO ELIAS LTDA.": "COMERCIO E EXPORTACAO DE CEREAIS RIO ELIAS LTDA.",
"COMERCIO E EXPORTACAO DE CEREAIS RIO ELIAS LTDA": "COMERCIO E EXPORTACAO DE CEREAIS RIO ELIAS LTDA.",
"AGROVENCI COMERCIO IMPORTACAO EXPORTACAO & AGROPECUARIA LTDA": "AGROVENCI - COMERCIO, IMPORTACAO, EXPORTACAO E AGROPECUARIA LTDA",
"AGROVENCI - COMERCIO, IMPORTACAO, EXPORTACAO E AGROPECUARIA": "AGROVENCI - COMERCIO, IMPORTACAO, EXPORTACAO E AGROPECUARIA LTDA",
"AGROVENCI - COMERCIO, IMPORTACAO, EXPORTACAO E AGROPECUARIA LTDA": "AGROVENCI - COMERCIO, IMPORTACAO, EXPORTACAO E AGROPECUARIA LTDA",
"CLW ALIMENTOS LTDA": "HT NUTRI INDUSTRIA DE ALIMENTOS LTDA.",
"HT NUTRI INDUSTRIA DE ALIMENTOS LTDA": "HT NUTRI INDUSTRIA DE ALIMENTOS LTDA.",
"SLC AGRICOLA CENTRO OESTE SA": "SLC AGRICOLA CENTRO OESTE S.A.",
"TERRA SANTA AGRO S.A.": "SLC AGRICOLA CENTRO OESTE S.A.",
"TERRA SANTA AGRO SA": "SLC AGRICOLA CENTRO OESTE S.A.",
"COPLASA - ACUCAR E ALCOOL LTDA": "COPLASA ACUCAR E ALCOOL LTDA",
"COPLASA ACUCAR & ALCOOL LTDA": "COPLASA ACUCAR E ALCOOL LTDA",
"LAVORO AGROCOMERCIAL S.A.": "LAVORO AGROCOMERCIAL S.A.",
"LAVORO AGROCOMERCIAL LTDA": "LAVORO AGROCOMERCIAL S.A.",
"COFCO INTERNATIONAL BRASIL SA": "COFCO INTERNATIONAL BRASIL S.A.",
"COFCO INTERNATIONAL BRASIL S.A.": "COFCO INTERNATIONAL BRASIL S.A.",
"COFCO BRASIL S.A": "COFCO INTERNATIONAL BRASIL S.A.",
"RISA SA": "GEES S/A",
"RISA S/A": "GEES S/A",
"RIBEIRAO S A": "GEES S/A",
"OLAM AGRICOLA LTDA": "OLAM AGRICOLA LTDA.",
"OLAM AGRICOLA LTDA.": "OLAM AGRICOLA LTDA.",
"CEREAL COMERCIO EXPORTACAO E REPRESENTACAO AGROPECUARIA SA": "CEREAL COMERCIO EXPORTACAO E REPRESENTACAO AGROPECUARIA SA",
"CEREAL COM EXP & REPRESENTACAO AGROPECUARIA SA": "CEREAL COMERCIO EXPORTACAO E REPRESENTACAO AGROPECUARIA SA",
"CARAMURU ALIMENTOS S/A.": "CARAMURU ALIMENTOS S/A",
"CARAMURU ALIMENTOS S.A": "CARAMURU ALIMENTOS S/A",
"CARAMURU ALIMENTOS SA": "CARAMURU ALIMENTOS S/A",
"CARAMURU ALIMENTOS S.A.": "CARAMURU ALIMENTOS S/A",
"GICS INDUSTRIA COMERCIO & SERVICOS SA": "GICS INDUSTRIA, COMERCIO E SERVICOS S.A.",
"GALVANI INDUSTRIA, COMERCIO E SERVICOS S.A.": "GICS INDUSTRIA, COMERCIO E SERVICOS S.A.",
"CRISTA INDUSTRIA E COMERCIO LTDA": "CRISTA INDUSTRIA E COMERCIO LTDA",
"CRISTA INDUSTRIA E COMERCIO EIRELI": "CRISTA INDUSTRIA E COMERCIO LTDA",
"COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE": "COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE GOIANO",
"COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUD": "COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE GOIANO",
"COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE GOIANO": "COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE GOIANO",
"COOPERATIVA AGROIND DOS PROD R DO SUDOESTE GOIANO": "COOPERATIVA AGROINDUSTRIAL DOS PRODUTORES RURAIS DO SUDOESTE GOIANO",
"JF HILLEBRAND GROUP": "J F HILLEBRAND DO BRASIL TRANSPORTES INTERNACIONAIS LTDA",
"HILLEBRAND": "J F HILLEBRAND DO BRASIL TRANSPORTES INTERNACIONAIS LTDA",
"ADAMA BRASIL S/A": "ADAMA BRASIL S/A",
"ADAMA BRASIL SA": "ADAMA BRASIL S/A",
"UNIGEL PLASTICOS S/A": "UNIGEL QUIMICOS S/A",
"UNIGEL PLASTICOS SA": "UNIGEL QUIMICOS S/A",
"BOCCHI INDUSTRIA & COMERCIO DE CEREAIS LTDA": "BOCCHI INDUSTRIA E COMERCIO DE CEREAIS LTDA",
"BOCCHI INDUSTRIA E COMERCIO DE CEREAIS LTDA": "BOCCHI INDUSTRIA E COMERCIO DE CEREAIS LTDA",
"AFG BRASIL S/A": "AFG BRASIL S/A",
"AFG BRASIL SA": "AFG BRASIL S/A",
"FOLEM INDUSTRIA E COMERCIO S/A": "FOLEM INDUSTRIA E COMERCIO S/A",
"FOLEM INDUSTRIA E COMERCIO SA": "FOLEM INDUSTRIA E COMERCIO S/A",
"CHS AGRONEGOCIO - INDUSTRIA E COMERCIO LTDA": "CHS AGRONEGOCIO - INDUSTRIA E COMERCIO LTDA",
"CHS AGRONEGOCIO INDUSTRIA & COMERCIO LTDA": "CHS AGRONEGOCIO - INDUSTRIA E COMERCIO LTDA",
}
# Find distinct CNPJ8 of lonely companies
# Extract list of unique CNPJ8 of parent companies
parent_cpnj8_list = parent_cnpj8["cnpj8"].unique().tolist()
# Filter dataset to extract dataframe of lonely companies
non_parent_cnpj8 = hist_data[~hist_data["cnpj8"].isin(parent_cpnj8_list)]
# check unique values of cnpj8 (lonely companies)
non_parent_cnpj8["cnpj8"].nunique()
# Resolve misspelling issues
subset_non_parent_cnpj8 = non_parent_cnpj8[["EXPORTER", "cnpj8", "CNPJ", "vol"]]
# find amount of labels identified for each distinct cnpj8
non_parent = (
subset_non_parent_cnpj8.groupby(["cnpj8"])
.agg({"EXPORTER": "nunique"})
.reset_index()
)
non_parent[non_parent["EXPORTER"] > 1]
# find name versions
subset_non_parent_cnpj8[
subset_non_parent_cnpj8["cnpj8"] == "03465501"
].EXPORTER.unique()
# find full cnpj and check it against cnpj official register
subset_non_parent_cnpj8[subset_non_parent_cnpj8["cnpj8"] == "03465501"].CNPJ.unique()
# result
LONELY_COMPANIES_DICT = {
"NATURALLE AGRO MERCANTIL S.A.": "NATURALLE AGRO MERCANTIL LTDA",
"NATURALLE AGRO MERCANTIL EIRELI": "NATURALLE AGRO MERCANTIL LTDA",
"SERTRADING BR LTDA": "SERTRADING S/A",
"SERTRADING S/A": "SERTRADING S/A",
"BON-MART FRIGORIFICO LTDA": "BON-MART FRIGORIFICO LTDA",
"BON MART FRIGORIFICO LTDA": "BON-MART FRIGORIFICO LTDA",
"TIMBAUBA S.A.": "TIMBAUBA S.A.",
"QUEIROZ GALVAO ALIMENTOS S/A": "TIMBAUBA S.A.", ## invalid cnpj: 04899037000000
"SAINTE MARIE IMPORTACAO E EXPORTACAO LTDA": "SAINTE MARIE IMPORTACAO E EXPORTACAO LTDA",
"SAINT MARIE IMPORTACAO & EXPORTACAO LTDA": "SAINTE MARIE IMPORTACAO E EXPORTACAO LTDA",
"NIDERA SEMENTES LTDA.": "COFCO INTERNATIONAL GRAOS E OLEAGINOSAS LTDA.",
"COFCO INTERNATIONAL GRAOS E OLEAGINOSAS LTDA.": "COFCO INTERNATIONAL GRAOS E OLEAGINOSAS LTDA.", ## invalid cnpj: 07053693000000
"ADECOAGRO VALE DO IVINHEMA S.A.": "ADECOAGRO VALE DO IVINHEMA S.A.",
"ADECOAGRO VALE DO IVINHEMA SA": "ADECOAGRO VALE DO IVINHEMA S.A.",
"MARGUSA - MARANHAO GUSA LTDA": "MARGUSA - MARANHAO GUSA LTDA",
"MARGUSA MARANHAO GUSA SA": "MARGUSA - MARANHAO GUSA LTDA",
"BRASIL ALIMENTOS COMERCIAL EXPORTACAO & IMPORTACAO LTDA - M": "MARINHO MOREIRA DA ROCHA",
"BRASIL ALIMENTOS COMERCIAL EXPORTACAO & IMPORTACAO LTDA": "MARINHO MOREIRA DA ROCHA", ### changed company name for "fantasy name"
"SABARA QUIMICOS E INGREDIENTES S/A": "SABARA QUIMICOS E INGREDIENTES S/A",
"SABARA QUIMICOS E INGREDIENTES S.A": "SABARA QUIMICOS E INGREDIENTES S/A",
"RIO GRANDE AGRICOLA S.A": "RIO GRANDE AGRICOLA S.A",
"RIO GRANDE AGRICOLA SA": "RIO GRANDE AGRICOLA S.A",
"DUAL DUARTE ALBUQUERQUE COMERCIO E INDUSTRIA LTDA": "DUAL DUARTE ALBUQUERQUE COMERCIO E INDUSTRIA LTDA",
"DUAL DUARTE ALBUQUERQUE COMERCIO & INDUSTRIA LTDA": "DUAL DUARTE ALBUQUERQUE COMERCIO E INDUSTRIA LTDA",
"PROQUIGEL QUIMICA SA": "PROQUIGEL QUIMICA S/A",
"PROQUIGEL QUIMICA S/A": "PROQUIGEL QUIMICA S/A",
"FERTITEX AGRO FERTILIZANTES & PRODUTOS AGROPECUARIOS LT": "FERTITEX AGRO - FERTILIZANTES E PRODUTOS AGROPECUARIOS LTDA",
"FERTITEX AGRO - FERTILIZANTES E PRODUTOS AGROPECUARIOS LTDA": "FERTITEX AGRO - FERTILIZANTES E PRODUTOS AGROPECUARIOS LTDA",
"IGUACU CELULOSE PAPEL SA": "IGUACU CELULOSE PAPEL S/A",
"IGUACU CELULOSE PAPEL S/A": "IGUACU CELULOSE PAPEL S/A",
"CAMERA AGROALIMENTOS SA": "CAMERA AGROINDUSTRIAL S.A.",
"CAMERA AGROALIMENTOS S.A": "CAMERA AGROINDUSTRIAL S.A.",
}
# extract rows with CNPJ8 = 00000000
# extract rows in which cnpj8 = 00000000
cnpj8_0 = non_parent_cnpj8[non_parent_cnpj8["cnpj8"] == "00000000"]
non_parent_cnpj8 = non_parent_cnpj8[non_parent_cnpj8["cnpj8"] != "00000000"]
# create subsets
subset_cnpj8_0 = cnpj8_0[["EXPORTER", "cnpj8", "CNPJ", "vol"]]
subset_non_parent_cnpj8 = non_parent_cnpj8[["EXPORTER", "cnpj8", "CNPJ", "vol"]]
# check for rows counts
non_parent_cnpj8["cnpj8"].nunique()
# check for rows count
cnpj8_0["cnpj8"].nunique()
# Resolve data issues
zero_cnpj8 = (
subset_cnpj8_0.groupby(["cnpj8"]).agg({"EXPORTER": "nunique"}).reset_index()
)
zero_cnpj8[zero_cnpj8["EXPORTER"] > 1]
# Rows with cnpj8 = 00000000 - how many soy volume in 2020?
cnpj0_2020 = cnpj8_0[cnpj8_0["base_year"] == "2020"]
total_vol_cnpj0_2020 = cnpj0_2020["vol"].sum()
year_2020 = hist_data[hist_data["base_year"] == "2020"]
total_vol_2020 = year_2020["vol"].sum()
percentage_over_year_total = total_vol_cnpj0_2020 * 100 / total_vol_2020
percentage_over_year_total
# Rows with cnpj8 = 00000000 - how many soy volume in 2021?
cnpj0_2021 = cnpj8_0[cnpj8_0["base_year"] == "2021"]
total_vol_cnpj0_2021 = cnpj0_2021["vol"].sum()
year_2021 = hist_data[hist_data["base_year"] == "2021"]
total_vol_2021 = year_2021["vol"].sum()
percentage_over_year_total = total_vol_cnpj0_2021 * 100 / total_vol_2021
percentage_over_year_total
Comments
- Trade made by exporters with cnpj8= '00000000' represents approximately 43% of the total soy exported by all states of Brazil in 2020 and approximately 15% in 2021.
- There still need to do more cleaning in order to be able to work with CNPJ14 and rows in which CNPJ8 = 00000000.
# Make dictionary
# Group of companies with the presence of the controllers
# apply dictionary
subset_parent_cnpj8["distinct_count"] = subset_parent_cnpj8.groupby("cnpj8")[
"EXPORTER"
].transform("nunique")
subset_parent_cnpj8["company_label"] = np.where(
subset_parent_cnpj8["distinct_count"] > 1,
subset_parent_cnpj8["EXPORTER"].map(MAIN_COMPANIES_DICT),
subset_parent_cnpj8["EXPORTER"],
)
parent_companies_dict = subset_parent_cnpj8[
["company_label", "cnpj8"]
].drop_duplicates()
# check rows count
len(parent_companies_dict)
# Make dictionary
# lonely companies
subset_non_parent_cnpj8["distinct_count"] = subset_non_parent_cnpj8.groupby("cnpj8")[
"EXPORTER"
].transform("nunique")
subset_non_parent_cnpj8["company_label"] = np.where(
subset_non_parent_cnpj8["distinct_count"] > 1,
subset_non_parent_cnpj8["EXPORTER"].map(LONELY_COMPANIES_DICT),
subset_non_parent_cnpj8["EXPORTER"],
)
lonely_companies_dict = subset_non_parent_cnpj8[
["company_label", "cnpj8"]
].drop_duplicates()
# check rows count
len(lonely_companies_dict)
# Make dictionary
# cnpj8 == 00000000
cnpj0_dict = subset_cnpj8_0.groupby("cnpj8")["EXPORTER"].first().reset_index()
cnpj0_dict["company_label"] = "ignored"
cnpj0_dict.drop("EXPORTER", axis=1, inplace=True)
# check rows count
cnpj0_dict["cnpj8"].nunique()
# Create field to indicate whether the name of the exporter refers to controller or subsidiary
parent_companies_dict["exporter_status"] = "main"
lonely_companies_dict["exporter_status"] = "lonely"
cnpj0_dict["exporter_status"] = "ignored"
# Make dictionary
company_dict = pd.concat(
[parent_companies_dict, lonely_companies_dict, cnpj0_dict], ignore_index=True
)
# check rows count
company_dict["cnpj8"].nunique()
# check rows count
len(company_dict)
Comments:
- EXPORTERS with CNPJ8 == '00000000' are acconted for as 'ignored'.
3.General plots
# Aggregate soy volume by year
total_volume_by_year = hist_data.groupby("base_year")["vol"].sum().reset_index()
# Create a dictionary to map values from total_volume_by_year to other tables
map_to_total_by_year = dict(
zip(total_volume_by_year["base_year"], total_volume_by_year["vol"])
)
total_volume_by_year.head(10)
# Create line chart for brazil soy exportations over time
# Create the line chart
plt.plot(total_volume_by_year["base_year"], total_volume_by_year["vol"])
# Add title and labels
plt.title("Soy exportation over time in Brazil")
plt.xlabel("Year")
plt.ylabel("Soy tones")
# Prevent scientific notation on the y-axis
plt.gca().get_yaxis().get_major_formatter().set_scientific(False)
# Show the chart
plt.show()
# group soy volume by year and state
total_year_state = (
hist_data.groupby(["base_year", "state"]).agg({"vol": "sum"}).reset_index()
)
total_year_state["base_year"] = total_year_state["base_year"].astype(int)
# Create a dictionary to map total volume by state
map_to_state = dict(
zip(
zip(total_year_state["base_year"], total_year_state["state"]),
total_year_state["vol"],
)
) #
# apply dictionary map_to_total_by_year to map soy totals by year
total_year_state["vol_percentage_annualTotal"] = (
total_year_state["vol"]
* 100
/ total_year_state["base_year"].map(map_to_total_by_year)
)
# Filter dataset by list of north states
north_states = total_year_state[total_year_state["state"].isin(NORTH)]
north_states["base_year"] = north_states["base_year"].astype(str)
# Create plots
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
sns.set_style("whitegrid") # Use "whitegrid" style for line chart
# Line Chart
sns.lineplot(
data=north_states, x="base_year", y="vol", hue="state", ax=axes[0], errorbar=None
)
axes[0].set_title("Soy volume exported by North states of Brazil")
# Bar Chart
sns.barplot(
data=north_states,
x="base_year",
y="vol",
hue="state",
edgecolor="black",
ax=axes[1],
errorbar=None,
)
axes[1].set_title("Soy volume exported by North states of Brazil")
# Show the plots
plt.show()
# Function that calculates the difference in soybean raw volume exported between years
north_states["base_year"] = north_states["base_year"].astype(int)
df_2016_2017 = north_states[north_states["base_year"].isin([2016, 2017])]
df_2017_2018 = north_states[north_states["base_year"].isin([2017, 2018])]
df_2018_2019 = north_states[north_states["base_year"].isin([2018, 2019])]
df_2019_2020 = north_states[north_states["base_year"].isin([2019, 2020])]
df_2020_2021 = north_states[north_states["base_year"].isin([2020, 2021])]
def compare_intervals(df, start_year, end_year):
result_df = pd.DataFrame(columns=["state", "volume_difference", "comparison"])
states = north_states["state"].unique()
for state in states:
group = df[df["state"] == state]
if len(group) > 1:
group_start = group[group["base_year"] == start_year]
group_end = group[group["base_year"] == end_year]
if not group_start.empty and not group_end.empty:
volume_diff = group_end["vol"].values[0] - group_start["vol"].values[0]
result_df = result_df.append(
{
"state": state,
"volume_difference": volume_diff,
"comparison": f"{start_year}-{end_year}",
},
ignore_index=True,
)
return result_df
# Apply function to compare 2016-2017
result_uf = compare_intervals(df_2016_2017, 2016, 2017)
# Apply finction to compare 2017-2018
result_uf = result_uf.append(compare_intervals(df_2017_2018, 2017, 2018))
# Apply function to compare 2018-2019
result_uf = result_uf.append(compare_intervals(df_2018_2019, 2018, 2019))
# Apply function to compare 2019-2020
result_uf = result_uf.append(compare_intervals(df_2019_2020, 2019, 2020))
# Apply function to compare 2020-2021
result_uf = result_uf.append(compare_intervals(df_2020_2021, 2020, 2021))
result_uf["base_year"] = result_uf["comparison"].str[:4]
result_uf["base_year"] = result_uf["base_year"].astype(int)
result_uf["year"] = result_uf["comparison"].str[5:9]
# Define a custom function to map soy volumes from total_year_state using dictionary map_to_state
def get_volume(row):
return map_to_state.get((row["base_year"], row["state"]), None)
# Apply function
result_uf["total_volume_state"] = result_uf.apply(get_volume, axis=1)
# Calculate the percentage difference in soy volume between years.
result_uf["percentage_difference"] = (
result_uf["volume_difference"] * 100 / result_uf["total_volume_state"]
)
# percentual variation of soy volume - chart
# Create subplots
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
sns.set_style("whitegrid") # Use "whitegrid" style for line chart
# Line Chart
sns.lineplot(
data=result_uf,
x="year",
y="percentage_difference",
hue="state",
ax=axes[0],
errorbar=None,
)
axes[0].set_title("Variation in volume percentage of soy exported by state")
# Bar Chart
sns.barplot(
data=result_uf,
x="year",
y="percentage_difference",
hue="state",
edgecolor="black",
ax=axes[1],
errorbar=None,
)
axes[1].set_title("Variation in volume percentage of soy exported by state")
# Show the plots
plt.show()
Comments
- The soy data for AMAPA is discontinued from 2018 onwards.
- The soy data for AMAZONAS is only available in 2019 and 2020.
- In 2021 there are available data only for states of PARA, RORAIMA and TOCANTINS
4. Analysis of soy exportation in the North region of Brazil
# Prepare data
# group soy volumes by cnpj8
group_exporter_cnpj8 = (
hist_data.groupby(
["base_year", "state", "cnpj8", "IMPORTER", "COUNTRY_OF_FIRST_IMPORT"]
)
.agg({"vol": "sum"})
.reset_index()
)
# convert column type
group_exporter_cnpj8["base_year"] = group_exporter_cnpj8["base_year"].astype(int)
# filter by list of states whithin North region
group_exporter_cnpj8 = group_exporter_cnpj8[group_exporter_cnpj8["state"].isin(NORTH)]
# filter by year
group_exporter_north_2020 = group_exporter_cnpj8[
group_exporter_cnpj8["base_year"] == 2020
]
group_exporter_north_2021 = group_exporter_cnpj8[
group_exporter_cnpj8["base_year"] == 2021
]
# calculate volume quartiles for the year of 2020
q1 = group_exporter_north_2020["vol"].quantile(0.25)
q2 = group_exporter_north_2020["vol"].quantile(0.50)
q3 = group_exporter_north_2020["vol"].quantile(0.75)
iqr = q3 - q1
lower_limit = q1 - 1.5 * iqr
upper_limit = q3 + 1.5 * iqr
# extract rows within each quartile
q1_exp_group = group_exporter_north_2020[group_exporter_north_2020.vol <= q1]
q1_exp_group["quartile"] = "q1"
q2_exp_group = group_exporter_north_2020[
(group_exporter_north_2020.vol <= q2) & (group_exporter_north_2020.vol > q1)
]
q2_exp_group["quartile"] = "q2"
q3_exp_group = group_exporter_north_2020[
(group_exporter_north_2020.vol <= q3) & (group_exporter_north_2020.vol > q2)
]
q3_exp_group["quartile"] = "q3"
upper_limit_exp_group = group_exporter_north_2020[
(group_exporter_north_2020.vol <= upper_limit)
& (group_exporter_north_2020.vol > q3)
]
upper_limit_exp_group["quartile"] = "upper_limit"
outliers = group_exporter_north_2020[
(group_exporter_north_2020.vol < lower_limit)
| (group_exporter_north_2020.vol > upper_limit)
]
outliers["quartile"] = "outlier"
# concatenate results
north2020 = pd.concat(
[q1_exp_group, q2_exp_group, q3_exp_group, upper_limit_exp_group, outliers],
ignore_index=True,
)
# check
north2020["quartile"].nunique()
# check
(
len(q1_exp_group)
+ len(q2_exp_group)
+ len(q3_exp_group)
+ len(upper_limit_exp_group)
+ len(outliers)
)
# check
len(group_exporter_north_2020)
# check
len(north2020)
# Calculate total soy volume exported by North States in 2020
tabulate_year = (
group_exporter_north_2020.groupby(["base_year"]).agg({"vol": "sum"}).reset_index()
)
total_2020 = tabulate_year[tabulate_year["base_year"] == 2020]
total_2020
# Calculate percentage of soy volume exported by groups of exporters over total exported by North region and north states in 2020
# Create a tuple of the matching columns
north2020["Key_state"] = list(zip(north2020["base_year"], north2020["state"]))
# Use dictionary to get the volume values
north2020["total_volume_state"] = north2020["Key_state"].map(map_to_state)
north2020["total_volume_year"] = 4695198985.641400
# Calculate percentages
north2020["percentage_over_total_by_state"] = (
north2020["vol"] * 100 / north2020["total_volume_state"]
)
north2020["percentage_over_total_by_year"] = (
north2020["vol"] * 100 / north2020["total_volume_year"]
) # year
## Get names from companies dictionary
north2020 = pd.merge(
north2020,
company_dict[["company_label", "exporter_status", "cnpj8"]],
on="cnpj8",
how="left",
)
# Generate tables
# Order groups of exporters by their participation in total volume of soy exported by North States
north2020.groupby(["company_label", "exporter_status"]).agg(
{"percentage_over_total_by_year": "sum"}
).reset_index().sort_values(by=["percentage_over_total_by_year"], ascending=[False])
# Participation of quartile classes in total volume exported by each North State
north2020.groupby(["state", "quartile"]).agg(
{"percentage_over_total_by_state": "sum"}
).reset_index().sort_values(
by=["state", "percentage_over_total_by_state"], ascending=[False, False]
)
# calculate volume quartiles for the year of 2021
# calculate volume quartiles for the year of 2021
q1 = group_exporter_north_2021["vol"].quantile(0.25)
q2 = group_exporter_north_2021["vol"].quantile(0.50)
q3 = group_exporter_north_2021["vol"].quantile(0.75)
iqr = q3 - q1
lower_limit = q1 - 1.5 * iqr
upper_limit = q3 + 1.5 * iqr
# extract rows within each quartile
q1_exp_group = group_exporter_north_2021[group_exporter_north_2021.vol <= q1]
q1_exp_group["quartile"] = "q1"
q2_exp_group = group_exporter_north_2021[
(group_exporter_north_2021.vol <= q2) & (group_exporter_north_2021.vol > q1)
]
q2_exp_group["quartile"] = "q2"
q3_exp_group = group_exporter_north_2021[
(group_exporter_north_2021.vol <= q3) & (group_exporter_north_2021.vol > q2)
]
q3_exp_group["quartile"] = "q3"
upper_limit_exp_group = group_exporter_north_2021[
(group_exporter_north_2021.vol <= upper_limit)
& (group_exporter_north_2021.vol > q3)
]
upper_limit_exp_group["quartile"] = "upper_limit"
outliers = group_exporter_north_2021[
(group_exporter_north_2021.vol < lower_limit)
| (group_exporter_north_2021.vol > upper_limit)
]
outliers["quartile"] = "outlier"
# concatenate results
north2021 = pd.concat(
[q1_exp_group, q2_exp_group, q3_exp_group, upper_limit_exp_group, outliers],
ignore_index=True,
)
# check
north2021["quartile"].nunique()
# check
(
len(q1_exp_group)
+ len(q2_exp_group)
+ len(q3_exp_group)
+ len(upper_limit_exp_group)
+ len(outliers)
)
# check
len(group_exporter_north_2021)
# check
len(north2021)
# Calculate total soy volume exported by North States in 2021
tabulate_year = (
group_exporter_north_2021.groupby(["base_year"]).agg({"vol": "sum"}).reset_index()
)
total_2021 = tabulate_year[tabulate_year["base_year"] == 2021]
total_2021
# Calculate percentage of volume soy from quartiles over total exported by north states in 2020
# Create a tuple of the matching columns
north2021["Key_state"] = list(zip(north2021["base_year"], north2021["state"]))
# Use dictionary to get the volume values
north2021["total_volume_state"] = north2021["Key_state"].map(map_to_state)
north2021["total_volume_year"] = 2803877107.335800
# Calculate percentages
north2021["percentage_over_total_by_state"] = (
north2021["vol"] * 100 / north2021["total_volume_state"]
)
north2021["percentage_over_total_by_year"] = (
north2021["vol"] * 100 / north2021["total_volume_year"]
) # year
# Get names from companies dictionary
north2021 = pd.merge(
north2021,
company_dict[["company_label", "exporter_status", "cnpj8"]],
on="cnpj8",
how="left",
)
# Generate tables
# Order groups of exporters by their participation in total volume of soy exported by North States
north2021.groupby(["company_label", "exporter_status"]).agg(
{"percentage_over_total_by_year": "sum"}
).reset_index().sort_values(by=["percentage_over_total_by_year"], ascending=[False])
# Participation of quartile classes in total volume exported by each North State
north2021.groupby(["state", "quartile"]).agg(
{"percentage_over_total_by_state": "sum"}
).reset_index().sort_values(by=["state", "quartile"], ascending=[True, True])
5. Charts
# Select quantile classes of interest from each year dataset and concatenate
# 2020
group_1 = north2020[
(north2020["state"] == "PARA") & (north2020["quartile"] == "outlier")
]
group_2 = north2020[
(north2020["state"] == "PARA") & (north2020["quartile"] == "upper_limit")
]
group_3 = north2020[
(north2020["state"] == "RORAIMA") & (north2020["quartile"] == "outlier")
]
group_4 = north2020[
(north2020["state"] == "RONDONIA") & (north2020["quartile"] == "outlier")
]
group_5 = north2020[
(north2020["state"] == "RONDONIA") & (north2020["quartile"] == "upper_limit")
]
group_6 = north2020[
(north2020["state"] == "TOCANTINS") & (north2020["quartile"] == "outlier")
]
group_7 = north2020[
(north2020["state"] == "TOCANTINS") & (north2020["quartile"] == "upper_limit")
]
group_8 = north2020[
(north2020["state"] == "ACRE") & (north2020["quartile"] == "upper_limit")
]
group_9 = north2020[
(north2020["state"] == "AMAZONAS") & (north2020["quartile"] == "upper_limit")
]
group2020 = pd.concat(
[group_1, group_2, group_3, group_4, group_5, group_6, group_7, group_8, group_9],
ignore_index=True,
)
# 2021
group_1 = north2021[
(north2021["state"] == "PARA") & (north2021["quartile"] == "outlier")
]
group_2 = north2021[
(north2021["state"] == "PARA") & (north2021["quartile"] == "upper_limit")
]
group_3 = north2021[(north2021["state"] == "RORAIMA") & (north2021["quartile"] == "q3")]
group_4 = north2021[(north2021["state"] == "RORAIMA") & (north2021["quartile"] == "q2")]
group_5 = north2021[
(north2021["state"] == "TOCANTINS") & (north2021["quartile"] == "outlier")
]
group_6 = north2021[
(north2021["state"] == "TOCANTINS") & (north2021["quartile"] == "upper_limit")
]
group2021 = pd.concat(
[group_1, group_2, group_3, group_4, group_5, group_6], ignore_index=True
)
# concatenate
group = pd.concat([group2020, group2021], ignore_index=True)
group["base_year"] = group["base_year"].astype(str)
# Plots
# Exporters-upwards
# Exporter
group1 = (
group.groupby(["state", "base_year", "company_label", "cnpj8"])
.agg({"percentage_over_total_by_state": "sum"})
.reset_index()
.sort_values(by=["percentage_over_total_by_state"], ascending=[False])
)
group1_para = group1[group1["state"] == "PARA"]
group1_tocantins = group1[group1["state"] == "TOCANTINS"]
group1_acre = group1[group1["state"] == "ACRE"]
group1_amazonas = group1[group1["state"] == "AMAZONAS"]
group1_rondonia = group1[group1["state"] == "RONDONIA"]
group1_roraima = group1[group1["state"] == "RORAIMA"]
# Importer
group2 = (
group.groupby(["state", "base_year", "company_label", "cnpj8", "IMPORTER"])
.agg({"percentage_over_total_by_state": "sum"})
.reset_index()
.sort_values(by=["percentage_over_total_by_state"], ascending=[False])
)
group2_para = group2[group2["state"] == "PARA"]
group2_tocantins = group2[group2["state"] == "TOCANTINS"]
group2_acre = group2[group2["state"] == "ACRE"]
group2_amazonas = group2[group2["state"] == "AMAZONAS"]
group2_rondonia = group2[group2["state"] == "RONDONIA"]
group2_roraima = group2[group2["state"] == "RORAIMA"]
# Country of first import
group3 = (
group.groupby(
["state", "base_year", "company_label", "cnpj8", "COUNTRY_OF_FIRST_IMPORT"]
)
.agg({"percentage_over_total_by_state": "sum"})
.reset_index()
.sort_values(by=["percentage_over_total_by_state"], ascending=[False])
)
group3_para = group3[group3["state"] == "PARA"]
group3_tocantins = group3[group3["state"] == "TOCANTINS"]
group3_acre = group3[group3["state"] == "ACRE"]
group3_amazonas = group3[group3["state"] == "AMAZONAS"]
group3_rondonia = group3[group3["state"] == "RONDONIA"]
group3_roraima = group3[group3["state"] == "RORAIMA"]
### PARA
### exporter
(
so.Plot(group1_para, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - PARA".format,
)
.layout(size=(15, 7))
)
# importer
(
so.Plot(
group2_para,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - PARA".format,
)
.layout(size=(15, 7))
)
# Country of first import
(
so.Plot(
group3_para,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - PARA".format,
)
.layout(size=(15, 7))
)
# TOCANTINS
# Exporters
(
so.Plot(group1_tocantins, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - TOCANTINS".format,
)
.layout(size=(20, 10))
)
# Importers
(
so.Plot(
group2_tocantins,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - TOCANTINS".format,
)
.layout(size=(18, 10))
)
# Country of first import
(
so.Plot(
group3_tocantins,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - TOCANTINS".format,
)
.layout(size=(18, 10))
)
# ACRE
# Exporter
(
so.Plot(group1_acre, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - ACRE".format,
)
.layout(size=(5, 3))
)
# Importer
(
so.Plot(
group2_acre,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - ACRE".format,
)
.layout(size=(4, 3))
)
# Country of first import
(
so.Plot(
group3_acre,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - ACRE".format,
)
.layout(size=(4, 3))
)
# AMAZONAS
# Exporter
(
so.Plot(group1_amazonas, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - AMAZONAS".format,
)
.layout(size=(5, 3))
)
# Importer
(
so.Plot(
group2_amazonas,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - AMAZONAS".format,
)
.layout(size=(4, 3))
)
# Country of first import
(
so.Plot(
group3_amazonas,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - AMAZONAS".format,
)
.layout(size=(4, 3))
)
# RORAIMA
# Exporter
(
so.Plot(group1_roraima, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RORAIMA".format,
)
.layout(size=(10, 7))
)
# Importer
(
so.Plot(
group2_roraima,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RORAIMA".format,
)
.layout(size=(10, 7))
)
# Country of first import
(
so.Plot(
group3_roraima,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RORAIMA".format,
)
.layout(size=(10, 7))
)
# RONDONIA
# Exporter
(
so.Plot(group1_rondonia, x="percentage_over_total_by_state", y="company_label")
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RONDONIA".format,
)
.layout(size=(10, 7))
)
# Importer
(
so.Plot(
group2_rondonia,
x="percentage_over_total_by_state",
y="company_label",
color="IMPORTER",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RONDONIA".format,
)
.layout(size=(10, 7))
)
# Country of first import
(
so.Plot(
group3_rondonia,
x="percentage_over_total_by_state",
y="company_label",
color="COUNTRY_OF_FIRST_IMPORT",
)
.add(so.Bar(), so.Agg(), so.Stack())
.facet("base_year", wrap=1)
.label(
title="{} - RONDONIA".format,
)
.layout(size=(10, 7))
)
# Check for data issues
# unknown importers - 2020
importer_group = north2020.groupby(["IMPORTER"]).agg({"vol": "sum"}).reset_index()
importer_select = importer_group[
(importer_group["IMPORTER"] == "NONE")
| (importer_group["IMPORTER"] == "UNKNOWN CUSTOMER")
]
importer_select_vol = importer_select["vol"].sum()
percentage_importers_select = (
importer_select_vol * 100 / total_2020
) # use variable total_2020
percentage_importers_select
# ignored exporters - 2020
exporter_group = north2020.groupby(["company_label"]).agg({"vol": "sum"}).reset_index()
exporter_group_select = exporter_group[(exporter_group["company_label"] == "ignored")]
exporter_group_select_vol = exporter_group_select["vol"].sum()
percentage_exporters_select = (
exporter_group_select_vol * 100 / total_2020
) # use variable total_2020
percentage_exporters_select
# unknown importers - 2021
importer_group = north2021.groupby(["IMPORTER"]).agg({"vol": "sum"}).reset_index()
importer_select = importer_group[
(importer_group["IMPORTER"] == "NONE")
| (importer_group["IMPORTER"] == "UNKNOWN CUSTOMER")
]
importer_select_vol = importer_select["vol"].sum()
percentage_importers_select = (
importer_select_vol * 100 / total_2021
) # use variable total_2021
percentage_importers_select
# ignored exporters - 2021
exporter_group = north2021.groupby(["company_label"]).agg({"vol": "sum"}).reset_index()
exporter_group_select = exporter_group[(exporter_group["company_label"] == "ignored")]
exporter_group_select_vol = exporter_group_select["vol"].sum()
percentage_exporters_select = (
exporter_group_select_vol * 100 / total_2021
) # use variable total_2021
percentage_exporters_select
Comments:
- The soy volumes imported by unidentified importers correspond to approximately 86% of the total exported by North States in 2020 and 78% in 2021
- The soy volumes exported by unidentified exporters correspond to approximately 47% of the total exported by North States in 2020 and 15% in 2021