Skip to content

LandConflict Report

View or edit on GitHub

This page is synchronized from trase/data/brazil/social/LandConflict_Report.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

from IPython.display import Image
from IPython.core.display import HTML 
Image("../../../GENERAL/Trase_logo_grey.png", width=500, height=500)

Partnership with APublica

Exploratory (Social) Data Analysis

Workplan: https://docs.google.com/document/d/1WpRgwlfrRdbUQSvsdj7e-a93DoC8YaXEeiKg6pOr5rE/edit

This analysis will seek to do several things: - To identify municipalities in which there has been both commodity deforestation and land conflicts happening concurrently 2013-2020. - To identify municipalities in which there has been both commodity expansion and land conflicts happening concurrently 2013-2020. - To identify the municipalities in which land conflicts have been concentrated 2013-2020 and establish whether or not there has been commodity expansion in these places. - To connect these results with Trase’s supply chain map and identify to what extent commodity buyers are sourcing from these municipalities, and municipalities in which commodity expansion and deforestation have coincided with land conflicts.

import numpy as np
import pandas as pd
import geopandas as gpd
import plotly.express as px
from trase.tools.aws.aws_helpers_cached import get_pandas_df_once

from libpysal.weights.contiguity import Queen
from esda.moran import Moran, Moran_Local, Moran_Local_BV
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
import matplotlib.pyplot as plt
# export this notebook to HTML file without input
# jupyter nbconvert LandConflict_Report.ipynb --no-input --to html --template full
import plotly.io as pio

# Export HTML file interactively
pio.renderers.default='jupyterlab'
# pio.renderers.default='notebook'
class Chart:
    def __init__(self, df, agg_col):
        self.df = df
        self.agg_col = agg_col

    def histogram(self, title: str, label: str):
        fig = px.histogram(self.df[self.df[self.agg_col]>0], x=self.agg_col,
                           marginal="box",
                           title=f"{title}", labels={self.agg_col: label},
                           height=400, width=900)
        fig.update_layout(
            plot_bgcolor = 'rgba(0, 0, 0, 0)',
            paper_bgcolor = 'rgba(0, 0, 0, 0)',
            yaxis_title = "Number of Municipalities"
        )

        return fig

    def bar_year(self, title: str, label: str):
        """Vertical bar chart"""
        df_year = self.df.groupby("YEAR", as_index=False).agg({self.agg_col: "sum"}).sort_values(self.agg_col, ascending=False)
        df_year['category'] = [str(i) for i in df_year.index]
        colors = ["rgb(189,189,189)"] * df_year["YEAR"].nunique()
        colors[0] = "rgb(255,106,95)"
        colors[1] = "rgb(120,120,120)"

        fig = px.bar(df_year, x="YEAR", y=self.agg_col, color='category', 
                     hover_name="YEAR", hover_data={"category": False, "YEAR":False},
                     labels={self.agg_col: label, "YEAR": ""},
                     color_discrete_sequence=colors, title=f"{title}",
                     height=500, width=800)
        fig.update_layout(
            showlegend=False,
            plot_bgcolor = 'rgba(0, 0, 0, 0)',
            paper_bgcolor = 'rgba(0, 0, 0, 0)',
            xaxis = dict(tickmode = 'linear', tick0 = 1),
        )

        return fig

    def bar_year_uf(self, title: str, label: str):
        """Vertical bar chart"""
        df_per_year_uf = self.df[self.df["YEAR"]>=2013].groupby(["YEAR", "SIGLA_UF"], as_index=False).agg({self.agg_col: "sum"})
        df_per_year_uf = df_per_year_uf.assign(UF_YEAR=lambda x: x["SIGLA_UF"] + " - " + x["YEAR"].astype(str))

        fig = px.bar(df_per_year_uf, x="YEAR", y=self.agg_col, color="SIGLA_UF", 
                     barmode="group", color_discrete_sequence=px.colors.qualitative.Pastel,
                     hover_name="UF_YEAR", hover_data={"YEAR":False, "SIGLA_UF":False},
                     height=500, width=900, labels={"YEAR": "", self.agg_col: label, "SIGLA_UF": "State"})

        fig.update_layout(
                plot_bgcolor = 'rgba(0, 0, 0, 0)',
                paper_bgcolor = 'rgba(0, 0, 0, 0)',
                xaxis = dict(tickmode = 'linear', tick0 = 1)
            )

        return fig

    def bar_mun_year(self, title: str, label: str):
        """Horizontal bar chart"""
        apb_mun_sorted = self.df[self.df["YEAR"]>=2013].sort_values(self.agg_col, ascending=False).head(20)

        fig = px.bar(apb_mun_sorted, x=self.agg_col, y="MUN_UF", color="YEAR",
                     color_continuous_scale="Viridis", title=f"{title}", 
                    height=500, width=800, hover_name="MUN_UF", hover_data={"MUN_UF": False},
                     labels={self.agg_col: label, "MUN_UF": "Municipalities", "YEAR": "Year"})
        fig.update_layout(
                plot_bgcolor = 'rgba(0, 0, 0, 0)',
                paper_bgcolor = 'rgba(0, 0, 0, 0)'
        )
        return fig


    def bar_mun_uf_total(self, agg_method: str, title: str, label: str):
        df_mun_group = (self.df[self.df["YEAR"]>=2013]
                         .groupby(["TRASE_ID", "MUN_UF"], as_index=False)
                         .agg({self.agg_col: agg_method})
                         .sort_values(self.agg_col, ascending=False)
                         .head(20)
                    )

        df_mun_group['category'] = [str(i) for i in df_mun_group.index]
        colors = ["rgb(189,189,189)"] * df_mun_group["TRASE_ID"].nunique()
        colors[0] = "rgb(255,106,95)"
        colors[1] = "rgb(120,120,120)"

        fig = px.bar(df_mun_group, x=self.agg_col, y="MUN_UF", 
                    color='category', color_discrete_sequence=colors, title=f"{title}",
                     hover_name="MUN_UF", hover_data={'category':False, "MUN_UF": False},
                    height=600, width=800, 
                     labels={self.agg_col: label, "MUN_UF": "Municipalities"})
        fig.update_layout(
            showlegend=False,
            plot_bgcolor = 'rgba(0, 0, 0, 0)',
            paper_bgcolor = 'rgba(0, 0, 0, 0)',
            yaxis={'categoryorder':'total ascending'}
        )
        return fig

    def subplot_hist(self, title: str):
        import plotly.graph_objects as go
        from plotly.subplots import make_subplots

        fig = make_subplots(rows=2, cols=4)

        df = self.df[self.df[self.agg_col]>0]

        trace0 = go.Histogram(x=df[df["YEAR"]==2020][self.agg_col], autobinx=True, name="2020", marker_color="rgb(255,106,95)")
        trace1 = go.Histogram(x=df[df["YEAR"]==2019][self.agg_col], autobinx=True, name="2019", marker_color="rgb(120,120,120)")
        trace2 = go.Histogram(x=df[df["YEAR"]==2018][self.agg_col], autobinx=True, name="2018", marker_color="rgb(189,189,189)")
        trace3 = go.Histogram(x=df[df["YEAR"]==2017][self.agg_col], autobinx=True, name="2017", marker_color="#EE7A68")
        trace4 = go.Histogram(x=df[df["YEAR"]==2016][self.agg_col], autobinx=True, name="2016", marker_color="#604693")
        trace5 = go.Histogram(x=df[df["YEAR"]==2015][self.agg_col], autobinx=True, name="2015", marker_color="rgb(189,189,189)")
        trace6 = go.Histogram(x=df[df["YEAR"]==2014][self.agg_col], autobinx=True, name="2014", marker_color='#330C73')
        trace7 = go.Histogram(x=df[df["YEAR"]==2013][self.agg_col], autobinx=True, name="2013", marker_color='#EB89B5')

        fig.append_trace(trace0, 1, 1)            
        fig.append_trace(trace1, 1, 2)
        fig.append_trace(trace2, 1, 3)
        fig.append_trace(trace3, 1, 4)
        fig.append_trace(trace4, 2, 1)
        fig.append_trace(trace5, 2, 2)
        fig.append_trace(trace6, 2, 3)
        fig.append_trace(trace7, 2, 4)

        fig.update_layout(
            autosize=False,
            height=500,
            width=900,
            plot_bgcolor = 'rgba(0, 0, 0, 0)',
            paper_bgcolor = 'rgba(0, 0, 0, 0)',
        )

        fig.update_xaxes(title=f"{title}")
        fig.update_yaxes(title="Num. Municipalities")

        return fig.show()
def mun_diff_capacity(df: pd.DataFrame, vol: str):
    """Checks whether the previous values is equal to the current year,
    if so `CAP_DIFF` is False otherwise, True"""
    import numpy as np

    df = df.sort_values(["TRASE_ID", "YEAR"])

    previous_value = df[vol].shift(1)

    unique_trase_id = sorted(df["TRASE_ID"].unique())
    unique_years = sorted(df["YEAR"].unique())

    first_year = unique_years[0]
    not_first_year = df["YEAR"] != first_year

    bigger_previous = df[vol] > previous_value
    equal_previous = df[vol] == previous_value
    lower_previous = df[vol] < previous_value

    for i, j in zip(unique_trase_id, unique_years):
        df.loc[df["YEAR"] == first_year, "AREA_TREND"] = np.NaN
        df.loc[(not_first_year) & (bigger_previous), "AREA_TREND"] = "Increased"
        df.loc[(not_first_year) & (bigger_previous), "AREA_DIFF"] = df[vol] - previous_value
        df.loc[equal_previous, "AREA_TREND"] = "Equal"
        df.loc[(not_first_year) & (lower_previous), "AREA_TREND"] = "Decreased"
        df.loc[(not_first_year) & (lower_previous), "AREA_DIFF"] = df[vol] - previous_value

    return df
class Regression:
    def __init__(self, df):
        self.df = df

    def quantile_normalization(self, col_one, col_two):
        """Implements a quantile normalization
        https://en.wikipedia.org/wiki/Quantile_normalization """

        df = self.df.copy()
        df = df[[col_one, col_two]]

        # compute mean rank
        dic = {}
        for col in df:
            dic.update({col : sorted(df[col])})
        sorted_df = pd.DataFrame(dic)
        rank = sorted_df.mean(axis = 1).tolist()

        # sort
        for col in df:
            t = np.searchsorted(np.sort(df[col]), df[col])
            df[col] = [rank[i] for i in t]

        return df

    def plot_regression(self, col_one, col_two, title, xaxis, yaxis):
        both = Regression.quantile_normalization(self, col_one, col_two)

        fig = px.scatter(y=both[col_one],x=both[col_two], 
                         trendline="ols", opacity=0.65, trendline_scope="overall",
                         title=title, labels={"y": yaxis, "x": xaxis},
                         height=540, width=800)

        fig.update_layout(
                showlegend=False,
                plot_bgcolor = 'rgba(0, 0, 0, 0)',
                paper_bgcolor = 'rgba(0, 0, 0, 0)',
        )


        return fig.show()
apb = get_pandas_df_once("brazil/indicators/in/apublica_social_conflicts_2011_2020_q4_2022.csv", sep=",")
apb = apb[apb["CD_LENTE"]==1].copy().rename(columns={"VALUE": "NUM_CONFLICT"})

soy = (get_pandas_df_once("brazil/soy/indicators/out/q2_2022/br_soy_deforestation.csv", usecols=["TRASE_ID", "YEAR", "ABSOLUTE_SUM_DEF"])
        .rename(columns={"ABSOLUTE_SUM_DEF": "SOY_DEF"}))

beef = (get_pandas_df_once("brazil/beef/indicators/out/q1_2023/br_cattle_deforestation_2012_2020_q1_2023.csv", usecols=["TRASE_ID", "YEAR", "CATTLE_DEF"])
        .rename(columns={"CATTLE_DEF": "BEEF_DEF"}))

soy_prod = get_pandas_df_once("brazil/soy/indicators/out/q2_2022/soy_production.csv", usecols=["TRASE_ID", "YEAR", "HA"])
soy_prod = soy_prod[soy_prod["YEAR"] >= 2013]

mun = get_pandas_df_once("brazil/spatial/BOUNDARIES/ibge/2019/br_municipalities_2019_ibge.csv", 
                         usecols=["TRASE_ID", "MUN_NAME", "SIGLA_UF", "NM_UF"]) #, "AREA_HA"

apb_mun = apb.merge(mun, how="left", on="TRASE_ID")
apb_mun["MUN_UF"] = apb_mun["MUN_NAME"] + " - " + apb_mun["SIGLA_UF"]

mun_geo = gpd.read_file("../Spatial/br_municipalities_2019_ibge.shp")
mun_geo = mun_geo[["TRASE_ID", "geometry"]].copy()

apb_mun_geo = mun_geo.merge(apb, how="left", on="TRASE_ID")

apb_mun_geo_soy = apb_mun_geo.merge(soy, how="inner", on=["TRASE_ID", "YEAR"])

soy_mun = soy.merge(mun, how="left", on=["TRASE_ID"])
soy_mun = soy_mun[(soy_mun["SIGLA_UF"].isin(apb_mun["SIGLA_UF"])) & (~soy_mun["SIGLA_UF"].isin(["AC", "AM", "AP", "RR"]))]
soy_mun = soy_mun.assign(MUN_UF=lambda x: x["MUN_NAME"] + " - " + x["SIGLA_UF"])

beef_mun = beef.merge(mun, how="left", on=["TRASE_ID"])
beef_mun = beef_mun[(beef_mun["SIGLA_UF"].isin(apb_mun["SIGLA_UF"])) & (~beef_mun["SIGLA_UF"].isin(["AC", "AM", "AP", "RR"]))]
beef_mun = beef_mun.assign(MUN_UF=lambda x: x["MUN_NAME"] + " - " + x["SIGLA_UF"])

beef_mun_geo = mun_geo.merge((beef.merge(mun, how="left", on=["TRASE_ID"])), how="left", on="TRASE_ID")

soy_mun_geo = mun_geo.merge((soy.merge(mun, how="left", on=["TRASE_ID"])), how="left", on="TRASE_ID")

beef_soy_mun_geo = soy_mun_geo.merge(beef.merge(mun, how="left", on=["TRASE_ID"]), how="left", on=["TRASE_ID", "MUN_NAME", "SIGLA_UF", "NM_UF", "YEAR"])

APublica - Land Conflict EDA

apb_per_year = apb.groupby(["YEAR"], as_index=False).agg({"NUM_CONFLICT":"sum"})
Chart(apb_per_year, "NUM_CONFLICT").bar_year("", "Number of Conflicts")
Chart(apb_mun, "NUM_CONFLICT").bar_year_uf("", "Number of Conflicts")
print(apb[apb["NUM_CONFLICT"]!=0]["TRASE_ID"].nunique(), "municipalities have land conflict between 2013-2020")
Chart(apb, "NUM_CONFLICT").histogram("", "Number of conflicts")
Chart(apb, "NUM_CONFLICT").subplot_hist("Num. conflicts")
Chart(apb_mun.sort_values("NUM_CONFLICT"), "NUM_CONFLICT").bar_mun_year("Top 20+ municipalities/year from 2013 to 2020", "Number of Conflicts")
Chart(apb_mun, "NUM_CONFLICT").bar_mun_uf_total("sum", "Top 20+ municipalities from 2013 to 2020", "Number of Conflicts")
apb_mun_geo_2020 = apb_mun_geo[apb_mun_geo["YEAR"]==2020]
y = apb_mun_geo_2020["NUM_CONFLICT"].values
w = Queen.from_dataframe(apb_mun_geo_2020)
w.transform = 'r'

moran = Moran(y, w)

moran_loc = Moran_Local(y, w)
lisa_cluster(moran_loc, apb_mun_geo_2020, p=0.05, figsize = (12,12))
print("Spatial autocorrelation on municipalities with Land Conflict in 2020:", round(moran.I, 4))
plt.show()

Trase - Soy Deforestation EDA

print("Municipalities been considered:", soy_mun[(soy_mun["YEAR"]==2017) & (soy_mun["SOY_DEF"]>0)].shape[0]) #.sort_values("SOY_DEF", ascending=False).head(20)
print("States been considered:", sorted(soy_mun["SIGLA_UF"].unique()))
Chart(soy_mun, "SOY_DEF").bar_year("", "Soy deforestation (ha)")
Chart(soy_mun, "SOY_DEF").bar_year_uf("", "Soy deforestation (ha)")
Chart(soy_mun, "SOY_DEF").bar_mun_year("Top 20+ municipalities/year from 2013 to 2020", "Soy deforestation (ha)")
Chart(soy_mun, "SOY_DEF").bar_mun_uf_total("mean", "Balsas (MA) has the biggest soy deforestation <b>mean</b><br>from 2013 to 2020", "Soy deforestation (ha)")
# soy_mun_geo_amz_2013_mt = soy_mun_geo_amz_2013[soy_mun_geo_amz_2013["SIGLA_UF"]=="MT"]
def filter_df_year_uf_moran(df: pd.DataFrame, year:int, filter_uf=None) -> pd.DataFrame:
    df = soy_mun_geo[(soy_mun_geo["YEAR"]==year) & (soy_mun_geo["SIGLA_UF"].isin(apb_mun["SIGLA_UF"]))]

    if filter_uf:
        df = df[df["SIGLA_UF"]==filter_uf]
    else:
        df
    return df

soy_mun_geo_amz_2013 = filter_df_year_uf_moran(soy_mun_geo, 2013)

y = soy_mun_geo_amz_2013["SOY_DEF"].values
w = Queen.from_dataframe(soy_mun_geo_amz_2013)
w.transform = 'r'

moran = Moran(y, w)

moran_loc = Moran_Local(y, w)
lisa_cluster(moran_loc, soy_mun_geo_amz_2013, p=0.05, figsize = (8,8))
print(f"Spatial autocorrelation on municipalities in the Amazon Biome with Soy Deforestation in {soy_mun_geo_amz_2013['YEAR'].astype(int).unique()}:", round(moran.I, 4))
plt.show()
soy_mun_geo_amz_2016 = filter_df_year_uf_moran(soy_mun_geo, 2016)
y = soy_mun_geo_amz_2016["SOY_DEF"].values
w = Queen.from_dataframe(soy_mun_geo_amz_2016)
w.transform = 'r'

moran = Moran(y, w)

moran_loc = Moran_Local(y, w)
lisa_cluster(moran_loc, soy_mun_geo_amz_2016, p=0.05, figsize = (8,8))
print(f"Spatial autocorrelation on municipalities in the Amazon Biome with Soy Deforestation in {soy_mun_geo_amz_2016['YEAR'].astype(int).unique()}:", round(moran.I, 4))
plt.show()
soy_mun_geo_amz_2020 = filter_df_year_uf_moran(soy_mun_geo, 2020)
y = soy_mun_geo_amz_2020["SOY_DEF"].values
w = Queen.from_dataframe(soy_mun_geo_amz_2020)
w.transform = 'r'

moran = Moran(y, w)

moran_loc = Moran_Local(y, w)
lisa_cluster(moran_loc, soy_mun_geo_amz_2020, p=0.05, figsize = (8,8))
print(f"Spatial autocorrelation on municipalities in the Amazon Biome with Soy Deforestation in {soy_mun_geo_amz_2020['YEAR'].astype(int).unique()}:", round(moran.I, 4))
plt.show()

Trase - Beef Deforestation EDA

Few considerations: - Beef deforestation dataset is from 2016-2020 - It is tonnes/hectares, so its a ratio value

print("Municipalities been considered:", beef_mun[(beef_mun["YEAR"]==2020) & (beef_mun["BEEF_DEF"]>0)].shape[0])
print("States been considered:", sorted(beef_mun["SIGLA_UF"].unique()))
Chart(beef_mun, "BEEF_DEF").bar_year("", "Beef deforestation (hton/a)")
Chart(beef_mun, "BEEF_DEF").bar_year_uf("", "Beef deforestation (ton/ha)")
Chart(beef_mun, "BEEF_DEF").bar_mun_year("Top 20+ municipalities/year from 2013 to 2020", "Beef deforestation (ton/ha)")
Chart(beef_mun, "BEEF_DEF").bar_mun_uf_total("mean", "Portel (PA) has the biggest beef deforestation <b>mean</b><br>from 2016 to 2020", "Beef deforestation (ton/ha)")
# beef_mun_geo_amz_2020 = beef_mun_geo[(beef_mun_geo["YEAR"]==2020) & (beef_mun_geo["SIGLA_UF"].isin(apb_mun["SIGLA_UF"]))]
# y = beef_mun_geo_amz_2020["BEEF_DEF"].values
# w = Queen.from_dataframe(beef_mun_geo_amz_2020)
# w.transform = 'r'

# moran = Moran(y, w)

# moran_loc = Moran_Local(y, w)
# lisa_cluster(moran_loc, beef_mun_geo_amz_2020, p=0.05, figsize = (12,12))
# print("Spatial autocorrelation on municipalities in the Amazon Biome with Beef Deforestation in 2020:", round(moran.I, 4))
# plt.show()
# import warnings
# warnings.filterwarnings("ignore")

# beef_mun_geo_2020 = beef_mun_geo[beef_mun_geo["YEAR"]==2020]
# y = beef_mun_geo_2020["BEEF_DEF"].values
# w = Queen.from_dataframe(beef_mun_geo_2020)
# w.transform = 'r'

# moran = Moran(y, w)

# moran_loc = Moran_Local(y, w)
# lisa_cluster(moran_loc, beef_mun_geo_2020, p=0.05, figsize = (12,12))
# print("Spatial autocorrelation on municipalities with Beef Deforestation in 2020:", round(moran.I, 4))
# plt.show()

Municipalities in which there has been both soy deforestation and land conflicts happening concurrently 2013-2020.

apb_soy = apb.merge(soy, how="left", on=["TRASE_ID", "YEAR"])
apb_soy = apb_soy[(apb_soy["NUM_CONFLICT"]!=0) & (~apb_soy["SOY_DEF"].isna()) & (apb_soy["SOY_DEF"]>0.0)]
apb_soy_mun = (apb_soy.merge(mun, how="left", on=["TRASE_ID"])
            [['TRASE_ID', "MUN_NAME", "SIGLA_UF", 'NM_LENTE', 'YEAR', 'NUM_CONFLICT', 'SOY_DEF']])
print("Top 10+ sample:")
apb_soy_mun.sort_values(["NUM_CONFLICT", "SOY_DEF"], ascending=[False, False]).head(10)

Municipalities in which there has been both beef deforestation and land conflicts happening concurrently 2016-2020.

apb_beef = apb.merge(beef, how="left", on=["TRASE_ID", "YEAR"])
apb_beef = apb_beef[(apb_beef["NUM_CONFLICT"]!=0) & (~apb_beef["BEEF_DEF"].isna()) & (apb_beef["BEEF_DEF"]>0.0)]
apb_beef_mun = (apb_beef.merge(mun, how="left", on=["TRASE_ID"])
            [['TRASE_ID', "MUN_NAME", "SIGLA_UF", 'NM_LENTE', 'YEAR', 'NUM_CONFLICT', 'BEEF_DEF']])
print("Top 10+ sample:")
apb_beef_mun.sort_values(["NUM_CONFLICT", "BEEF_DEF"], ascending=[False, False]).head(10)

Municipalities in which there has been both commodity expansion and land conflicts happening concurrently 2013-2020.

commodity expansion = municipalities where soy deforestation area increased from year to another

soy_expansion = mun_diff_capacity(soy_prod, "HA").rename(columns={"HA":"SOY_AREA"})

apb_soy_expansion = soy_expansion.merge(apb_soy_mun, how="left", on=["TRASE_ID", "YEAR"])
apb_soy_expansion = (apb_soy_expansion[(~apb_soy_expansion["NUM_CONFLICT"].isna()) | (~apb_soy_expansion["SOY_DEF"].isna())]
                    [['TRASE_ID', 'MUN_NAME', 'SIGLA_UF', 'YEAR', 'SOY_AREA', 'AREA_TREND', 'AREA_DIFF', 'NM_LENTE', 'NUM_CONFLICT', 'SOY_DEF']])

print("Unique municipalities:", apb_soy_expansion["TRASE_ID"].nunique())

apb_soy_expansion.head(10)

Municipalities in which land conflicts have been concentrated 2013-2020 and
establish whether or not there has been commodity expansion in these places

credit = get_pandas_df_once("brazil/indicators/out/social/br_soy_beef_rural_credit_2013_2023.csv", sep=";", dtype="str")
credit["TRASE_ID"] = "BR-" + credit["geocode"]
credit = credit.rename(columns={"year": "YEAR"}).astype({"YEAR": int, "value": float, "area": float})

credit_g = credit.groupby(["TRASE_ID", "geocode", "YEAR", "mun_name"], as_index=False).agg({"value": "sum", "area": "sum"})
credit_g.sample(5)
conflict_expansion = mun_diff_capacity(apb, "NUM_CONFLICT").rename(columns={"AREA_TREND": "CONFLICT_TREND", "AREA_DIFF": "CONFLICT_DIFF"})

apb_soy_conc = conflict_expansion.merge(soy_expansion, how="left", on=["TRASE_ID", "YEAR"])

apb_soy_conc_mun = apb_soy_conc.merge(mun, how="left", on=["TRASE_ID"])

soy_def_expansion = mun_diff_capacity(soy, "SOY_DEF").rename(columns={"AREA_TREND": "DEF_TREND", "AREA_DIFF": "DEF_DIFF"})
beef_def_expansion = mun_diff_capacity(beef, "BEEF_DEF").rename(columns={"AREA_TREND": "BEEF_DEF_TREND", "AREA_DIFF": "BEEF_DEF_DIFF"})
credit_expansion = mun_diff_capacity(credit_g, "value").rename(columns={"value":"CREDIT", "AREA_TREND": "CREDIT_TREND", "AREA_DIFF": "CREDIT_DIFF"})

apb_soy_def_conc_mun = apb_soy_conc_mun.merge(soy_def_expansion, how="left", on=["TRASE_ID", "YEAR"])

apb_beef_soy_def_conc_mun = apb_soy_def_conc_mun.merge(beef_def_expansion, how="left", on=["TRASE_ID", "YEAR"])

apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun.merge(credit_expansion, how="left", on=["TRASE_ID", "YEAR"])

apb_beef_soy_def_conc_mun_credit = (apb_beef_soy_def_conc_mun_credit[(apb_beef_soy_def_conc_mun_credit["NUM_CONFLICT"]!=0) & (apb_beef_soy_def_conc_mun["YEAR"]>=2013)]
                    [['TRASE_ID', 'MUN_NAME', 'SIGLA_UF', 'YEAR', 'SOY_AREA', 'AREA_TREND', 'AREA_DIFF', 'NM_LENTE', 
                      "CREDIT", "CREDIT_TREND", "CREDIT_DIFF",
                      'BEEF_DEF', 'BEEF_DEF_TREND', 'BEEF_DEF_DIFF',
                      "NUM_CONFLICT", "CONFLICT_TREND", "CONFLICT_DIFF",
                      "SOY_DEF", "DEF_TREND", "DEF_DIFF"]])

area_mode = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID")["AREA_TREND"].agg(pd.Series.mode).to_frame().reset_index().rename(columns={"AREA_TREND": "AREA_TREND_MODE"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(area_mode, how="left", on="TRASE_ID")

def_mode = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID")["DEF_TREND"].agg(pd.Series.mode).to_frame().reset_index().rename(columns={"DEF_TREND": "DEF_TREND_MODE"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(def_mode, how="left", on="TRASE_ID")

conflict_trend = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID", as_index=False).agg({"NUM_CONFLICT": "mean"}).rename(columns={"NUM_CONFLICT": "CONFLICT_MEAN"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(conflict_trend, how="left", on="TRASE_ID")

conflict_mode = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID")["CONFLICT_TREND"].agg(pd.Series.mode).to_frame().reset_index().rename(columns={"CONFLICT_TREND": "CONFLICT_MODE"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(conflict_mode, how="left", on="TRASE_ID")

beef_def_mode = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID")["BEEF_DEF_TREND"].agg(pd.Series.mode).to_frame().reset_index().rename(columns={"BEEF_DEF_TREND": "BEEF_DEF_TREND_MODE"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(beef_def_mode, how="left", on="TRASE_ID")

credit_mode = apb_beef_soy_def_conc_mun_credit.groupby("TRASE_ID")["CREDIT_TREND"].agg(pd.Series.mode).to_frame().reset_index().rename(columns={"CREDIT_TREND": "CREDIT_TREND_MODE"})
apb_beef_soy_def_conc_mun_credit = apb_beef_soy_def_conc_mun_credit.merge(credit_mode, how="left", on="TRASE_ID")


apb_beef_soy_def_conc_mun_credit = (apb_beef_soy_def_conc_mun_credit
                        [['TRASE_ID', 'MUN_NAME', 'SIGLA_UF', 'YEAR', 
                          "NUM_CONFLICT", "CONFLICT_MEAN", "CONFLICT_MODE", #  "CONFLICT_DIFF", "CONFLICT_TREND", 
                          'SOY_AREA',  'AREA_DIFF', 'AREA_TREND_MODE',  #'AREA_TREND',
                          'SOY_DEF',  'DEF_DIFF',  'DEF_TREND_MODE',
                         'BEEF_DEF', 'BEEF_DEF_TREND', 'BEEF_DEF_TREND_MODE',
                          "CREDIT", "CREDIT_TREND", "CREDIT_DIFF", "CREDIT_TREND_MODE"]]) #'DEF_TREND',
all_increased[(all_increased["MUN_NAME"]=="Vilhena") & (all_increased["YEAR"]==2017)][["MUN_NAME", "SIGLA_UF", "NUM_CONFLICT", "SOY_DEF", "BEEF_DEF", "CREDIT"]]
px.histogram(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020], x="CREDIT")
Regression(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020]).plot_regression("NUM_CONFLICT", "CREDIT", "Conflict x Credit - 2020", "Credit", "Conflict")
px.histogram(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020], x="BEEF_DEF")
apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020]["TRASE_ID"].nunique()
Regression(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020]).plot_regression("BEEF_DEF", "CREDIT", "Conflict x Beef Def - 2020", "Credit", "Beef Deforestation")
px.histogram(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020], x="SOY_DEF")
Regression(apb_beef_soy_def_conc_mun_credit[apb_beef_soy_def_conc_mun_credit["YEAR"]==2020]).plot_regression("SOY_DEF", "CREDIT", "Conflict x Soy Def - 2020", "Credit", "Soy Deforestation")
all_increased = (apb_beef_soy_def_conc_mun_credit[(apb_beef_soy_def_conc_mun_credit["DEF_TREND_MODE"].str.contains("Increased", na=False)) 
                                & (apb_beef_soy_def_conc_mun_credit["CONFLICT_MODE"].str.contains("Increased", na=False))
                                & (apb_beef_soy_def_conc_mun_credit["BEEF_DEF_TREND_MODE"].str.contains("Increased", na=False))
                                & (apb_beef_soy_def_conc_mun_credit["CREDIT_TREND_MODE"].str.contains("Increased", na=False)) ])

print(all_increased["TRASE_ID"].nunique(),
      "municipalities have increased land conflict with increasing soy deforestation and beef deforestation expansion")
print(", ".join(all_increased["SIGLA_UF"].unique()), ": states where all variables increased")

# print("15 rows sample:")
(all_increased[['MUN_NAME', 'SIGLA_UF', 'YEAR', 'NUM_CONFLICT',
       'CONFLICT_MEAN', 'CONFLICT_MODE', 'SOY_DEF', 'DEF_TREND_MODE', 'BEEF_DEF',
       'BEEF_DEF_TREND', 'BEEF_DEF_TREND_MODE',
               "CREDIT", "CREDIT_TREND", "CREDIT_DIFF"]]
         .head(35))