Qa phase3

View or edit on GitHub

This page is synchronized from trase/models/brazil/beef/qa_beef/archive/qa_phase3.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

from imports.reader import *
from imports.plots_general import *
from imports.functions import *
from imports.preprocess import *
from imports.constants import *
from imports.layout import *
import plotly.express as px

def query_old_data(analysis_col, group_by):

    query_old = f"""
                        SELECT version,
                        year AS YEAR,
                        {group_by},
                        ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW,
                        ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE
                        FROM splitgraph."supply-chains-concatenated"
                        WHERE ref_id = {REF_ID_SEI_OLD}
                        GROUP BY 1,2,3
                        ORDER BY 1
                        """
    df_old = query_splitgraph(query_old)
    df_old["PERCENTAGE"] = calculate_percentage(df_old, ["YEAR"], analysis_col)
    df_old = order_df_per_column(df_old, group_by, "YEAR")
    return df_old


def query_new_data(analysis_col, group_by, extra_restriction):

    operator_vol = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW,"
    operator_def = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE"
    operator = " ".join([operator_vol, operator_def])
    extra_restrictions = "AND year in (2015,2016,2017)" if extra_restriction else ""
    df_new = query_for_plots(operator, group_by, extra_restrictions).reset_index()
    df_new["PERCENTAGE"] = calculate_percentage(df_new, ["YEAR"], analysis_col)
    df_new = order_df_per_column(df_new, group_by, "YEAR")
    return df_new


def return_col_names_and_title(analysis_col):
    if analysis_col == "DEF_TOTAL_EXPOSURE":
        col_name_list = [
            "Deforestation exposure version 2.2.0",
            "Deforestation exposure version 2.1.0",
            "Percentage deforestation exposure version 2.2.0",
            "Percentage deforestation exposure version 2.1.0",
        ]
        title = (
            "SEI PCS versions comparision in Deforestation Exposure per state per year"
        )
    else:
        col_name_list = [
            "Volume raw version 2.2.0",
            "Volume raw version 2.1.0",
            "Percentage volume raw version 2.2.0",
            "Percentage volume raw version 2.1.0",
        ]
        title = "SEI PCS versions comparision in Volume Raw per state per year"
    return col_name_list, title


def plot_version_comparision(analysis_col):
    group_by = "REGION_PRODUCTION_2"
    dfs = [
        query_new_data(analysis_col, group_by, True),
        query_old_data(analysis_col, group_by),
    ]
    col_list = [analysis_col, "PERCENTAGE"]
    col_name_list, title = return_col_names_and_title(analysis_col)
    nrows = 2
    ncols = 2
    fig = make_subplots(
        rows=nrows, cols=ncols, subplot_titles=col_name_list, shared_yaxes=True
    )
    figures = np.empty((nrows, 0)).tolist()
    for i in range(nrows):
        for dataframe in dfs:
            figures[i].append(
                px.bar(
                    dataframe,
                    x="YEAR",
                    y=col_list[i],
                    color=group_by,
                    color_discrete_map=COLORS[group_by],
                    text=group_by,
                )
            )
    # add the traces for each subplot to the main figure
    fig = transform_plotly_px_to_go(figures, fig)
    # update the layout of the figure
    fig = update_layout(fig, nrows, title)
    fig.show()


def plot_bar_chart(df, group_by, col_list):
    nrows = 1
    ncols = 2
    fig = make_subplots(
        rows=nrows,
        cols=ncols,
        subplot_titles=[" ".join(title.lower().split("_")) for title in col_list],
    )

    figures = np.empty((nrows, 0)).tolist()
    for i in range(nrows):
        for j in range(len(col_list)):
            figures[i].append(
                px.bar(
                    df,
                    x="YEAR",
                    y=col_list[j],
                    color=group_by,
                    color_discrete_map=COLORS[group_by],
                    text=group_by,
                )
            )
    # add the traces for each subplot to the main figure
    fig = transform_plotly_px_to_go(figures, fig)

    # update the layout of the figure
    fig = update_layout(fig, nrows, " ".join(group_by.lower().split("_")))
    fig.show()


def plot_single_deforestation_exposure_bar_plot(group_by):
    analysis_col = "DEF_TOTAL_EXPOSURE"
    df = query_new_data(analysis_col, group_by, False)
    fig = px.bar(
        df,
        x="YEAR",
        y=analysis_col,
        color=group_by,
        color_discrete_map=COLORS[group_by],
        text=group_by,
    )
    fig.update_layout(
        title="Deforestation Exposure per state per year",
        xaxis_title="Year",
        yaxis_title="Deforestation Exposure (ha)",
        showlegend=False,
    )
    fig.show()


def plot_multiple_deforestation_exposure_bar_plot(group_by):
    operator_vol = "ROUND(SUM(commodity_deforestation_total_exposure / volume )::NUMERIC, 2) AS DEFORESTATION_PER_VOLUME_RAW,"
    operator_def = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEFORESTATION_TOTAL_EXPOSURE"
    operator = " ".join([operator_vol, operator_def])
    df = query_for_plots(operator, group_by)
    key_list = find_key_list_in_df(df, group_by, "DEFORESTATION_TOTAL_EXPOSURE")
    df[group_by] = np.where(df[group_by].isin(key_list[:5]), df[group_by], "OTHER")
    df = df.groupby(["YEAR", group_by]).sum().reset_index()
    plot_bar_chart(
        df, group_by, ["DEFORESTATION_TOTAL_EXPOSURE", "DEFORESTATION_PER_VOLUME_RAW"]
    )

plot_version_comparision("DEF_TOTAL_EXPOSURE")

plot_version_comparision("VOLUME_RAW")

group_by = "ECONOMIC_BLOC"
operator = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW"
df = query_for_plots(operator, group_by)
df["ECONOMIC_BLOC"] = np.where(
    df["ECONOMIC_BLOC"].isin(EU_LIST), "E.U.", df["ECONOMIC_BLOC"]
)
df = df.groupby(["YEAR", "ECONOMIC_BLOC"]).sum().reset_index()
# Add the percentage column
df["MARKET_SHARE"] = calculate_percentage(df, ["YEAR"], "VOLUME_RAW")
key_list = find_key_list_in_df(df, group_by)
df["ECONOMIC_BLOC"] = np.where(
    df["ECONOMIC_BLOC"].isin(key_list[:9]), df["ECONOMIC_BLOC"], "OTHER"
)
df = df.groupby(["YEAR", "ECONOMIC_BLOC"]).sum().reset_index()

plot_bar_chart(df, group_by, ["VOLUME_RAW", "MARKET_SHARE"])

group_by = "EXPORTER_GROUP"
operator = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW"
df = query_for_plots(operator, group_by)
# Add the percentage column
df["MARKET_SHARE"] = calculate_percentage(df, ["YEAR"], "VOLUME_RAW")
key_list = find_key_list_in_df(df, group_by)
df[group_by] = np.where(df[group_by].isin(key_list[:5]), df[group_by], "OTHER")
df = df.groupby(["YEAR", group_by]).sum().reset_index()
plot_bar_chart(df, group_by, ["VOLUME_RAW", "MARKET_SHARE"])

analysis_col = "DEF_TOTAL_EXPOSURE"
operator = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE"
df = query_for_plots(operator).reset_index()
fig = px.bar(df, x="YEAR", y=analysis_col, color_discrete_sequence=[MAIN_COLOR])
fig.update_layout(
    title="Deforestation Exposure per year",
    xaxis_title="Year",
    yaxis_title="Deforestation Exposure (ha)",
    showlegend=False,
)
fig.show()

plot_single_deforestation_exposure_bar_plot("REGION_PRODUCTION_2")

plot_single_deforestation_exposure_bar_plot("BIOME")

plot_multiple_deforestation_exposure_bar_plot("EXPORTER_GROUP")

plot_multiple_deforestation_exposure_bar_plot("IMPORTER_GROUP")

plot_multiple_deforestation_exposure_bar_plot("ECONOMIC_BLOC")