Qa phase3
View or edit on GitHub
This page is synchronized from trase/models/brazil/beef/qa_beef/archive/qa_phase3.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
from imports.reader import *
from imports.plots_general import *
from imports.functions import *
from imports.preprocess import *
from imports.constants import *
from imports.layout import *
import plotly.express as px
def query_old_data(analysis_col, group_by):
query_old = f"""
SELECT version,
year AS YEAR,
{group_by},
ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW,
ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE
FROM splitgraph."supply-chains-concatenated"
WHERE ref_id = {REF_ID_SEI_OLD}
GROUP BY 1,2,3
ORDER BY 1
"""
df_old = query_splitgraph(query_old)
df_old["PERCENTAGE"] = calculate_percentage(df_old, ["YEAR"], analysis_col)
df_old = order_df_per_column(df_old, group_by, "YEAR")
return df_old
def query_new_data(analysis_col, group_by, extra_restriction):
operator_vol = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW,"
operator_def = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE"
operator = " ".join([operator_vol, operator_def])
extra_restrictions = "AND year in (2015,2016,2017)" if extra_restriction else ""
df_new = query_for_plots(operator, group_by, extra_restrictions).reset_index()
df_new["PERCENTAGE"] = calculate_percentage(df_new, ["YEAR"], analysis_col)
df_new = order_df_per_column(df_new, group_by, "YEAR")
return df_new
def return_col_names_and_title(analysis_col):
if analysis_col == "DEF_TOTAL_EXPOSURE":
col_name_list = [
"Deforestation exposure version 2.2.0",
"Deforestation exposure version 2.1.0",
"Percentage deforestation exposure version 2.2.0",
"Percentage deforestation exposure version 2.1.0",
]
title = (
"SEI PCS versions comparision in Deforestation Exposure per state per year"
)
else:
col_name_list = [
"Volume raw version 2.2.0",
"Volume raw version 2.1.0",
"Percentage volume raw version 2.2.0",
"Percentage volume raw version 2.1.0",
]
title = "SEI PCS versions comparision in Volume Raw per state per year"
return col_name_list, title
def plot_version_comparision(analysis_col):
group_by = "REGION_PRODUCTION_2"
dfs = [
query_new_data(analysis_col, group_by, True),
query_old_data(analysis_col, group_by),
]
col_list = [analysis_col, "PERCENTAGE"]
col_name_list, title = return_col_names_and_title(analysis_col)
nrows = 2
ncols = 2
fig = make_subplots(
rows=nrows, cols=ncols, subplot_titles=col_name_list, shared_yaxes=True
)
figures = np.empty((nrows, 0)).tolist()
for i in range(nrows):
for dataframe in dfs:
figures[i].append(
px.bar(
dataframe,
x="YEAR",
y=col_list[i],
color=group_by,
color_discrete_map=COLORS[group_by],
text=group_by,
)
)
# add the traces for each subplot to the main figure
fig = transform_plotly_px_to_go(figures, fig)
# update the layout of the figure
fig = update_layout(fig, nrows, title)
fig.show()
def plot_bar_chart(df, group_by, col_list):
nrows = 1
ncols = 2
fig = make_subplots(
rows=nrows,
cols=ncols,
subplot_titles=[" ".join(title.lower().split("_")) for title in col_list],
)
figures = np.empty((nrows, 0)).tolist()
for i in range(nrows):
for j in range(len(col_list)):
figures[i].append(
px.bar(
df,
x="YEAR",
y=col_list[j],
color=group_by,
color_discrete_map=COLORS[group_by],
text=group_by,
)
)
# add the traces for each subplot to the main figure
fig = transform_plotly_px_to_go(figures, fig)
# update the layout of the figure
fig = update_layout(fig, nrows, " ".join(group_by.lower().split("_")))
fig.show()
def plot_single_deforestation_exposure_bar_plot(group_by):
analysis_col = "DEF_TOTAL_EXPOSURE"
df = query_new_data(analysis_col, group_by, False)
fig = px.bar(
df,
x="YEAR",
y=analysis_col,
color=group_by,
color_discrete_map=COLORS[group_by],
text=group_by,
)
fig.update_layout(
title="Deforestation Exposure per state per year",
xaxis_title="Year",
yaxis_title="Deforestation Exposure (ha)",
showlegend=False,
)
fig.show()
def plot_multiple_deforestation_exposure_bar_plot(group_by):
operator_vol = "ROUND(SUM(commodity_deforestation_total_exposure / volume )::NUMERIC, 2) AS DEFORESTATION_PER_VOLUME_RAW,"
operator_def = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEFORESTATION_TOTAL_EXPOSURE"
operator = " ".join([operator_vol, operator_def])
df = query_for_plots(operator, group_by)
key_list = find_key_list_in_df(df, group_by, "DEFORESTATION_TOTAL_EXPOSURE")
df[group_by] = np.where(df[group_by].isin(key_list[:5]), df[group_by], "OTHER")
df = df.groupby(["YEAR", group_by]).sum().reset_index()
plot_bar_chart(
df, group_by, ["DEFORESTATION_TOTAL_EXPOSURE", "DEFORESTATION_PER_VOLUME_RAW"]
)
plot_version_comparision("DEF_TOTAL_EXPOSURE")
plot_version_comparision("VOLUME_RAW")
group_by = "ECONOMIC_BLOC"
operator = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW"
df = query_for_plots(operator, group_by)
df["ECONOMIC_BLOC"] = np.where(
df["ECONOMIC_BLOC"].isin(EU_LIST), "E.U.", df["ECONOMIC_BLOC"]
)
df = df.groupby(["YEAR", "ECONOMIC_BLOC"]).sum().reset_index()
# Add the percentage column
df["MARKET_SHARE"] = calculate_percentage(df, ["YEAR"], "VOLUME_RAW")
key_list = find_key_list_in_df(df, group_by)
df["ECONOMIC_BLOC"] = np.where(
df["ECONOMIC_BLOC"].isin(key_list[:9]), df["ECONOMIC_BLOC"], "OTHER"
)
df = df.groupby(["YEAR", "ECONOMIC_BLOC"]).sum().reset_index()
plot_bar_chart(df, group_by, ["VOLUME_RAW", "MARKET_SHARE"])
group_by = "EXPORTER_GROUP"
operator = "ROUND(SUM(volume )::NUMERIC, 2) AS VOLUME_RAW"
df = query_for_plots(operator, group_by)
# Add the percentage column
df["MARKET_SHARE"] = calculate_percentage(df, ["YEAR"], "VOLUME_RAW")
key_list = find_key_list_in_df(df, group_by)
df[group_by] = np.where(df[group_by].isin(key_list[:5]), df[group_by], "OTHER")
df = df.groupby(["YEAR", group_by]).sum().reset_index()
plot_bar_chart(df, group_by, ["VOLUME_RAW", "MARKET_SHARE"])
analysis_col = "DEF_TOTAL_EXPOSURE"
operator = "ROUND(SUM(commodity_deforestation_total_exposure )::NUMERIC, 2) AS DEF_TOTAL_EXPOSURE"
df = query_for_plots(operator).reset_index()
fig = px.bar(df, x="YEAR", y=analysis_col, color_discrete_sequence=[MAIN_COLOR])
fig.update_layout(
title="Deforestation Exposure per year",
xaxis_title="Year",
yaxis_title="Deforestation Exposure (ha)",
showlegend=False,
)
fig.show()
plot_single_deforestation_exposure_bar_plot("REGION_PRODUCTION_2")
plot_single_deforestation_exposure_bar_plot("BIOME")
plot_multiple_deforestation_exposure_bar_plot("EXPORTER_GROUP")
plot_multiple_deforestation_exposure_bar_plot("IMPORTER_GROUP")
plot_multiple_deforestation_exposure_bar_plot("ECONOMIC_BLOC")