View or edit on GitHub
This page is synchronized from trase/models/deduce/v1/notebooks/postprocessing.ipynb. Last modified on 2026-06-21 06:35 CEST by GitHub Actions.
Please view or edit the original file there; changes should be reflected here after the nightly build (around midnight CET),
or after manually triggering a build with a GitHub Action (link).
import pandas as pd
from glob import glob
import re
# Input folders; every "*.csv" file found directly inside each one is loaded.
PATH_ORIGINAL = "data/ORIGINAL_SPATIAL_ALLOCATION"
PATH_NEW = "data/GLOBAL_DeDuCE_Area_Stats_Export_GADM"

# Columns read from each CSV of the new export. "groups" holds the raw text
# that is parsed into per-class, per-year areas later on.
TARGET_COLS_NEW = [
    "CONTINENT", "COUNTRY",
    "GID_0", "GID_1", "GID_2",
    "NAME_0", "NAME_1", "NAME_2",
    "groups",
]

# Column layout of the final table: identifier columns, "Class", then one
# "loss_<year>" column for every year from 2001 to 2022 inclusive.
TARGET_COLS = [
    "CONTINENT", "COUNTRY", "GID_0", "GID_1", "GID_2", "Class",
    *(f"loss_{year}" for year in range(2001, 2023)),
]
# Load every CSV of both datasets into one frame each.
# glob() yields paths in a filesystem-dependent order, so the paths are sorted
# to make the row order of the concatenated frames (and therefore of the
# exported reference table) reproducible across machines and runs.
df_original = pd.concat(
    [pd.read_csv(path) for path in sorted(glob(f"{PATH_ORIGINAL}/*.csv"))]
)
# Only the columns listed in TARGET_COLS_NEW are read from the new export.
df_new = pd.concat(
    [
        pd.read_csv(path, usecols=TARGET_COLS_NEW)
        for path in sorted(glob(f"{PATH_NEW}/*.csv"))
    ]
)
Extract groups
# Compiled once at import time instead of being re-resolved on every call
# (the function is applied to every row of the table).
# One class record: "class_id=<number> ... groups=[ ... ]". The lazy quantifiers
# stop at the first "groups=[" and the first "]" after the class id.
_CLASS_PATTERN = re.compile(r"class_id=([-\d.]+).*?groups=\[(.*?)\]")
# One entry inside a class's group list: "year=<digits>, sum=<number>".
_YEAR_SUM_PATTERN = re.compile(r"year=(\d+),\s*sum=([0-9.eE+-]+)")


def extract_groups(text):
    """Parse a raw "groups" string into flat per-class, per-year records.

    Args:
        text: String containing zero or more ``class_id=... groups=[...]``
            records, or a missing value (``None``/``NaN``).

    Returns:
        A list of dicts with keys ``"year"`` (int), ``"Class"`` (the class id
        as a float rounded to 2 decimals) and ``"area"`` (float), in the order
        the entries appear in ``text``. Empty for missing input or when
        nothing matches.
    """
    if pd.isna(text):
        return []

    records = []
    for class_match in _CLASS_PATTERN.finditer(text):
        class_id = round(float(class_match.group(1)), 2)
        # Only the text between this class's "groups=[" and the next "]" is
        # scanned for year/sum pairs.
        records.extend(
            {
                "year": int(entry.group(1)),
                "Class": class_id,
                "area": float(entry.group(2)),
            }
            for entry in _YEAR_SUM_PATTERN.finditer(class_match.group(2))
        )
    return records
# Turn each row's raw "groups" text into a list of year/Class/area records.
df_new["parsed_groups"] = df_new["groups"].apply(extract_groups)

# One row per record. Rows whose list is empty become NaN after explode() and
# are removed; the index is then rebuilt as 0..n-1.
df_new = df_new.explode("parsed_groups")
df_new = df_new.dropna(subset=["parsed_groups"])
df_new = df_new.reset_index(drop=True)

# Expand the record dicts into "year", "Class" and "area" columns and put them
# next to the remaining columns, replacing the raw and intermediate ones.
new_columns = pd.json_normalize(df_new["parsed_groups"])
df_new = pd.concat(
    [df_new.drop(columns=["groups", "parsed_groups"]), new_columns],
    axis="columns",
)
Pivot table
# Name of the output column each record contributes to: "loss_<year>".
df_new["year_label"] = "loss_" + df_new["year"].astype(str)

index_cols = ["CONTINENT", "COUNTRY", "GID_0", "GID_1", "GID_2", "Class"]

# Long -> wide: one row per index_cols combination, one column per year label,
# areas summed and absent combinations filled with 0.
# NOTE(review): rows with a missing value in any of index_cols may be left out
# of the result by pandas' default NaN-key handling — confirm the export never
# has blank GID_1/GID_2 values.
df_pivoted = df_new.pivot_table(
    values="area",
    index=index_cols,
    columns="year_label",
    aggfunc="sum",
    fill_value=0,
).reset_index()

# Any expected column the pivot did not produce is added as all zeros.
for col in TARGET_COLS:
    if col in df_pivoted.columns:
        continue
    df_pivoted[col] = 0.0

# Keep only the expected columns, in the expected order, and clear the
# "year_label" name that the pivot left on the columns axis.
df_final = df_pivoted.loc[:, TARGET_COLS]
df_final.columns.name = None
Export tables
# Write the reshaped new table and the concatenated original table side by side.
# NOTE(review): to_csv() writes the row index as an unnamed first column by
# default; for df_original that index repeats per input file because the
# frames were concatenated without re-indexing — confirm this is intended.
df_final.to_csv(path_or_buf="data/deduce_def_spatial_allocation_new.csv")
df_original.to_csv(path_or_buf="data/deduce_def_spatial_allocation_ref.csv")