View or edit on GitHub

This page is synchronized from trase/models/deduce/v1/notebooks/postprocessing.ipynb. Last modified on 2026-06-21 06:35 CEST by GitHub Actions. Please view or edit the original file there; changes should be reflected here after the midnight build (CET), or after manually triggering a build with a GitHub Action (link).

import pandas as pd
from glob import glob
import re

# Directory whose CSV files are concatenated into the reference table.
PATH_ORIGINAL = "data/ORIGINAL_SPATIAL_ALLOCATION"

# Directory whose CSV files are concatenated into the new table.
PATH_NEW = "data/GLOBAL_DeDuCE_Area_Stats_Export_GADM"

# Columns read from every CSV under PATH_NEW: two region labels, the GID_* and
# NAME_* columns for levels 0-2, and the raw "groups" text that gets parsed.
TARGET_COLS_NEW = (
    ["CONTINENT", "COUNTRY"]
    + [f"GID_{level}" for level in range(3)]
    + [f"NAME_{level}" for level in range(3)]
    + ["groups"]
)

# Column layout of the final table: six key columns followed by one
# "loss_<year>" column for each year from 2001 to 2022 inclusive.
TARGET_COLS = [
    "CONTINENT",
    "COUNTRY",
    "GID_0",
    "GID_1",
    "GID_2",
    "Class",
    *(f"loss_{year}" for year in range(2001, 2023)),
]

# Reference table: every CSV under PATH_ORIGINAL, stacked row-wise.
# glob() yields paths in a file-system-dependent order, so the paths are sorted
# to make the row order of the concatenated frame reproducible between runs.
df_original = pd.concat(
    [pd.read_csv(csv_path) for csv_path in sorted(glob(f"{PATH_ORIGINAL}/*.csv"))]
)

# New table: only the TARGET_COLS_NEW columns are read from each CSV under
# PATH_NEW; paths are sorted for the same reproducibility reason.
df_new = pd.concat(
    [
        pd.read_csv(csv_path, usecols=TARGET_COLS_NEW)
        for csv_path in sorted(glob(f"{PATH_NEW}/*.csv"))
    ]
)

Extract groups

def extract_groups(text):
    """Parse a raw ``groups`` string into a flat list of per-year records.

    The text is scanned for ``class_id=<number> ... groups=[ ... ]`` fragments;
    inside each bracketed part every ``year=<digits>, sum=<number>`` pair
    becomes one record.

    Args:
        text: The raw string to parse, or a missing value (``None`` / NaN).

    Returns:
        A list of dicts with keys ``"year"`` (int), ``"Class"`` (the class id
        as a float rounded to two decimals) and ``"area"`` (float). The list
        is empty when ``text`` is missing or nothing matches.
    """
    records = []
    if pd.isna(text):
        return records

    for class_hit in re.finditer(r"class_id=([-\d.]+).*?groups=\[(.*?)\]", text):
        raw_class, year_blob = class_hit.groups()
        class_value = round(float(raw_class), 2)

        # Each (year, sum) pair found inside this class's bracketed list.
        records.extend(
            {"year": int(year_txt), "Class": class_value, "area": float(sum_txt)}
            for year_txt, sum_txt in re.findall(
                r"year=(\d+),\s*sum=([0-9.eE+-]+)", year_blob
            )
        )

    return records


# Turn each raw "groups" string into a list of {year, Class, area} records.
parsed_records = df_new["groups"].apply(extract_groups)
df_new["parsed_groups"] = parsed_records

# One row per record. explode() yields a missing value for rows whose list is
# empty, so those rows are dropped. The index is reset so that it lines up
# with the fresh 0..n-1 index of the normalized records below.
long_rows = df_new.explode("parsed_groups")
long_rows = long_rows.dropna(subset=["parsed_groups"])
df_new = long_rows.reset_index(drop=True)

# Expand the record dicts into "year", "Class" and "area" columns and put them
# next to the remaining columns, replacing the raw and parsed group columns.
new_columns = pd.json_normalize(df_new["parsed_groups"])
kept_columns = df_new.drop(columns=["groups", "parsed_groups"])
df_new = pd.concat([kept_columns, new_columns], axis=1)

Pivot table

# Header of the wide column each record belongs to, e.g. 2001 -> "loss_2001".
df_new["year_label"] = "loss_" + df_new["year"].astype(str)

index_cols = ["CONTINENT", "COUNTRY", "GID_0", "GID_1", "GID_2", "Class"]

# Long -> wide: one row per index_cols combination, one column per year label,
# areas summed within each cell and combinations without data filled with 0.
# NOTE(review): rows with a missing value in any of index_cols may be left out
# by pivot_table's default NaN handling - confirm this is intended.
wide_table = pd.pivot_table(
    df_new,
    values="area",
    index=index_cols,
    columns="year_label",
    aggfunc="sum",
    fill_value=0,
)
df_pivoted = wide_table.reset_index()

# Any target column that never occurred in the data is added as all zeros.
absent_cols = [name for name in TARGET_COLS if name not in df_pivoted.columns]
for name in absent_cols:
    df_pivoted[name] = 0.0

# Keep exactly the target columns, in target order.
df_final = df_pivoted[TARGET_COLS]

# Drop the "year_label" name that the pivot leaves on the columns axis.
df_final.columns.name = None

Export Tables

# Write the new table and the reference table side by side under data/.
# NOTE(review): to_csv() is called with its defaults, so each frame's index is
# written as the first column - confirm that downstream readers expect it.
for frame, out_path in (
    (df_final, "data/deduce_def_spatial_allocation_new.csv"),
    (df_original, "data/deduce_def_spatial_allocation_ref.csv"),
):
    frame.to_csv(out_path)