ComplexityAnalysis

View or edit on GitHub

This page is synchronized from trase/models/indonesia/palm_oil/ComplexityAnalysis.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after the midnight (CET) build, or after manually triggering a build with a GitHub action (link).

import pandas as pd

from trase.tools.aws.aws_helpers import get_pandas_df

# Load one result file per (year, model complexity) combination and stack
# them into a single frame.
df_list = []

# Columns that are cast to float right after loading.
float_columns = dict.fromkeys(
    ["vol", "def_10y_annual", "def_10y_total", "def_annual", "def_total"],
    float,
)

for year in [2018, 2019, 2020]:
    for model_complexity in ["ZERO", "NULL", "SIMPLE", "FULL"]:
        csv_key = f"indonesia/palm_oil/sei_pcs/v1.2.2/INDONESIA_PALM_OIL_COMPLEXITY_{model_complexity}_{year}_WITH_EXPORTS.csv"
        run_df = get_pandas_df(csv_key, sep=";").astype(float_columns)
        # Tag every row with the complexity level of the file it came from,
        # so the runs stay distinguishable after concatenation.
        run_df["model_complexity"] = model_complexity
        df_list.append(run_df)

df = pd.concat(df_list)

# Replace each complexity label with its integer rank (ZERO=0 ... FULL=3).
# Later cells sort on this column, which then orders runs by rank.
# An unrecognised label raises KeyError.
complexity_rank = {"ZERO": 0, "NULL": 1, "SIMPLE": 2, "FULL": 3}
df["model_complexity"] = df["model_complexity"].apply(complexity_rank.__getitem__)

# Totals per (year, exporter, model complexity, branch): vol and the four
# def_* columns are summed; the grouping keys are kept as ordinary columns.
df_exporters = df.groupby(
    ["year", "exporter", "model_complexity", "branch"], as_index=False
)[["vol", "def_10y_annual", "def_10y_total", "def_annual", "def_total"]].sum()

# Totals per (year, exporter group, model complexity, branch): vol and the
# four def_* columns are summed; the grouping keys are kept as ordinary columns.
df_exporter_groups = df.groupby(
    ["year", "exporter_group", "model_complexity", "branch"], as_index=False
)[["vol", "def_10y_annual", "def_10y_total", "def_annual", "def_total"]].sum()

# Totals per (year, country, model complexity, branch): vol and the four
# def_* columns are summed; the grouping keys are kept as ordinary columns.
df_countries = df.groupby(
    ["year", "country", "model_complexity", "branch"], as_index=False
)[["vol", "def_10y_annual", "def_10y_total", "def_annual", "def_total"]].sum()
# Split the per-country totals on whether the branch is UNKNOWN.
country_branch_is_unknown = df_countries["branch"] == "UNKNOWN"
df_countries_unknown = df_countries[country_branch_is_unknown]

# Collapse every other branch into one KNOWN row per
# (year, country, model complexity). Only vol, def_10y_annual and def_annual
# are summed here; def_10y_total and def_total are left out of this table.
df_countries_known = (
    df_countries[~country_branch_is_unknown]
    .drop(columns="branch")
    .groupby(["year", "country", "model_complexity"], as_index=False)[
        ["vol", "def_10y_annual", "def_annual"]
    ]
    .sum()
    .assign(branch="KNOWN")
)

# Only the KNOWN rows are displayed; df_countries_unknown is not concatenated
# in. Keep countries whose name starts with "JAPAN" and index the result by
# country, year and model complexity.
df = (
    pd.concat([df_countries_known])
    .loc[lambda table: table["country"].str.startswith("JAPAN")]
    .sort_values(["country", "year", "model_complexity", "branch"])
    .set_index(["country", "year", "model_complexity"])
)
df
vol def_10y_annual def_annual branch
country year model_complexity
JAPAN 2018 0 341505.09 1267.980794 396.674900 KNOWN
1 341196.97 672.975898 82.104033 KNOWN
2 341500.86 3032.475730 69.197272 KNOWN
3 341539.21 2236.340091 79.658992 KNOWN
2019 0 293822.22 946.310965 362.404690 KNOWN
1 255275.57 493.237745 512.774777 KNOWN
2 277230.97 1443.758834 241.610085 KNOWN
3 293865.59 986.383532 260.125396 KNOWN
2020 0 316566.92 1025.844013 151.482659 KNOWN
1 316586.25 451.267856 16.086457 KNOWN
2 237438.17 325.960636 7.543745 KNOWN
3 250907.96 427.701791 22.264914 KNOWN
# Split the per-exporter totals on whether the branch is UNKNOWN.
exporter_branch_is_unknown = df_exporters["branch"] == "UNKNOWN"
df_exporters_unknown = df_exporters[exporter_branch_is_unknown]

# Collapse every other branch into one KNOWN row per
# (year, exporter, model complexity). Only vol, def_10y_annual and def_annual
# are summed here; def_10y_total and def_total are left out of this table.
df_exporters_known = (
    df_exporters[~exporter_branch_is_unknown]
    .drop(columns="branch")
    .groupby(["year", "exporter", "model_complexity"], as_index=False)[
        ["vol", "def_10y_annual", "def_annual"]
    ]
    .sum()
    .assign(branch="KNOWN")
)

# Only the KNOWN rows are displayed; df_exporters_unknown is not concatenated
# in. Keep exporters whose name starts with "S" and index the result by
# exporter, year and model complexity.
df = (
    pd.concat([df_exporters_known])
    .loc[lambda table: table["exporter"].str.startswith("S")]
    .sort_values(["exporter", "year", "model_complexity", "branch"])
    .set_index(["exporter", "year", "model_complexity"])
)
df
vol def_10y_annual def_annual branch
exporter year model_complexity
SAHABAT LENGIS HIJAU 2020 0 113.10 0.370678 0.037233 KNOWN
3 115.25 0.528939 0.070684 KNOWN
SAHABAT MEWAH & MAKMUR 2018 0 15998.40 59.400195 18.014147 KNOWN
1 15999.97 0.000000 0.718911 KNOWN
2 16000.00 0.000000 0.000000 KNOWN
... ... ... ... ... ... ...
SYNERGY OIL NUSANTARA 2019 3 536154.03 2771.808842 118.561743 KNOWN
2020 0 435189.57 1410.363026 206.530253 KNOWN
1 435354.27 227.791945 0.000000 KNOWN
2 477410.04 2895.509512 0.000000 KNOWN
3 491406.88 3110.133992 3.347029 KNOWN

359 rows × 4 columns

# Split the per-exporter-group totals on whether the branch is UNKNOWN.
group_branch_is_unknown = df_exporter_groups["branch"] == "UNKNOWN"
df_exporter_groups_unknown = df_exporter_groups[group_branch_is_unknown]

# Collapse every other branch into one KNOWN row per
# (year, exporter group, model complexity). Only vol, def_10y_annual and
# def_annual are summed here; def_10y_total and def_total are left out of
# this table.
df_exporter_groups_known = (
    df_exporter_groups[~group_branch_is_unknown]
    .drop(columns="branch")
    .groupby(["year", "exporter_group", "model_complexity"], as_index=False)[
        ["vol", "def_10y_annual", "def_annual"]
    ]
    .sum()
    .assign(branch="KNOWN")
)

# Only the KNOWN rows are displayed; df_exporter_groups_unknown is not
# concatenated in. Keep exporter groups whose name starts with "P" and index
# the result by exporter group, year and model complexity.
df = (
    pd.concat([df_exporter_groups_known])
    .loc[lambda table: table["exporter_group"].str.startswith("P")]
    .sort_values(["exporter_group", "year", "model_complexity", "branch"])
    .set_index(["exporter_group", "year", "model_complexity"])
)
df
vol def_10y_annual def_annual branch
exporter_group year model_complexity
PANCA PUTRA GANDA 2018 0 240593.75 893.286671 280.873073 KNOWN
1 240594.01 586.372993 10.999991 KNOWN
2 240594.00 395.259944 97.181277 KNOWN
3 240594.01 560.276019 82.365313 KNOWN
2019 0 416239.04 1340.518192 515.570330 KNOWN
1 416239.17 940.343162 922.126925 KNOWN
2 416239.11 773.960536 2973.421187 KNOWN
3 416239.09 528.482255 3116.734762 KNOWN
2020 0 418404.09 1355.812079 202.251464 KNOWN
1 418404.69 794.178480 0.000000 KNOWN
2 418404.51 387.973231 23.515611 KNOWN
3 418404.53 555.824898 1.568968 KNOWN
PASIFIK AGRO SENTOSA (PAS) 2019 0 3365.04 10.840540 4.029082 KNOWN
1 3367.83 92.940920 0.000000 KNOWN
2 3367.82 82.323675 0.000000 KNOWN
3 3367.82 88.134079 0.000000 KNOWN
2020 0 1585.47 5.142706 0.708508 KNOWN
1 1588.71 0.000000 0.000000 KNOWN
2 1588.71 0.000000 0.000000 KNOWN
3 1588.71 0.000000 0.000000 KNOWN
PERMATA HIJAU 2018 0 1527196.79 5670.490685 1763.222758 KNOWN
1 1527401.87 2325.781596 181.682395 KNOWN
2 97897.06 20.612302 0.000000 KNOWN
3 1547710.66 8753.916734 3647.702459 KNOWN
2019 0 449819.13 1448.799224 551.053756 KNOWN
1 449952.60 856.976153 368.046090 KNOWN
2 449950.35 419.467584 282.448911 KNOWN
3 449948.80 398.098374 325.899186 KNOWN
2020 0 709500.19 2299.328403 336.030238 KNOWN
1 709725.56 2471.784160 125.778456 KNOWN
2 709720.27 632.328033 2.038150 KNOWN
3 709722.03 702.393015 3.244813 KNOWN
POSCO INTERNATIONAL 2018 2 4.04 0.146044 0.003477 KNOWN
3 4.03 0.150697 0.000000 KNOWN
2019 2 26875.79 944.759116 0.000000 KNOWN
3 26875.80 922.234492 0.000000 KNOWN
2020 2 39205.69 994.913262 0.000000 KNOWN
3 39205.69 971.531047 0.000000 KNOWN
PTPN III 2018 0 217357.38 807.047747 249.261608 KNOWN
1 217376.17 558.481183 215.956921 KNOWN
2 217376.22 162.814781 0.784748 KNOWN
3 217376.29 120.187062 0.712854 KNOWN
2019 0 407368.54 1312.024212 500.104727 KNOWN
1 407439.95 839.696237 190.538229 KNOWN
2 291524.02 195.828662 71.341497 KNOWN
3 408727.36 641.303181 233.426543 KNOWN
2020 0 38531.70 124.917589 17.556467 KNOWN
1 38594.72 0.000000 0.000000 KNOWN
2 38594.56 0.230563 0.000000 KNOWN
3 38594.62 0.000000 0.000000 KNOWN