AddCountries

View or edit on GitHub

This page is synchronized from trase/models/indonesia/palm_oil_complexity/AddCountries.ipynb. Last modified on 2026-05-07 15:52 CEST by Nicolas Martin. Please view or edit the original file there; changes should be reflected here after the midnight build (CET), or after manually triggering a build with a GitHub action (link).

from trase.tools.aws.aws_helpers import *

# Load the compiled Indonesian bill-of-lading export records (semicolon-delimited).
df_exports = get_pandas_df(
    "indonesia/trade/bol/out/BOL_INDONESIA_COMPILED_2013_2022.csv", sep=";"
)

# Map the upper-case source headers onto the lower-case names used below.
column_names = {
    "EXPORTER": "exporter",
    "PORT_ID": "port_trase_id",
    "KG_NET": "vol",
    "TYPE": "commodity",
    "DATE": "year",
    "IMPORTER": "importer",
    "COUNTRY": "country",
}
df_exports = df_exports.rename(columns=column_names)

# Rewrite the "P-" port code marker as "ID-PORT-".
df_exports["port_trase_id"] = df_exports["port_trase_id"].str.replace("P-", "ID-PORT-")

# Source column is KG_NET, so this presumably converts kilograms to tonnes - confirm.
df_exports["vol"] /= 1_000

# The first four characters of the date string are taken as the year.
df_exports["year"] = df_exports["year"].str[:4].astype(int)

# Scale volumes of rows typed "RPO" by 1 / 0.95.
# NOTE(review): presumably expresses refined oil in crude-oil equivalent - confirm.
cpo_to_rpo_factor = 1 / 0.95
is_rpo = df_exports["commodity"] == "RPO"
df_exports.loc[is_rpo, "vol"] *= cpo_to_rpo_factor

# Keep only these columns, in this order.
kept_columns = [
    "exporter",
    "importer",
    "country",
    "commodity",
    "vol",
    "year",
    "port_trase_id",
]
df_exports = df_exports[kept_columns]

# Load the complexity-analysis results table (comma-delimited).
results_key = "indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS.csv"
df_results = get_pandas_df(results_key, sep=",")
from trase.tools.sps import stitch_dataframes

# Value columns of the results table handed to stitch_dataframes as `values_left`.
exposure_columns = [
    "EMISSIONS_FIRE_ON_PEAT_tCO2eq_EXPOSURE",
    "EMISSIONS_SUBSIDENCE_tCO2eq_EXPOSURE",
    "GROSS_EMISSIONS_LUC_tCO2eq_EXPOSURE",
    "IND_ANNUAL_PALM_DEFORESTATION_EXPOSURE",
    "IND_PALM_DEFORESTATION_10A3L_EXPOSURE",
    "NET_EMISSIONS_LUC_tCO2eq_EXPOSURE",
    "PALM_AREA",
    "TOTAL_EMISSIONS_tCO2eq_EXPOSURE",
    "product_vol",
]

# Stitch each model's results with the export records and write one CSV per model.
for model in ["V0", "V1", "V2"]:
    model_results = df_results[df_results["model"] == model]
    stitched = stitch_dataframes(
        model_results,
        df_exports,
        volume_column="vol",
        values_left=exposure_columns,
        indicator=True,
    )
    # Drop the indicator column from the output and keep only rows flagged "both".
    matched = stitched.pop("_matched")
    stitched[matched == "both"].to_csv(f"results_{model}.csv", index=False)
import pandas as pd

# Reconcile volumes between the export records and the model results,
# keyed by exporter, port, commodity and year.
columns = ["exporter", "port_trase_id", "commodity", "year"]

# Total volume per key in the export records.
vol_exports = (
    df_exports.groupby(columns)["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_exports"})
)
# Total volume per key and per model in the results.
vol_results = (
    df_results.groupby(columns + ["model"])["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_results"})
)
# Outer merge keeps keys present on only one side; `_merge` records which side.
df = pd.merge(vol_exports, vol_results, on=columns, how="outer", indicator=True)

# Take an explicit copy of the filtered rows: assigning a new column to a
# boolean-indexed slice otherwise triggers pandas' SettingWithCopyWarning.
df = df[df["year"].isin([2018, 2019, 2020])].copy()
df["diff"] = df["vol_exports"] - df["vol_results"]

# Rows whose volumes differ by more than 1; rows missing on either side have a
# NaN diff and therefore do not appear here.
df[df["diff"].abs() > 1]
# Largest signed difference (exports minus results).
df["diff"].max()
22.462960743344063
import pandas as pd

# Read back the per-model CSVs and stack them into a single table.
result_files = ["results_V0.csv", "results_V1.csv", "results_V2.csv"]
df = pd.concat([pd.read_csv(path) for path in result_files])
# Show the column names of the combined table.
df.columns
Index(['EMISSIONS_FIRE_ON_PEAT_tCO2eq_EXPOSURE',
       'EMISSIONS_SUBSIDENCE_tCO2eq_EXPOSURE',
       'GROSS_EMISSIONS_LUC_tCO2eq_EXPOSURE',
       'IND_ANNUAL_PALM_DEFORESTATION_EXPOSURE',
       'IND_PALM_DEFORESTATION_10A3L_EXPOSURE',
       'NET_EMISSIONS_LUC_tCO2eq_EXPOSURE', 'PALM_AREA',
       'TOTAL_EMISSIONS_tCO2eq_EXPOSURE', 'branch', 'commodity',
       'concession_trase_id', 'country', 'exporter', 'exporter_group',
       'exporter_trase_id', 'importer', 'kabupaten_trase_id', 'mill_trase_id',
       'model', 'port_trase_id', 'product_vol', 'province_trase_id',
       'refinery_trase_id', 'vol', 'year'],
      dtype='object')
# Write the combined table locally. index=False: the index left over from
# pd.concat repeats for each input file and carries no information, and the
# per-model CSVs are likewise written without an index column.
df.to_csv("ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv", index=False)
from trase.tools.aws.aws_helpers import *

# Publish the combined table to S3 under the complexity-analysis prefix.
# NOTE(review): the recorded run of this cell failed with an S3 `EntityTooLarge`
# ClientError on PutObject ("Your proposed upload exceeds the maximum allowed
# size"). The table probably needs a multipart upload, or to be split or
# compressed first - confirm before re-running.
upload_pandas_df_to_s3(
    df,
    new_key="indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv",
)
---------------------------------------------------------------------------

ClientError                               Traceback (most recent call last)

Cell In[9], line 3
      1 from trase.tools.aws.aws_helpers import *
----> 3 upload_pandas_df_to_s3(
      4     df,
      5     "indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv"
      6 )


File /mnt/custom-file-systems/efs/fs-049d752ef37739434/shared/shared_repos/TRASE/trase/tools/aws/aws_helpers.py:256, in upload_pandas_df_to_s3(df, new_key, sep, encoding, float_format, quotechar, bucket_name)
    247 csv_buffer = io.StringIO()
    248 df.to_csv(
    249     csv_buffer,
    250     index=False,
   (...)
    254     quotechar=quotechar,
    255 )
--> 256 upload_s3_csv_buffer(csv_buffer, new_key, bucket_name=bucket_name)
    257 print(f" - complete")


File /mnt/custom-file-systems/efs/fs-049d752ef37739434/shared/shared_repos/TRASE/trase/tools/aws/aws_helpers.py:216, in upload_s3_csv_buffer(csv_buffer, key_name, s3_client, bucket_name)
    214 """Uploads local file to s3"""
    215 # s3_resource.meta.client.upload_file(path, settings.bucket, key_name)
--> 216 response = s3_client.put_object(
    217     Body=csv_buffer.getvalue(),
    218     ContentType="application/vnd.ms-excel",
    219     Bucket=bucket_name,
    220     Key=key_name,
    221 )
    222 return response


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/botocore/client.py:569, in ClientCreator._create_api_method.<locals>._api_call(self, *args, **kwargs)
    565     raise TypeError(
    566         f"{py_operation_name}() only accepts keyword arguments."
    567     )
    568 # The "self" in this scope is referring to the BaseClient.
--> 569 return self._make_api_call(operation_name, kwargs)


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/botocore/client.py:1023, in BaseClient._make_api_call(self, operation_name, api_params)
   1019     error_code = error_info.get("QueryErrorCode") or error_info.get(
   1020         "Code"
   1021     )
   1022     error_class = self.exceptions.from_code(error_code)
-> 1023     raise error_class(parsed_response, operation_name)
   1024 else:
   1025     return parsed_response


ClientError: An error occurred (EntityTooLarge) when calling the PutObject operation: Your proposed upload exceeds the maximum allowed size