AddCountries
View or edit on GitHub
This page is synchronized from trase/models/indonesia/palm_oil_complexity/AddCountries.ipynb. Last modified on 2026-05-07 15:52 CEST by Nicolas Martin.
Please view or edit the original file there; changes should be reflected here after the midnight build (CET),
or after manually triggering a build with a GitHub action (link).
from trase.tools.aws.aws_helpers import *
# Load the compiled bill-of-lading export records (semicolon-separated CSV).
df_exports = get_pandas_df(
    "indonesia/trade/bol/out/BOL_INDONESIA_COMPILED_2013_2022.csv", sep=";"
)

# Source column name -> name used in the rest of this notebook.
export_column_names = {
    "EXPORTER": "exporter",
    "PORT_ID": "port_trase_id",
    "KG_NET": "vol",
    "TYPE": "commodity",
    "DATE": "year",
    "IMPORTER": "importer",
    "COUNTRY": "country",
}
df_exports = df_exports.rename(columns=export_column_names)

# Rewrite the "P-" port identifiers into the "ID-PORT-" form.
df_exports["port_trase_id"] = df_exports["port_trase_id"].str.replace("P-", "ID-PORT-")

# Scale the volume down by 1,000 (presumably kilograms -> tonnes, given the
# source column is KG_NET -- TODO confirm).
df_exports["vol"] /= 1_000

# Keep only the first four characters of the date string as an integer year.
df_exports["year"] = df_exports["year"].str[:4].astype(int)

# Scale up the volume of RPO rows by 1 / 0.95.
# NOTE(review): presumably converts refined oil to a crude-oil equivalent --
# confirm the meaning of the 0.95 ratio with the model authors.
is_rpo = df_exports["commodity"] == "RPO"
cpo_to_rpo_factor = 1 / 0.95
df_exports.loc[is_rpo, "vol"] *= cpo_to_rpo_factor

# Drop every column that is not needed downstream.
export_columns = [
    "exporter",
    "importer",
    "country",
    "commodity",
    "vol",
    "year",
    "port_trase_id",
]
df_exports = df_exports[export_columns]
# Load the complexity-analysis model results (comma-separated CSV).
df_results = get_pandas_df(
    "indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS.csv",
    sep=",",
)
from trase.tools.sps import stitch_dataframes

# Value columns of the results frame handed to stitch_dataframes as
# `values_left`.
result_value_columns = [
    "EMISSIONS_FIRE_ON_PEAT_tCO2eq_EXPOSURE",
    "EMISSIONS_SUBSIDENCE_tCO2eq_EXPOSURE",
    "GROSS_EMISSIONS_LUC_tCO2eq_EXPOSURE",
    "IND_ANNUAL_PALM_DEFORESTATION_EXPOSURE",
    "IND_PALM_DEFORESTATION_10A3L_EXPOSURE",
    "NET_EMISSIONS_LUC_tCO2eq_EXPOSURE",
    "PALM_AREA",
    "TOTAL_EMISSIONS_tCO2eq_EXPOSURE",
    "product_vol",
]

# Stitch each model variant's results with the export records and write one
# CSV per model.
for model in ["V0", "V1", "V2"]:
    results_for_model = df_results[df_results["model"] == model]
    df = stitch_dataframes(
        results_for_model,
        df_exports,
        volume_column="vol",
        values_left=result_value_columns,
        indicator=True,
    )
    # Remove the "_matched" column from the frame (presumably added because
    # indicator=True -- confirm against stitch_dataframes) and keep only the
    # rows flagged "both".
    matched = df.pop("_matched")
    df[matched == "both"].to_csv(f"results_{model}.csv", index=False)
import pandas as pd

# Keys on which export volumes and model-result volumes are compared.
columns = ["exporter", "port_trase_id", "commodity", "year"]

# Total volume per key in the exports, joined against total volume per key
# and model in the results. The outer join keeps keys present on one side only;
# indicator=True records which side each row came from.
df = pd.merge(
    df_exports.groupby(columns)["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_exports"}),
    df_results.groupby(columns + ["model"])["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_results"}),
    on=columns,
    how="outer",
    indicator=True,
)

# Restrict the comparison to 2018-2020. The explicit .copy() makes the result an
# independent frame, so adding the "diff" column below does not assign into a
# filtered slice of the merged frame (which raises SettingWithCopyWarning).
df = df[df["year"].isin([2018, 2019, 2020])].copy()

# Difference between exported and modelled volume; NaN where a key exists on
# one side of the outer join only.
df["diff"] = df["vol_exports"] - df["vol_results"]

# Notebook display: rows whose volumes differ by more than 1 unit.
df[df["diff"].abs() > 1]
# Notebook display: largest (signed) difference.
df["diff"].max()
22.462960743344063
import pandas as pd

# Read the per-model result files back in and stack them into one frame.
result_files = ["results_V0.csv", "results_V1.csv", "results_V2.csv"]
model_frames = [pd.read_csv(path) for path in result_files]
df = pd.concat(model_frames)

# Notebook display: columns of the combined frame.
df.columns
Index(['EMISSIONS_FIRE_ON_PEAT_tCO2eq_EXPOSURE',
'EMISSIONS_SUBSIDENCE_tCO2eq_EXPOSURE',
'GROSS_EMISSIONS_LUC_tCO2eq_EXPOSURE',
'IND_ANNUAL_PALM_DEFORESTATION_EXPOSURE',
'IND_PALM_DEFORESTATION_10A3L_EXPOSURE',
'NET_EMISSIONS_LUC_tCO2eq_EXPOSURE', 'PALM_AREA',
'TOTAL_EMISSIONS_tCO2eq_EXPOSURE', 'branch', 'commodity',
'concession_trase_id', 'country', 'exporter', 'exporter_group',
'exporter_trase_id', 'importer', 'kabupaten_trase_id', 'mill_trase_id',
'model', 'port_trase_id', 'product_vol', 'province_trase_id',
'refinery_trase_id', 'vol', 'year'],
dtype='object')
# Write a local copy of the combined results. index=False keeps the row index
# out of the file: the frame was concatenated from three per-model files without
# resetting the index, so the index is not unique and would otherwise be written
# as an extra unnamed column. This also matches the per-model CSVs, which are
# written with index=False.
df.to_csv("ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv", index=False)
from trase.tools.aws.aws_helpers import *

# Upload the combined results to S3 next to the original results file.
# NOTE(review): when this cell was last run the call failed with
# "EntityTooLarge ... PutObject" (see the traceback in the cell output): the
# helper sends the whole CSV in a single put_object request, which exceeds the
# allowed size. This probably needs a multipart upload, or a compressed or
# split output -- confirm the preferred approach with the aws_helpers
# maintainers.
upload_pandas_df_to_s3(
    df,
    "indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv",
)
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
Cell In[9], line 3
1 from trase.tools.aws.aws_helpers import *
----> 3 upload_pandas_df_to_s3(
4 df,
5 "indonesia/palm_oil/sei_pcs/complexity_analysis/ID_PALM_OIL_COMPLEXITY_ANALYSIS_RESULTS_WITH_COUNTRIES.csv"
6 )
File /mnt/custom-file-systems/efs/fs-049d752ef37739434/shared/shared_repos/TRASE/trase/tools/aws/aws_helpers.py:256, in upload_pandas_df_to_s3(df, new_key, sep, encoding, float_format, quotechar, bucket_name)
247 csv_buffer = io.StringIO()
248 df.to_csv(
249 csv_buffer,
250 index=False,
(...)
254 quotechar=quotechar,
255 )
--> 256 upload_s3_csv_buffer(csv_buffer, new_key, bucket_name=bucket_name)
257 print(f" - complete")
File /mnt/custom-file-systems/efs/fs-049d752ef37739434/shared/shared_repos/TRASE/trase/tools/aws/aws_helpers.py:216, in upload_s3_csv_buffer(csv_buffer, key_name, s3_client, bucket_name)
214 """Uploads local file to s3"""
215 # s3_resource.meta.client.upload_file(path, settings.bucket, key_name)
--> 216 response = s3_client.put_object(
217 Body=csv_buffer.getvalue(),
218 ContentType="application/vnd.ms-excel",
219 Bucket=bucket_name,
220 Key=key_name,
221 )
222 return response
File ~/.conda/envs/trase-env/lib/python3.10/site-packages/botocore/client.py:569, in ClientCreator._create_api_method.<locals>._api_call(self, *args, **kwargs)
565 raise TypeError(
566 f"{py_operation_name}() only accepts keyword arguments."
567 )
568 # The "self" in this scope is referring to the BaseClient.
--> 569 return self._make_api_call(operation_name, kwargs)
File ~/.conda/envs/trase-env/lib/python3.10/site-packages/botocore/client.py:1023, in BaseClient._make_api_call(self, operation_name, api_params)
1019 error_code = error_info.get("QueryErrorCode") or error_info.get(
1020 "Code"
1021 )
1022 error_class = self.exceptions.from_code(error_code)
-> 1023 raise error_class(parsed_response, operation_name)
1024 else:
1025 return parsed_response
ClientError: An error occurred (EntityTooLarge) when calling the PutObject operation: Your proposed upload exceeds the maximum allowed size