Ar Territorial Deforestation Co2
s3://trase-storage/argentina/indicators/out/ar_territorial_deforestation_co2.csv
Dbt path: trase_production.main.ar_territorial_deforestation_co2
Explore on Metabase: Full table; summary statistics
Containing yaml file link: trase/data_pipeline/models/argentina/indicators/out/_schema.yml
Model file link: trase/data_pipeline/models/argentina/indicators/out/ar_territorial_deforestation_co2.py
Calls script: trase/data/argentina/indicators/ar_upload_indicators.py
Dbt test runs & lineage: Test results · Lineage
Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)
Tags: mock_model, argentina, indicators, out
ar_territorial_deforestation_co2
Description
This model was auto-generated based off .yml 'lineage' files in S3. The DBT model just raises an error; the actual script that created the data lives elsewhere. The script is located at trase/data/argentina/indicators/ar_upload_indicators.py [permalink].
Details
| Column | Type | Description |
|---|---|---|
Models / Seeds
source.trase_duckdb.trase-storage-raw.ar_soy_deforestation_2015_2020_q3_2022_gee
source.trase_duckdb.trase-storage-raw.ar_territorial_deforestation_2001_2020_q1_2022_gee
source.trase_duckdb.trase-storage-raw.ar_territorial_deforestation_co2_2001_2020_q1_2022_gee
source.trase_duckdb.trase-storage-raw.ar_soy_deforestation_co2_2015_2020_q3_2022_gee
Sources
['trase-storage-raw', 'ar_soy_deforestation_2015_2020_q3_2022_gee']
['trase-storage-raw', 'ar_territorial_deforestation_2001_2020_q1_2022_gee']
['trase-storage-raw', 'ar_territorial_deforestation_co2_2001_2020_q1_2022_gee']
['trase-storage-raw', 'ar_soy_deforestation_co2_2015_2020_q3_2022_gee']
"""
Argentina - Indicators - Soy and Territorial
1. Extract data processed on Google Earth Engine from S3,
2. Transform it into long format,
3. Filter the years of interest for the following release (Q1-2022),
4. Upload it to S3 in the appropriate `out/` directory
Last update: 2022-10-11
Quarter: 3
"""
import pandas as pd
from trase.tools.aws.aws_helpers_cached import get_pandas_df_once
from trase.tools.aws.metadata import write_csv_for_upload
def stack_df(path):
    """
    Load a wide-format GEE table and reshape it to long format.

    The file at `path` is read with every column as a string. The columns
    TRASE_ID, NAME, BIOME and VARIABLE become the row index, and every
    remaining column is stacked so that its header ends up in a YEAR column
    and its cell in a VALUE column.

    Returns a DataFrame with columns TRASE_ID, NAME, BIOME, VARIABLE, YEAR
    and VALUE.
    """
    id_columns = ["TRASE_ID", "NAME", "BIOME", "VARIABLE"]
    wide = get_pandas_df_once(path, sep=",", dtype=str)
    stacked = wide.set_index(id_columns).stack()
    long = stacked.reset_index()
    # The stacked level has no name, so reset_index labels it "level_4"
    # (it is the fifth index level) and labels the values column 0.
    return long.rename(columns={"level_4": "YEAR", 0: "VALUE"})
def main():
    """Build the four indicator tables and pass them on to the upload step."""
    upload_to_s3(extract_and_transform_data())
def _melt_soy_columns(wide, prefix, value_name, years):
    """
    Melt the `<prefix>_<year>` columns of a wide soy table into long format.

    `wide` must contain the columns TRASE_ID, NAME and BIOME plus one
    `<prefix>_<year>` column for every entry of `years`.

    Returns a DataFrame with columns TRASE_ID, NAME, BIOME, YEAR (the
    four-character year, still a string) and `value_name`.
    """
    long = pd.melt(
        wide,
        id_vars=["TRASE_ID", "NAME", "BIOME"],
        value_vars=[f"{prefix}_{year}" for year in years],
    ).rename(columns={"variable": "YEAR", "value": value_name})
    # The melted column names end with the year; keep only that suffix.
    long["YEAR"] = long["YEAR"].str[-4:]
    return long


def extract_and_transform_data():
    """
    Load the territorial and soy deforestation tables and reshape them.

    The two territorial files are reshaped with `stack_df` and their VALUE
    column is cast to float. The two soy files are melted per indicator
    (total and annual) and the two indicators are joined on
    TRASE_ID, NAME, BIOME and YEAR. Finally YEAR is cast to int everywhere.

    Returns a list of four DataFrames, in this order:
    soy deforestation, soy deforestation emissions, territorial
    deforestation, territorial deforestation emissions.
    """
    year_type = {"YEAR": int}
    value_type = {"VALUE": float}
    merge_keys = ["TRASE_ID", "NAME", "BIOME", "YEAR"]
    soy_years = range(2015, 2021)  # 2015..2020 inclusive

    keys_ter_def = [
        "argentina/indicators/ori/q1_2022/ar_territorial_deforestation_2001_2020_q1_2022_gee.csv",
        "argentina/indicators/ori/q1_2022/ar_territorial_deforestation_co2_2001_2020_q1_2022_gee.csv",
    ]
    territorial_def, territorial_def_emissions = [
        stack_df(key).astype(value_type) for key in keys_ter_def
    ]

    keys_soy_def = [
        "argentina/soy/indicators/ori/ar_soy_deforestation_2015_2020_q3_2022_gee.csv",
        "argentina/soy/indicators/ori/ar_soy_deforestation_co2_2015_2020_q3_2022_gee.csv",
    ]
    soy_def_wide, soy_def_co2_wide = [
        get_pandas_df_once(key, sep=",") for key in keys_soy_def
    ]

    # Soy deforestation: total joined with annual.
    sd_total = _melt_soy_columns(soy_def_wide, "sd_total", "SOY_DEF_TOTAL", soy_years)
    sd_annual = _melt_soy_columns(
        soy_def_wide, "sd_annual", "SOY_DEF_ANNUAL", soy_years
    )
    soy_def = sd_total.merge(sd_annual, how="left", on=merge_keys)

    # Soy deforestation CO2 emissions: total joined with annual.
    sd_total_co2 = _melt_soy_columns(
        soy_def_co2_wide, "sd_total_co2", "SOY_DEF_EMISSIONS_TOTAL", soy_years
    )
    sd_annual_co2 = _melt_soy_columns(
        soy_def_co2_wide, "sd_annual_co2", "SOY_DEF_EMISSIONS_ANNUAL", soy_years
    )
    soy_def_emissions = sd_total_co2.merge(sd_annual_co2, how="left", on=merge_keys)

    # Adjust column types: YEAR becomes an integer in every output table.
    return [
        frame.astype(year_type, errors="raise")
        for frame in (
            soy_def,
            soy_def_emissions,
            territorial_def,
            territorial_def_emissions,
        )
    ]
def upload_to_s3(dfs_indicators):
    """
    Hand each indicator table to `write_csv_for_upload` with its output key.

    `dfs_indicators` is indexed positionally: 0 is soy deforestation, 1 is
    soy deforestation CO2 emissions, 2 is territorial deforestation and 3 is
    territorial deforestation emissions.
    """
    destinations = (
        # Soy Deforestation
        "argentina/soy/indicators/out/q3_2022/ar_soy_deforestation.csv",
        # Soy Deforestation CO2 Emissions
        "argentina/soy/indicators/out/q3_2022/ar_soy_deforestation_co2.csv",
        # Territorial Deforestation
        "argentina/indicators/out/ar_territorial_deforestation.csv",
        # Territorial Deforestation Emissions
        "argentina/indicators/out/ar_territorial_deforestation_co2.csv",
    )
    for position, key in enumerate(destinations):
        write_csv_for_upload(dfs_indicators[position], key)
# Run the extract/transform/upload pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
import pandas as pd
def model(dbt, cursor):
    """
    Placeholder dbt Python model for ar_territorial_deforestation_co2.

    References the four `trase-storage-raw` sources and then raises
    NotImplementedError; it never produces data itself.
    """
    # Reference each upstream source of this model.
    dbt.source("trase-storage-raw", "ar_soy_deforestation_2015_2020_q3_2022_gee")
    dbt.source(
        "trase-storage-raw", "ar_territorial_deforestation_2001_2020_q1_2022_gee"
    )
    dbt.source(
        "trase-storage-raw", "ar_territorial_deforestation_co2_2001_2020_q1_2022_gee"
    )
    dbt.source("trase-storage-raw", "ar_soy_deforestation_co2_2015_2020_q3_2022_gee")
    # This model is not meant to be executed.
    raise NotImplementedError()
    # Unreachable at runtime. NOTE(review): presumably kept because dbt's
    # static validation of Python models expects the function to end with a
    # return statement — confirm before removing.
    return pd.DataFrame({"hello": ["world"]})