Runbook
View or edit on GitHub
This page is synchronized from trase/models/argentina/soy/Runbook.ipynb. Last modified on 2025-12-13 00:30 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after the midnight build (CET),
or after manually triggering a build with a GitHub action (link).
from tqdm import tqdm
from trase.tools.pcs import *
# from trase.runbook.traders.ingest_zdcs import calculate_percentage_traded_under_zdc
# Reference title of the soy indicator set used by the quant embeddings below
# and by INDICATORS_REF_ID.
_SOY_INDICATORS_REFERENCE = "ARGENTINA SOY INDICATORS V3 FULL"


def _department_quant_embedding(node_quant_name, flow_quant_name):
    """Build the quant embedding for one department-of-production quant.

    All quant embeddings in this runbook share the same node role, the same
    indicators reference and the same production quant ("SOY_TN"); only the
    node quant name and the resulting flow quant name differ.
    """
    return EmbedNodeQuantFlowQuant(
        node_role="DEPARTMENT OF PRODUCTION",
        node_quant_name=node_quant_name,
        node_quant_reference=_SOY_INDICATORS_REFERENCE,
        flow_quant_name=flow_quant_name,
        production_node_quant_name="SOY_TN",
        production_node_quant_reference=_SOY_INDICATORS_REFERENCE,
    )


# Embeddings handed to embed() inside process(), in the order they are listed.
EMBEDDING_PARAMETERS_LIST = [
    EmbedNodeIndFlowInd(
        node_role="EXPORTER",
        node_ind_reference="TRADERS INDICATORS V3 FULL",
        node_ind_name="FOREST_500_SOY",
    ),
    EmbedNodeQualFlowQual(
        node_role="DEPARTMENT OF PRODUCTION",
        node_qual_reference="ARGENTINA INDICATORS V1 FULL",
        node_qual_name="BIOME",
    ),
    EmbedNodeTraseIdFlowQual(node_role="DEPARTMENT OF PRODUCTION"),
    _department_quant_embedding(
        node_quant_name="SOY_AREA",
        flow_quant_name="LAND_USE",
    ),
    _department_quant_embedding(
        node_quant_name="SOY_DEFORESTATION_5_YEAR_TOTAL",
        flow_quant_name="SOY_DEFORESTATION_5_YEAR_TOTAL_EXPOSURE",
    ),
    _department_quant_embedding(
        node_quant_name="SOY_DEFORESTATION_5_YEAR_ANNUAL",
        flow_quant_name="SOY_DEFORESTATION_5_YEAR_ANNUAL_EXPOSURE",
    ),
    _department_quant_embedding(
        node_quant_name="CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_TOTAL",
        flow_quant_name="CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_TOTAL_EXPOSURE",
    ),
    _department_quant_embedding(
        node_quant_name="CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_ANNUAL",
        flow_quant_name="CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_ANNUAL_EXPOSURE",
    ),
]

# Title passed to embed_trader_zdcs() inside process().
ZDC_NETWORK_REFERENCE_TITLE = "ARGENTINA SOY ZERO DEFORESTATION COMMITMENTS V3"

# Resolved at import time; currently only referenced by the commented-out
# calculate_percentage_traded_under_zdc() call in main().
INDICATORS_REF_ID = get_node_attributes_reference_id(_SOY_INDICATORS_REFERENCE)
# Positions in ``flow.path`` that process() reads or overwrites.
_BIOME_PATH_INDEX = 1
_DEPARTMENT_PATH_INDEX = 2
_TRADER_GROUP_PATH_INDEX = 6

# Privacy thresholds (AR privacy law, per the original note in this runbook):
# a flow keeps its department only if it is at least this large (tons) ...
_MIN_PUBLISHABLE_VOLUME = 28
# ... and its department is sourced by at least this many distinct exporters.
_MIN_EXPORTERS_PER_DEPARTMENT = 3


@parallel_enabled
def process(supply_chain, new_ref_id):
    """Enrich a supply chain, anonymize sensitive flows and store the result.

    The supply chain is first enriched (biome, economic bloc, fixed traders,
    the embeddings in ``EMBEDDING_PARAMETERS_LIST`` and trader ZDCs) and its
    labels are removed. Its flows are then split into two supply chains:

    * published flows, which keep their path unchanged; a flow is published
      only if ``raw_vol >= 28`` *and* its department (``path[2]``) has at
      least 3 distinct trader groups (``path[6]``) among the flows handled
      by this call;
    * anonymized flows (everything else), which lose their ``BIOME`` and
      ``TRASE_GEOCODE`` quals, get ``path[1]`` / ``path[2]`` replaced by the
      unknown biome (``AR-BIO-X``) and unknown department (``AR-XXXXX``)
      nodes, and are consolidated.

    The traders hierarchy and both supply chains are written with
    ``to_db(new_ref_id)``.

    Args:
        supply_chain: supply chain to process; must expose ``flows`` and
            ``cur``.
        new_ref_id: reference id passed to every ``to_db`` call.
    """
    supply_chain = add_biome(supply_chain)
    supply_chain = add_economic_bloc(supply_chain)
    supply_chain, traders_hierarchy = fix_supply_chain_traders(supply_chain)
    traders_hierarchy.to_db(new_ref_id)
    supply_chain = embed(
        supply_chain=supply_chain, embedding_parameters_list=EMBEDDING_PARAMETERS_LIST
    )
    supply_chain = embed_trader_zdcs(supply_chain, ZDC_NETWORK_REFERENCE_TITLE)
    # Keep the cursor from before label removal; it is reused for the new
    # SupplyChain objects and the node lookups below.
    cur = supply_chain.cur
    supply_chain = remove_supply_chain_labels(supply_chain)

    # Distinct trader groups seen per department. Sets give O(1) de-duplication
    # instead of scanning a list for every flow.
    trader_groups_by_department = {}
    for flow in supply_chain.flows:
        trader_groups_by_department.setdefault(
            flow.path[_DEPARTMENT_PATH_INDEX].id, set()
        ).add(flow.path[_TRADER_GROUP_PATH_INDEX].id)

    departments_to_anonymize = {
        department_id
        for department_id, trader_group_ids in trader_groups_by_department.items()
        if len(trader_group_ids) < _MIN_EXPORTERS_PER_DEPARTMENT
    }

    # Single-pass partition: one predicate decides the destination, so every
    # flow lands in exactly one of the two lists and the two conditions cannot
    # drift apart.
    published_flows = []
    anonymized_flows = []
    for flow in supply_chain.flows:
        publishable = (
            flow.raw_vol >= _MIN_PUBLISHABLE_VOLUME
            and flow.path[_DEPARTMENT_PATH_INDEX].id not in departments_to_anonymize
        )
        if publishable:
            published_flows.append(flow)
        else:
            anonymized_flows.append(flow)

    supply_chain_anonymized = SupplyChain(anonymized_flows, cur=cur)
    supply_chain = SupplyChain(published_flows, cur=cur)

    # Strip the quals that are dropped for anonymized flows.
    supply_chain_anonymized.remove_quals(["BIOME", "TRASE_GEOCODE"])

    unknown_department = get_node(
        find_node_by_trase_id("AR-XXXXX", cur=cur), "DEPARTMENT OF PRODUCTION", cur=cur
    )
    unknown_biome = get_node(
        find_node_by_trase_id("AR-BIO-X", cur=cur), "BIOME", cur=cur
    )
    for flow in supply_chain_anonymized.flows:
        flow.path[_BIOME_PATH_INDEX] = unknown_biome
        flow.path[_DEPARTMENT_PATH_INDEX] = unknown_department

    # After the replacement the anonymized flows are consolidated before
    # being stored.
    supply_chain_anonymized = consolidate(supply_chain_anonymized)

    supply_chain.to_db(new_ref_id)
    supply_chain_anonymized.to_db(new_ref_id)
def main(only_year=None):
    """Create the child flows dataset from the raw Argentina soy flows.

    Looks up the "SEI-PCS ARGENTINA SOY V1.1.0 RAW" flows reference, inserts a
    child reference (version "1.1.1", title suffix "TEST SINGLE SCRIPT
    RUNBOOK"), calls ``delete_dataset`` on the child for ``only_year`` and
    then runs ``process`` in parallel, chunked by "DEPARTMENT OF PRODUCTION",
    once per year.

    Args:
        only_year: when given, only this year is processed; when ``None``,
            every year returned by ``get_flows_dataset_years`` for the raw
            reference is processed.
    """
    raw_ref_id = get_flows_reference_id("SEI-PCS ARGENTINA SOY V1.1.0 RAW")
    new_ref_id = insert_child_flows_reference(
        parent_ref_id=raw_ref_id,
        version="1.1.1",
        version_type=None,
        title_suffix="TEST SINGLE SCRIPT RUNBOOK",
    )
    delete_dataset(new_ref_id, year=only_year)

    if only_year is None:
        years = get_flows_dataset_years(raw_ref_id)
    else:
        years = [only_year]

    for year in tqdm(years, "Creating full dataset"):
        yearly_supply_chain = get_supply_chain(raw_ref_id, year=year)
        run_parallel(
            process,
            yearly_supply_chain,
            new_ref_id,
            chunks_by="DEPARTMENT OF PRODUCTION",
        )

    # Disabled step (its import at the top of the file is commented out too):
    # calculate_percentage_traded_under_zdc(
    #     ref_id=new_ref_id, indicators_ref_id=INDICATORS_REF_ID
    # )
if __name__ == "__main__":
    # Script entry point: build the dataset for the year 2015 only.
    main(only_year=2015)