Runbook

View or edit on GitHub

This page is synchronized from trase/models/argentina/soy/Runbook.ipynb. Last modified on 2025-12-13 00:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after the midnight (CET) build, or after manually triggering a build with a GitHub action (link).

from tqdm import tqdm
from trase.tools.pcs import *

# from trase.runbook.traders.ingest_zdcs import calculate_percentage_traded_under_zdc

# Node-attributes reference that every soy quant embedding below reads from.
_SOY_INDICATORS_REFERENCE = "ARGENTINA SOY INDICATORS V3 FULL"
# Node role that all production-side embeddings are attached to.
_PRODUCTION_ROLE = "DEPARTMENT OF PRODUCTION"


def _soy_quant_embedding(node_quant_name, flow_quant_name):
    """Return the quant embedding of a department-of-production soy indicator.

    The node quant ``node_quant_name`` is embedded as the flow quant
    ``flow_quant_name``; ``SOY_TN`` from the same reference is passed as the
    production quant.
    """
    return EmbedNodeQuantFlowQuant(
        node_role=_PRODUCTION_ROLE,
        node_quant_name=node_quant_name,
        node_quant_reference=_SOY_INDICATORS_REFERENCE,
        flow_quant_name=flow_quant_name,
        production_node_quant_name="SOY_TN",
        production_node_quant_reference=_SOY_INDICATORS_REFERENCE,
    )


# Embedding specifications handed to embed() in process(), in this order.
EMBEDDING_PARAMETERS_LIST = [
    # Exporter-level indicator.
    EmbedNodeIndFlowInd(
        node_role="EXPORTER",
        node_ind_name="FOREST_500_SOY",
        node_ind_reference="TRADERS INDICATORS V3 FULL",
    ),
    # Department-level qualifiers.
    EmbedNodeQualFlowQual(
        node_role=_PRODUCTION_ROLE,
        node_qual_name="BIOME",
        node_qual_reference="ARGENTINA INDICATORS V1 FULL",
    ),
    EmbedNodeTraseIdFlowQual(node_role=_PRODUCTION_ROLE),
    # Department-level soy quants.
    _soy_quant_embedding("SOY_AREA", "LAND_USE"),
    _soy_quant_embedding(
        "SOY_DEFORESTATION_5_YEAR_TOTAL",
        "SOY_DEFORESTATION_5_YEAR_TOTAL_EXPOSURE",
    ),
    _soy_quant_embedding(
        "SOY_DEFORESTATION_5_YEAR_ANNUAL",
        "SOY_DEFORESTATION_5_YEAR_ANNUAL_EXPOSURE",
    ),
    _soy_quant_embedding(
        "CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_TOTAL",
        "CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_TOTAL_EXPOSURE",
    ),
    _soy_quant_embedding(
        "CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_ANNUAL",
        "CO2_EMISSIONS_SOY_DEFORESTATION_5_YEAR_ANNUAL_EXPOSURE",
    ),
]

# Title of the ZDC reference passed to embed_trader_zdcs() in process().
ZDC_NETWORK_REFERENCE_TITLE = "ARGENTINA SOY ZERO DEFORESTATION COMMITMENTS V3"
# Resolved at import time; only used by the currently disabled ZDC step in main().
INDICATORS_REF_ID = get_node_attributes_reference_id(_SOY_INDICATORS_REFERENCE)


@parallel_enabled
def process(supply_chain, new_ref_id):
    """Enrich, anonymize and store one chunk of the supply chain.

    The chunk is enriched (biome, economic bloc, trader fixes, embedded
    indicators and trader ZDCs), has its labels removed, and is then split
    into two supply chains: flows that keep their department of production,
    and flows whose biome and department are replaced by the "AR-BIO-X" and
    "AR-XXXXX" nodes. Both are written to the database under ``new_ref_id``,
    as is the traders hierarchy returned by ``fix_supply_chain_traders``.

    Args:
        supply_chain: the supply chain (chunk) to process. ``main`` requests
            chunks by "DEPARTMENT OF PRODUCTION"; the per-department trader
            count below presumably relies on a department never being split
            across chunks -- confirm against ``run_parallel``.
        new_ref_id: flows reference id everything is stored under.
    """
    supply_chain = add_biome(supply_chain)
    supply_chain = add_economic_bloc(supply_chain)
    supply_chain, traders_hierarchy = fix_supply_chain_traders(supply_chain)
    traders_hierarchy.to_db(new_ref_id)

    supply_chain = embed(
        supply_chain=supply_chain, embedding_parameters_list=EMBEDDING_PARAMETERS_LIST
    )
    supply_chain = embed_trader_zdcs(supply_chain, ZDC_NETWORK_REFERENCE_TITLE)

    # Reused for the two split supply chains and the node lookups below.
    cur = supply_chain.cur

    supply_chain = remove_supply_chain_labels(supply_chain)

    # Positions inside flow.path that this function reads or overwrites.
    biome_position = 1
    department_position = 2
    trader_position = 6

    # anonymize department for flows:
    # - smaller than 28 tons
    # - with <3 exporters (AR privacy law)
    min_volume = 28
    min_traders = 3

    # Distinct trader ids seen per department. Sets de-duplicate in O(1)
    # instead of scanning a list for every flow.
    department_traders = {}
    for flow in supply_chain.flows:
        department_id = flow.path[department_position].id
        trader_group_id = flow.path[trader_position].id
        department_traders.setdefault(department_id, set()).add(trader_group_id)

    # A set, because it is probed once or twice per flow in the filters below.
    departments_to_anonymize = {
        department_id
        for department_id, trader_group_ids in department_traders.items()
        if len(trader_group_ids) < min_traders
    }

    # NOTE(review): the two filters are exact complements only when raw_vol
    # compares normally; a NaN volume in a kept department would match
    # neither -- confirm raw_vol can never be NaN.
    supply_chain_anonymized = SupplyChain(
        [
            flow
            for flow in supply_chain.flows
            if flow.raw_vol < min_volume
            or flow.path[department_position].id in departments_to_anonymize
        ],
        cur=cur,
    )
    supply_chain = SupplyChain(
        [
            flow
            for flow in supply_chain.flows
            if flow.raw_vol >= min_volume
            and flow.path[department_position].id not in departments_to_anonymize
        ],
        cur=cur,
    )

    # Drop the qualifiers that would still identify the original department.
    supply_chain_anonymized.remove_quals(["BIOME", "TRASE_GEOCODE"])

    unknown_department = get_node(
        find_node_by_trase_id("AR-XXXXX", cur=cur), "DEPARTMENT OF PRODUCTION", cur=cur
    )
    unknown_biome = get_node(
        find_node_by_trase_id("AR-BIO-X", cur=cur), "BIOME", cur=cur
    )

    for flow in supply_chain_anonymized.flows:
        flow.path[biome_position] = unknown_biome
        flow.path[department_position] = unknown_department

    # Many flows now share the same path, so merge them before storing.
    supply_chain_anonymized = consolidate(supply_chain_anonymized)

    supply_chain.to_db(new_ref_id)
    supply_chain_anonymized.to_db(new_ref_id)


def main(only_year=None):
    """Rebuild the child flows dataset of the raw Argentina soy flows.

    Looks up the "SEI-PCS ARGENTINA SOY V1.1.0 RAW" flows reference, inserts
    a child reference (version "1.1.1") for it, calls ``delete_dataset`` on
    that child, and then hands each year's supply chain to ``run_parallel``
    together with ``process``.

    Args:
        only_year: when given, ``delete_dataset`` is called with that year and
            only that year is rebuilt. When ``None``, ``year=None`` is passed
            to ``delete_dataset`` and every year returned by
            ``get_flows_dataset_years`` for the parent reference is rebuilt.
    """
    source_ref_id = get_flows_reference_id("SEI-PCS ARGENTINA SOY V1.1.0 RAW")
    target_ref_id = insert_child_flows_reference(
        parent_ref_id=source_ref_id,
        version="1.1.1",
        version_type=None,
        title_suffix="TEST SINGLE SCRIPT RUNBOOK",
    )
    delete_dataset(target_ref_id, year=only_year)

    if only_year is None:
        years = get_flows_dataset_years(source_ref_id)
    else:
        years = [only_year]

    for year in tqdm(years, desc="Creating full dataset"):
        yearly_supply_chain = get_supply_chain(source_ref_id, year=year)
        run_parallel(
            process,
            yearly_supply_chain,
            target_ref_id,
            chunks_by="DEPARTMENT OF PRODUCTION",
        )

    # Currently disabled (its import at the top of the file is commented out too):
    # calculate_percentage_traded_under_zdc(
    #     ref_id=target_ref_id, indicators_ref_id=INDICATORS_REF_ID
    # )


# Script entry point: rebuild the dataset for the year 2015 only.
if __name__ == "__main__":
    main(only_year=2015)