Main 2022 test
This page is synchronized from trase/models/bolivia/soy/main_2022_test.ipynb. Last modified on 2025-12-14 23:19 CET by Trase Admin.
Please view or edit the original file there; changes are reflected here after the midnight build (CET time), or after manually triggering the GitHub Action.
Documentation
The previous LP hardcoded the NIT of processor and exporter Prolega to that of exporter Cargill because a contract exists between the two companies: Cargill has the "option" of buying 50% of Prolega's soy oil and cake. In this iteration of the model, exporter Prolega sources soy from its own processing facility first, and Cargill can then source the remainder.
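For concreteness, a minimal sketch of that rule, reusing the Prolega NIT (181110026) and crusher id (BO-CRUSHER-12) that appear later in this notebook; all quantities are hypothetical, not model data:
# Sketch only: Prolega fills its own demand from BO-CRUSHER-12 first,
# then Cargill can source up to the remainder. Numbers are illustrative.
prolega_output = 100_000.0  # hypothetical oil+cake output of BO-CRUSHER-12 (tn)
prolega_demand = 60_000.0   # hypothetical Prolega (181110026) export demand (tn)
cargill_demand = 55_000.0   # hypothetical Cargill (1015571020) export demand (tn)

prolega_sourced = min(prolega_demand, prolega_output)
cargill_sourced = min(cargill_demand, prolega_output - prolega_sourced)
print(prolega_sourced, cargill_sourced)  # 60000.0 40000.0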
LP
Going 5-stage LP FTW; go big or go home
Bean --> Silo --> Port/Crusher --> Port (oil/cake/residue)
Other Notes
...
Todo:
- Stitch LP results together.
- NOTE: we had to apply a ceil function to make some LPs solvable, so input and output quantities no longer match exactly; is this a problem for stitching LP results together? (See the sketch below.)
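A toy illustration of that ceil discrepancy, with hypothetical numbers; ceiling each source can inflate the total by up to one tonne per source:
import pandas as pd
from math import ceil

supply = pd.Series([10.2, 5.7, 3.1])  # hypothetical per-silo supply (tn)
print(supply.sum())              # ~19.0
print(supply.apply(ceil).sum())  # 21; output totals no longer match input totals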
DT/rule implementation (need help from Harry/Nanxu/Clement)
- Want to implement self-sourcing rules pre-LP
- Special rules where traders have agreements/arrangements to source from each other (once the information is complete and validated)
- Cost matrices are returning some funky results; consult with Vivi about the OSM API/methods
- Also need to consult on e.g. railways etc. (Maria has research to do on this front)
import sys
from datetime import datetime
import numpy as np
import pandas as pd
from pandas import concat  # concat() is called bare in the cells below
# import math
from math import ceil
from tqdm import tqdm
from trase.tools.sps import *
from trase.tools.aws import upload_pandas_df_to_s3
from trase.tools.matching.fair_allocation import proportional_sourcing, direct_sourcing
from trase.tools.sps import split_flows_using_proportions
from definition import years as model_years, version
from constants import HS6_TO_PRODUCT
# from trase.tools.sps import TraseGlpkCommand
pd.set_option("display.float_format", "{:,.2f}".format)
HS6_TO_PRODUCT
A note for Maria/Simon: eventually we want to move all the model code into a separate file called "model.py", and then we can run the model for multiple years simply by changing the values in range(year_start, year_end+1) in the cell below.
But for now, since it's easier to have the code available in cells while editing, we can only run one year at a time. To produce results for multiple years, change the values in the range(...) call below to the year you want, run the whole notebook (please use hotkeys or the menu bar to do this, or I'll be turning over in my grave lol), then change the value in range() and run everything again.
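As a minimal sketch of what that eventual refactor could look like (model.py and run_model are hypothetical; neither exists yet):
# Hypothetical future usage once the cells below live in model.py
from model import run_model  # run_model does not exist yet

for year in range(year_start, year_end + 1):  # set year_start/year_end as desired
    run_model("bolivia", "soy", year)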
# Run supply chain model
for year in range(2021, 2021 + 1):
# for year in range(2018, 2021 + 1):
print("-" * 40)
print("Running {} {} {}...".format("Bolivia", "Soy", year))
sc = SupplyChain("bolivia/soy", year=year, bucket="trase-storage")
# Set up Dataset
sc.preparation()
sc.load()
Production
# Municipality production
mun_production = sc.get("production").set_index("geocode.geocode")["value"]
# Get total annual production
total_production = sum(sc.get("production")["value"])
# print("production (sample)")
# print(mun_production.sample(3))
# print("total production")
# print(total_production)
Flows
# Call in flows
flows_df = sc.get("flows")
# flows_df.sample(3)
# Add in consolidated port id
# TODO put in preparation.py
# flows_df["trase_port_consolidated.id"] = flows_df["trase_port.id"]
# flows_df["trase_port_consolidated.id"][flows_df["trase_port_consolidated.id"].isin(["BO-PORT-721", "BO-PORT-722", "BO-PORT-751", "BO-PORT-752"])] = "BO-PORT-XXX"
# Add product column to the flows based on HS6_TO_PRODUCT in the constants.py file
# TODO put in preparation.py
# flows_df["product"] = flows_df.hs6.apply(lambda hs6: HS6_TO_PRODUCT[hs6])
# Add volume column in tonnes
# TODO put in preparation.py
# flows_df["vol_tn"] = flows_df["vol_kg"] / 1000
# sc.update(flows_df, ["trase_port.id","trase_port_consolidated.id","product","vol_tn"])
# print("flows_df (sample)")
flows_df.sample(3)
# define export demand by product
# TODO could use sps.consolidate here
def export_demand(product):
    """Total export volume (tn) per port for one product."""
    product_flows = flows_df.loc[flows_df["product"] == product]
    return product_flows.groupby("trase_port.id")["vol_tn"].sum()

demand_export_beans = export_demand("bean")
demand_export_oil = export_demand("oil")
demand_export_cake = export_demand("cake")
demand_export_residue = export_demand("residue")
# print("demand_beans")
# print(demand_export_beans)
# print("demand_oil")
# print(demand_export_oil)
# print("demand_cake")
# print(demand_export_cake)
# print("demand_residue")
# print(demand_export_residue)
Storage
# Assigning allocation of beans to silos
# - read in silo list
silo_capacities = sc.get("assets")[["unique_id", "type"]]
silo_capacities = silo_capacities[silo_capacities["type"] == "STORAGE"]
# - divide total_production by number of silos and assign to each
silo_capacities["throughput"] = total_production / len(silo_capacities)
# - create indexed series
silo_throughput = silo_capacities.set_index("unique_id")["throughput"]
# Nudge the totals just below total production: the even split can overshoot
# it by a floating-point rounding error, which would make the mun-to-silo LP
# infeasible (total silo demand must not exceed total municipal supply)
while sum(silo_throughput) > total_production:
    silo_throughput = silo_throughput * (1 - 10**-16)
# print("silo throughput (sample)")
# silo_throughput.sample(3)
silo_throughput
Crushing
# Getting daily capacities for appropriate year
# Create data frame with unique id and capacities
crusher_capacities = sc.get("assets")[
["unique_id", "exporter_id", "type", "DAILY_CAPACITY_TN_{}".format(year)]
]
crusher_capacities = crusher_capacities[crusher_capacities["type"] == "PROCESSING"]
crusher_capacities = crusher_capacities.rename(
columns={"DAILY_CAPACITY_TN_{}".format(year): "daily_capacity"}, errors="raise"
)
crusher_capacities = crusher_capacities.drop(labels="type", axis="columns")
# Sidestep the preprocessor reading in blank values as strings by changing col type
crusher_capacities = crusher_capacities.astype({"daily_capacity": "float"})
# Calculate relative annual crusher throughput (raw soy demand) and add as a new column
crusher_capacities["proportions"] = crusher_capacities["daily_capacity"] / sum(
crusher_capacities["daily_capacity"]
)
# # get list of exporters who own crushers
# exporters_with_crushers = crusher_capacities.exporter_id
# exporters_with_crushers = exporters_with_crushers.loc[exporters_with_crushers != "NA"]
# print("crusher capacities and proportions (sample)")
crusher_capacities
# Calculate crusher inputs
# Get total annual soybean exports
total_bean_exports = flows_df[flows_df["product"] == "bean"]["vol_tn"].sum()
# Get total annual soybeans to be sent to crushers
total_to_be_crushed = total_production - total_bean_exports
# Calculate crusher throughput and add as new col
crusher_capacities["demand"] = crusher_capacities["proportions"] * total_to_be_crushed
# print("total_soy_production")
# print(total_production)
# print("total_bean_exports")
# print(total_bean_exports)
# print("total_to_be_crushed")
# print(total_to_be_crushed)
# print("crusher demand (sample)")
# crusher_capacities.sample(3)
# Calculate crusher outputs
# TEMP CODE; calculate crusher outputs using assumed conversion factors
# (19% oil, 78% cake, 3% residue; these sum to 1, so crush mass is conserved)
crusher_capacities["oil_output"] = crusher_capacities["demand"] * 0.19
crusher_capacities["cake_output"] = crusher_capacities["demand"] * 0.78
crusher_capacities["residue_output"] = crusher_capacities["demand"] * 0.03
# Defining "output" which is actually crusher demand
crusher_demand = crusher_capacities.set_index("unique_id")["demand"]
crusher_output_oil = crusher_capacities.set_index("unique_id")["oil_output"]
crusher_output_cake = crusher_capacities.set_index("unique_id")["cake_output"]
crusher_output_residue = crusher_capacities.set_index("unique_id")["residue_output"]
# print("crusher demand (sample)")
# print(crusher_demand.sample(3))
# print("crusher oil output (sample)")
# print(crusher_output_oil.sample(3))
# print("crusher cake output (sample)")
# print(crusher_output_cake.sample(3))
# print("crusher residue output (sample)")
# print(crusher_output_residue.sample(3))
# print("crusher demand and outputs (sample)")
crusher_capacities.sample(3)
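Since the conversion factors above are hard-coded, a quick mass-balance check could catch future edits that break them; a sketch using the series just defined:
# Sanity check (sketch): oil + cake + residue outputs should equal beans crushed
total_outputs = (
    crusher_output_oil.sum() + crusher_output_cake.sum() + crusher_output_residue.sum()
)
assert abs(total_outputs - crusher_demand.sum()) < 1e-6 * max(crusher_demand.sum(), 1.0)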
Population data
population = sc.get("populations")
population = population[population["year"] == str(year)]  # 'year' column holds strings
population = population.set_index("geocode")["value"]
population_prop = population / sum(population)
# print(population)
Decision tree (assign initial branches)
# --------------------------------------------------------------
# Branch 1:
# - Link crusher output to export flows based on trader ownership
# --------------------------------------------------------------
flows_df = sc.get("flows")
flows_temp = pd.merge(
flows_df, crusher_capacities.exporter_id, on="exporter_id", validate="many_to_one"
)
flows_temp = flows_temp[flows_temp["product"] != "bean"]
flows_temp["branch"] = "1"
sc.update(flows_temp, ["branch"])
# --------------------------------------------------------------
# Branch 2:
# - Link crusher output to export flows based on trader agreements
# --------------------------------------------------------------
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["exporter_id"] == "1015571020"]
flows_temp = flows_temp[flows_temp["product"] != "bean"]
flows_temp["branch"] = "2"
sc.update(flows_temp, ["branch"])
# --------------------------------------------------------------
# Branch 3:
# - Link silo/crusher output to export flows based on LP
# --------------------------------------------------------------
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == ""]
flows_temp["branch"] = "3"
sc.update(flows_temp, ["branch"])
Decision tree (actioning based on branches)
# BRANCH 1
# OIL
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == "1"] # flows assigned branch 1
flows_temp_oil = flows_temp[
flows_temp["product"] == "oil"
] # flows assigned branch 1 of oil
crusher_oil_info = flows_temp_oil.groupby("exporter_id")[
"vol_tn"
].sum() # group exports of oil by exporter id
crusher_oil_info = pd.merge(
crusher_oil_info, crusher_capacities, how="outer", on="exporter_id"
) # merge aggregated exports with crusher output data
crusher_oil_info = (
crusher_oil_info[["unique_id", "oil_output", "exporter_id", "vol_tn"]]
.fillna(0)
.sort_values("unique_id")
) # reduce to relevant columns
print(crusher_oil_info)
sources = pd.Series(
crusher_oil_info["oil_output"].values, index=crusher_oil_info["unique_id"]
) # supply from crushers
sinks = pd.Series(
crusher_oil_info["vol_tn"].values, index=crusher_oil_info["exporter_id"]
) # export demand
matches = pd.MultiIndex.from_frame(
crusher_oil_info[["unique_id", "exporter_id"]]
) # allowed connections between crushers and traders
# print(sources)
# print(sinks)
# print(matches)
allocation, _ = direct_sourcing(
matches, sources, sinks
) # resolved allocation of crushers to traders
allocation_crusher_oil = allocation.groupby(level=0).sum()
crusher_oil_remaining = sources - allocation_crusher_oil
allocation_trader = allocation.groupby(level=1).sum()
unsolved = sinks - allocation_trader
total = sinks
solved_proportion = allocation_trader / total
solved_proportion = solved_proportion.fillna(0)
unsolved_proportion = unsolved / total
unsolved_proportion = unsolved_proportion.fillna(0)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="TO_LP"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "oil"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=["exporter_id", "product"],
by="proportion",
)
# CAKE
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == "1"] # flows assigned branch 1
flows_temp_cake = flows_temp[
flows_temp["product"] == "cake"
] # flows assigned branch 1 of cake
crusher_cake_info = flows_temp_cake.groupby("exporter_id")[
"vol_tn"
].sum() # group exports of cake by exporter id
crusher_cake_info = pd.merge(
crusher_cake_info, crusher_capacities, how="outer", on="exporter_id"
) # merge aggregated exports with crusher output data
crusher_cake_info = (
crusher_cake_info[["unique_id", "cake_output", "exporter_id", "vol_tn"]]
.fillna(0)
.sort_values("unique_id")
) # reduce to relevant columns
sources = pd.Series(
crusher_cake_info["cake_output"].values, index=crusher_cake_info["unique_id"]
) # supply from crushers
sinks = pd.Series(
crusher_cake_info["vol_tn"].values, index=crusher_cake_info["exporter_id"]
) # export demand
matches = pd.MultiIndex.from_frame(
crusher_cake_info[["unique_id", "exporter_id"]]
) # allowed connections between crushers and traders
allocation, _ = direct_sourcing(
matches, sources, sinks
) # resolved allocation of crushers to traders
allocation_crusher_cake = allocation.groupby(level=0).sum()
crusher_cake_remaining = sources - allocation_crusher_cake
allocation_trader = allocation.groupby(level=1).sum()
unsolved = sinks - allocation_trader
total = sinks
solved_proportion = allocation_trader / total
solved_proportion = solved_proportion.fillna(0)
unsolved_proportion = unsolved / total
unsolved_proportion = unsolved_proportion.fillna(0)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="TO_LP"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "cake"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=["exporter_id", "product"],
by="proportion",
)
# RESIDUE
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == "1"] # flows assigned branch 1
flows_temp_residue = flows_temp[
flows_temp["product"] == "residue"
] # flows assigned branch 1 of residue
crusher_residue_info = flows_temp_residue.groupby("exporter_id")[
"vol_tn"
].sum() # group exports of residue by exporter id
crusher_residue_info = pd.merge(
crusher_residue_info, crusher_capacities, how="outer", on="exporter_id"
) # merge aggregated exports with crusher output data
crusher_residue_info = (
crusher_residue_info[["unique_id", "residue_output", "exporter_id", "vol_tn"]]
.fillna(0)
.sort_values("unique_id")
) # reduce to relevant columns
sources = pd.Series(
crusher_residue_info["residue_output"].values,
index=crusher_residue_info["unique_id"],
) # supply from crushers
sinks = pd.Series(
crusher_residue_info["vol_tn"].values, index=crusher_residue_info["exporter_id"]
) # export demand
matches = pd.MultiIndex.from_frame(
crusher_residue_info[["unique_id", "exporter_id"]]
) # allowed connections between crushers and traders
allocation, _ = direct_sourcing(
matches, sources, sinks
) # resolved allocation of crushers to traders
allocation_crusher_residue = allocation.groupby(level=0).sum()
crusher_residue_remaining = sources - allocation_crusher_residue
allocation_trader = allocation.groupby(level=1).sum()
unsolved = sinks - allocation_trader
total = sinks
solved_proportion = allocation_trader / total
solved_proportion = solved_proportion.fillna(0)
unsolved_proportion = unsolved / total
unsolved_proportion = unsolved_proportion.fillna(0)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="TO_LP"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "residue"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=["exporter_id", "product"],
by="proportion",
)
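The oil, cake, and residue cells above repeat the same allocate-then-split pattern; a sketch of a helper that could replace them, reusing the direct_sourcing, concat, and split_flows_using_proportions calls exactly as above (the name allocate_branch1_product is hypothetical):
def allocate_branch1_product(sc, flows_branch1, crusher_capacities, product, output_col):
    """Allocate one crusher product to its owning exporters, split the flows,
    and return the per-crusher supply remaining after the allocation."""
    exports = flows_branch1[flows_branch1["product"] == product]
    info = pd.merge(
        exports.groupby("exporter_id")["vol_tn"].sum(),
        crusher_capacities,
        how="outer",
        on="exporter_id",
    )[["unique_id", output_col, "exporter_id", "vol_tn"]].fillna(0)
    sources = pd.Series(info[output_col].values, index=info["unique_id"])
    sinks = pd.Series(info["vol_tn"].values, index=info["exporter_id"])
    matches = pd.MultiIndex.from_frame(info[["unique_id", "exporter_id"]])
    allocation, _ = direct_sourcing(matches, sources, sinks)
    allocated = allocation.groupby(level=1).sum()
    solved = (allocated / sinks).fillna(0)
    unsolved = ((sinks - allocated) / sinks).fillna(0)
    df_proportions = concat(
        [
            solved.to_frame(name="proportion").assign(status="SOLVED_TO_CRUSHER"),
            unsolved.to_frame(name="proportion").assign(status="TO_LP"),
        ]
    )
    df_proportions = df_proportions[df_proportions["proportion"] > 0]
    df_proportions.index = df_proportions.index.set_names(["exporter_id"])
    df_proportions = df_proportions.reset_index()
    df_proportions["product"] = product
    split_flows_using_proportions(
        sc,
        df_proportions,
        values=["vol_tn", "vol_kg", "fob"],
        updating=["status"],
        on=["exporter_id", "product"],
        by="proportion",
    )
    return sources - allocation.groupby(level=0).sum()

# e.g. crusher_oil_remaining = allocate_branch1_product(
#     sc, flows_temp, crusher_capacities, "oil", "oil_output"
# )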
year
# THIS CELL IS WIP CURRENTLY
# BRANCH 2a
# CARGILL BOLIVIA S.A. 1015571020 source 120,000 from Gravetal 1028385027; BO-CRUSHER-10
# CONSTRAIN TO SUPPLYING ONLY EXPORTS FROM PORT SUAREZ/JENNIFER; BO-PORT-721, BO-PORT-722, BO-PORT-751, BO-PORT-752
# Cargill oil and cake demand
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == "2"] # flows assigned branch 2
# flows_temp = flows_temp[flows_temp["trase_port.id"].isin(["BO-PORT-721", "BO-PORT-722", "BO-PORT-751", "BO-PORT-752"])] # flows from allowed ports
flows_temp = flows_temp[
flows_temp["trase_port_consolidated.id"] == "BO-PORT-XXX"
] # flows from allowed ports
print(flows_temp)
flows_temp_oil = flows_temp[
flows_temp["product"] == "oil"
] # flows assigned branch 2 of oil
crusher_oil_info = flows_temp_oil.groupby("exporter_id")[
"vol_tn"
].sum() # group exports of oil by exporter id
print(crusher_oil_info)
flows_temp_cake = flows_temp[
flows_temp["product"] == "cake"
] # flows assigned branch 2 of cake
crusher_cake_info = flows_temp_cake.groupby("exporter_id")[
    "vol_tn"
].sum()  # group exports of cake by exporter id
print(crusher_cake_info)
if (
crusher_oil_info["1015571020"] < crusher_oil_remaining["BO-CRUSHER-10"]
and crusher_cake_info["1015571020"] < crusher_cake_remaining["BO-CRUSHER-10"]
):
oil_demand = crusher_oil_info
cake_demand = crusher_cake_info
else:
oil_ratio = crusher_oil_info / (crusher_oil_info + crusher_cake_info)
cake_ratio = crusher_cake_info / (crusher_oil_info + crusher_cake_info)
oil_scaled_demand = 120000 * oil_ratio
cake_scaled_demand = 120000 * cake_ratio
if (
oil_scaled_demand["1015571020"] < crusher_oil_remaining["BO-CRUSHER-10"]
and cake_scaled_demand["1015571020"] < crusher_cake_remaining["BO-CRUSHER-10"]
):
print("Enough supply!")
oil_demand = oil_scaled_demand
cake_demand = cake_scaled_demand
else:
print("Not enough supply; check!")
# Deduct the contracted volumes from BO-CRUSHER-10's remaining supply
# (WIP: oil_demand/cake_demand are only defined when supply is sufficient)
crusher_oil_remaining["BO-CRUSHER-10"] -= oil_demand["1015571020"]
crusher_cake_remaining["BO-CRUSHER-10"] -= cake_demand["1015571020"]
# OIL
total = crusher_oil_info
solved_proportion = oil_demand / total
unsolved_proportion = 1 - solved_proportion
print(total)
print(oil_demand)
print(solved_proportion)
print(unsolved_proportion)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(
    status="UNSOLVED"  # Branch 2b below filters on status == "UNSOLVED"
),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "oil"
# ****** NEED TO ADD IN PORT ID ******
df_proportions["trase_port_consolidated.id"] = "BO-PORT-XXX"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=[
"exporter_id",
"product",
"trase_port_consolidated.id",
], # ****** NEED TO ADD IN PORT ID ******
by="proportion",
)
# CAKE
total = crusher_cake_info
solved_proportion = cake_demand / total
unsolved_proportion = 1 - solved_proportion
print(total)
print(cake_demand)
print(solved_proportion)
print(unsolved_proportion)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="UNSOLVED"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "cake"
# ****** NEED TO ADD IN PORT ID ******
df_proportions["trase_port_consolidated.id"] = "BO-PORT-XXX"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=[
"exporter_id",
"product",
"trase_port_consolidated.id",
], # ****** NEED TO ADD IN PORT ID ******
by="proportion",
)
flows_df = sc.get("flows")
flows_temp = flows_df[flows_df["branch"] == "2"] # flows assigned branch 2
flows_temp = flows_temp[
    ~flows_temp["trase_port.id"].isin(
        ["BO-PORT-721", "BO-PORT-722", "BO-PORT-751", "BO-PORT-752"]
    )
]  # flows from ports other than the allowed ports above
flows_temp
# BRANCH 2b
# CARGILL BOLIVIA S.A. 1015571020 source from Prolega 181110026; BO-CRUSHER-12
# OIL
flows_df = sc.get("flows")
# flows_temp = flows_df[flows_df["branch"] == "2"] # flows assigned branch 2
flows_temp = flows_df[
(flows_df["branch"] == "2") & (flows_df["status"] == "UNSOLVED")
] # flows assigned branch 2
flows_temp_oil = flows_temp[
    flows_temp["product"] == "oil"
]  # unsolved branch-2 flows of oil
crusher_oil_info = flows_temp_oil.groupby("exporter_id")[
"vol_tn"
].sum() # group exports of oil by exporter id
sources = crusher_oil_remaining # supply from crushers
sinks = crusher_oil_info
matches = [("BO-CRUSHER-12", "1015571020")]
allocation, _ = direct_sourcing(
matches, sources, sinks
) # resolved allocation of crushers to traders
allocation_crusher_oil = allocation.groupby(level=0).sum()
crusher_oil_remaining = sources - allocation_crusher_oil
allocation_trader = allocation.groupby(level=1).sum()
unsolved = sinks - allocation_trader
total = sinks
solved_proportion = allocation_trader / total
solved_proportion = solved_proportion.fillna(0)
unsolved_proportion = unsolved / total
unsolved_proportion = unsolved_proportion.fillna(0)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="TO_LP"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "oil"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=["exporter_id", "product"],
by="proportion",
)
# CAKE
flows_df = sc.get("flows")
# ****** NEED TO JUST BE UNSOLVED FLOWS BELOW! ******
# flows_temp = flows_df[flows_df["branch"] == "2"] # flows assigned branch 2
flows_temp = flows_df[
(flows_df["branch"] == "2") & (flows_df["status"] == "UNSOLVED")
] # flows assigned branch 2
flows_temp_cake = flows_temp[
    flows_temp["product"] == "cake"
]  # unsolved branch-2 flows of cake
crusher_cake_info = flows_temp_cake.groupby("exporter_id")[
    "vol_tn"
].sum()  # group exports of cake by exporter id
sources = crusher_cake_remaining  # supply from crushers
sinks = crusher_cake_info
matches = [("BO-CRUSHER-12", "1015571020")]
allocation, _ = direct_sourcing(
matches, sources, sinks
) # resolved allocation of crushers to traders
allocation_crusher_cake = allocation.groupby(level=0).sum()
crusher_cake_remaining = sources - allocation_crusher_cake
allocation_trader = allocation.groupby(level=1).sum()
unsolved = sinks - allocation_trader
total = sinks
solved_proportion = allocation_trader / total
solved_proportion = solved_proportion.fillna(0)
unsolved_proportion = unsolved / total
unsolved_proportion = unsolved_proportion.fillna(0)
# combine
df_proportions = concat(
[
solved_proportion.to_frame(name="proportion").assign(
status="SOLVED_TO_CRUSHER"
),
unsolved_proportion.to_frame(name="proportion").assign(status="TO_LP"),
],
ignore_index=False,
)
df_proportions = df_proportions[df_proportions["proportion"] > 0]
df_proportions.index = df_proportions.index.set_names(["exporter_id"])
df_proportions = df_proportions.reset_index()
df_proportions["product"] = "oil"
# split original flows
split_flows_using_proportions(
sc,
df_proportions,
values=["vol_tn", "vol_kg", "fob"],
updating=["status"],
on=["exporter_id", "product"],
by="proportion",
)
flows_df = sc.get("flows")
flows_df
# DT LOGIC
# Branch 1: if a trader owns a crusher, source as much as you can/need from your own crusher
#   e.g. ADM-SAO S.A. 1028167028 owns BO-CRUSHER-06
#   if demand < output, the excess output goes to the next branch/LP
#   if demand > output, deplete the crusher and the excess demand goes to the next branch/LP
# Branch 2: if a trader has a contract, source as much as you can/need from the contracted crusher
#   e.g. CARGILL BOLIVIA S.A. 1015571020 has a contract with Prolega 181110026, who own BO-CRUSHER-12
#   need to check the numbers to determine the rules, e.g. we can force sourcing from contracted crushers, but there is potential for e.g. sourcing from just one
# Branch 3: to LP
# HARD CONSTRAINTS
#   crushers located at ports only supply export demand from that port (LP2)
#   if there is any Mennonite production, there might be specific relationships with certain crushers (LP1)
# SOFT CONSTRAINTS (discounts in LP)
#   exporters sourcing from silos
#   crushers sourcing from silos
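A sketch of how the first hard constraint could be wired into the LPs below, assuming (as the sparse cost index and the on_missing argument suggest) that (origin, destination) pairs absent from the cost index cannot carry flow; the crusher_port mapping is hypothetical:
# Hypothetical mapping of port-located crushers to their home port
crusher_port = {"BO-CRUSHER-10": "BO-PORT-721"}  # illustrative value only

def restrict_port_crushers(costs, crusher_port):
    """Drop (crusher, port) cost entries where a port-located crusher would
    supply export demand at a port other than its own; domestic (geocode)
    destinations are left untouched."""
    keep = [
        crusher_port.get(origin) is None
        or not str(destination).startswith("BO-PORT")
        or crusher_port[origin] == destination
        for origin, destination in costs.index
    ]
    return costs[keep]

# e.g. costs_proc_oil = restrict_port_crushers(costs_proc_oil, crusher_port)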
LP inputs (*NEEDS ADJUSTING AFTER RESOLVING DT ABOVE!*)
# mun-to-silo
supply_mun_silo_bean = mun_production
demand_mun_silo_bean = silo_throughput
costs_mun_silo_bean = sc.get("distance_matrix_mun_silo")
costs_mun_silo_bean = costs_mun_silo_bean.set_index(
["origin", "destination.unique_id"]
)["total_cost"]
# print("supply (sample)")
# print(supply_mun_silo_bean.sample(3))
# print("demand (sample)")
# print(demand_mun_silo_bean.sample(3))
# print("costs (sample)")
# print(costs_mun_silo_bean.sample(3))
# silo-to-crusher/port
supply_silo_proc_port_bean = silo_throughput
demand_silo_proc_port_bean = crusher_demand
costs_silo_proc_port_bean = sc.get("distance_matrix_silo_proc")
if not demand_export_beans.empty:
    demand_silo_proc_port_bean = pd.concat(
        [demand_silo_proc_port_bean, demand_export_beans]
    )
costs_silo_port = sc.get("distance_matrix_silo_port")
costs_silo_proc_port_bean = pd.concat([costs_silo_proc_port_bean, costs_silo_port])
costs_silo_proc_port_bean = costs_silo_proc_port_bean.set_index(
    ["origin.unique_id", "destination.unique_id"]
)["total_cost"]
# print("supply (sample)")
# print(supply_silo_proc_port_bean.sample(3))
# print("demand (sample)")
# print(demand_silo_proc_port_bean.sample(3))
# print("costs (sample)")
# print(costs_silo_proc_port_bean.sample(3))
print(sum(supply_silo_proc_port_bean))
print(sum(demand_silo_proc_port_bean))
print(sum(supply_silo_proc_port_bean) - sum(demand_silo_proc_port_bean))
# crusher-to-mun data prep
costs_proc_mun = sc.get("distance_matrix_proc_mun")
costs_proc_mun = costs_proc_mun.set_index(["origin.unique_id", "destination.geocode"])[
"total_cost"
]
# crusher-to-port/mun oil
supply_proc_oil = crusher_output_oil
# total remaining oil supply from crushers after export demand which we allocate to domestic demand
demand_domestic_oil_total = sum(supply_proc_oil) - sum(demand_export_oil)
# split total domestic demand across jurisdictions by population density
demand_domestic_oil = population_prop * demand_domestic_oil_total
demand_proc_oil = demand_export_oil
costs_proc_port_oil = sc.get("distance_matrix_proc_port")
costs_proc_port_oil = costs_proc_port_oil.set_index(
["origin.unique_id", "destination.unique_id"]
)["total_cost"]
if demand_domestic_oil_total != 0:
    demand_proc_oil = pd.concat([demand_proc_oil, demand_domestic_oil])
    costs_proc_oil = pd.concat([costs_proc_port_oil, costs_proc_mun])
else:
    costs_proc_oil = costs_proc_port_oil
# print("supply (sample)")
# print(supply_proc_oil.sample(3))
# print("demand (sample)")
# print(demand_proc_oil.sample(3))
# print("costs (sample)")
# print(costs_proc_port_oil.sample(3))
# crusher-to-port/mun cake
supply_proc_cake = crusher_output_cake
# total remaining cake supply from crushers after export demand which we allocate to domestic demand
demand_domestic_cake_total = sum(supply_proc_cake) - sum(demand_export_cake)
# split total domestic demand across jurisdictions by population density
demand_domestic_cake = population_prop * demand_domestic_cake_total
demand_proc_cake = demand_export_cake
costs_proc_port_cake = sc.get("distance_matrix_proc_port")
costs_proc_port_cake = costs_proc_port_cake.set_index(
["origin.unique_id", "destination.unique_id"]
)["total_cost"]
if demand_domestic_cake_total != 0:
    demand_proc_cake = pd.concat([demand_proc_cake, demand_domestic_cake])
    costs_proc_cake = pd.concat([costs_proc_port_cake, costs_proc_mun])
else:
    costs_proc_cake = costs_proc_port_cake
# print("supply (sample)")
# print(supply_proc_cake.sample(3))
# print("demand (sample)")
# print(demand_proc_cake.sample(3))
# print("costs (sample)")
# print(costs_proc_port_cake.sample(3))
print(sum(supply_proc_cake))
print(sum(demand_proc_cake))
print(sum(supply_proc_cake) - sum(demand_proc_cake))
# crusher-to-port/mun residue
supply_proc_residue = crusher_output_residue
# total remaining res supply from crushers after export demand which we allocate to domestic demand
demand_domestic_residue_total = sum(supply_proc_residue) - sum(demand_export_residue)
# split total domestic demand across jurisdictions by population density
demand_domestic_residue = population_prop * demand_domestic_residue_total
demand_proc_residue = demand_export_residue
costs_proc_port_residue = sc.get("distance_matrix_proc_port")
costs_proc_port_residue = costs_proc_port_residue.set_index(
["origin.unique_id", "destination.unique_id"]
)["total_cost"]
if demand_domestic_residue_total != 0:
    demand_proc_residue = pd.concat([demand_proc_residue, demand_domestic_residue])
    costs_proc_residue = pd.concat([costs_proc_port_residue, costs_proc_mun])
else:
    costs_proc_residue = costs_proc_port_residue
# print("supply (sample)")
# print(supply_proc_res.sample(3))
# print("demand (sample)")
# print(demand_proc_res.sample(3))
# print("costs (sample)")
# print(costs_proc_port_res.sample(3))
LPs
# LP mun to silo
solve_transportation_problem(
supply=supply_mun_silo_bean,
demand=demand_mun_silo_bean,
costs=costs_mun_silo_bean,
on_missing="warn",
)
# LP silo to crusher/port
solve_transportation_problem(
supply=supply_silo_proc_port_bean.apply(ceil),
demand=demand_silo_proc_port_bean,
costs=costs_silo_proc_port_bean,
on_missing="warn",
)
# LP crusher to port/mun oil
solve_transportation_problem(
supply=supply_proc_oil,
demand=demand_proc_oil,
costs=costs_proc_oil,
on_missing="warn",
)
# LP crusher to port/mun cake
solve_transportation_problem(
supply=supply_proc_cake.apply(ceil),
demand=demand_proc_cake,
costs=costs_proc_cake,
on_missing="warn",
)
# LP crusher to port/mun residue
solve_transportation_problem(
supply=supply_proc_residue,
demand=demand_proc_residue,
costs=costs_proc_residue,
on_missing="warn",
)
Everything below here is old code to check/re-write
# format results
results = results[results > 0]
results = results.reset_index()
results = results.rename(columns={"sink": "processing_facility_id", "quantity": "tons"})
results = results.reset_index(drop=True).set_index("processing_facility_id")
results = results.join(demand).rename(
columns={"weight_tn": "total_processing_facility_tn"}
)
results["proportion"] = results["tons"] / results["total_processing_facility_tn"]
flows_with_facility["weight_tn"] = flows_with_facility["weight_kg"] / 1000
In the next two cells, we link our LP results (COMMUNITY or MUNICIPALITY --> CRUSHER) back to the export flows.
# for each row in demand, get the facility's supply shed
for facility_id, facility_tons in demand.items():
    supply_shed = results[results.index == facility_id]
expanded_flows = []
for ix, flow_row in tqdm(flows_with_facility.iterrows()):
facility_id = flow_row["unique_id"]
supply_shed = results[results.index == facility_id]
for supply_ix, supply_row in supply_shed.iterrows():
expanded_flows.append(
{
"nit": flow_row["nit"],
"exporter_trase_id": flow_row["exporter_trase_id"],
"hs6": flow_row["hs6"],
"importer": flow_row["importer"],
"port_of_export_code": flow_row["port_of_export_code"],
"exporter": flow_row["exporter"],
"country": flow_row["country"],
"unique_id": flow_row["unique_id"],
"tons": flow_row["weight_tn"] * supply_row["proportion"],
model_resolution: supply_row["source"],
}
)
out = pd.DataFrame(expanded_flows)
Checking some sum totals here, as a sanity check:
out.tons.sum()
Add domestic flows below
(Maria/Simon: Please double check this logic / code. It seems to work at both resolution levels (just assigning leftover production), but I didn't have time to QA at all. - Andrew 7/30)
exports_grouped_by_municipality = (
out.groupby("municipality").agg(sum).rename(columns={"tons": "export_tn"})
)
municipality_lvl_domestic_volume = (
exports_grouped_by_municipality.join(
sc.get("production").set_index("geocode")["value"],
how="outer",
)
.rename(columns={"value": "production_tn"})
.fillna(0)
)
# mostly only municipality 071005 has some domestic and some exported volume as of 7/30. Is this a problem? IDK! But worth noting.
municipality_lvl_domestic_volume["domestic_tn"] = (
municipality_lvl_domestic_volume["production_tn"]
- municipality_lvl_domestic_volume["export_tn"]
)
municipalities_with_domestic_demand = municipality_lvl_domestic_volume[
municipality_lvl_domestic_volume["domestic_tn"] > 0.0001
]
domestic_demand_rows = []
for municipality, row in municipalities_with_domestic_demand.iterrows():
dd_row = {
"nit": "DOMESTIC",
"exporter_trase_id": "DOMESTIC",
"hs6": "120100", # hard to know what product to assign to domestic flows, so I just did it as soybean. We could change this to oil or cake if we add crusher outputs/limitations to the model.
"importer": "DOMESTIC",
"port_of_export_code": "DOMESTIC",
"exporter": "DOMESTIC",
"country": "DOMESTIC",
"unique_id": "DOMESTIC",
"tons": row["domestic_tn"],
"municipality": municipality,
}
domestic_demand_rows.append(dd_row)
out = pd.concat([out, pd.DataFrame(domestic_demand_rows)], ignore_index=True)
Link hs6 and product
out["product"] = out.hs6.apply(lambda hs6: HS6_TO_PRODUCT[hs6])
Add municipality name to municipality level results (based on geocode)
if model_resolution == "municipality":
municipality_name_and_geocode = sc.get("municipalities")[
["municipality", "geocode"]
].rename(columns={"geocode": "municipality_code"})
out = out.rename(columns={"municipality": "municipality_code"}).merge(
municipality_name_and_geocode, how="left", on="municipality_code"
)
# group by columns used in output to have smaller datasets
all_columns_except_tons = list(out.drop(columns="tons").columns)
out = out.groupby(all_columns_except_tons).agg(sum)["tons"].reset_index()
Convert asset ids to trase id format
out["asset_trase_id"] = out["unique_id"]
out = out.drop(columns="unique_id")
Write results to S3
write_results = False
if write_results:
upload_pandas_df_to_s3(
out,
f"bolivia/soy/sei_pcs/results/{model_resolution}/SEIPCS_BO_SOY_{sc.context.year}_{datetime.now().strftime('%m%d%Y_%H%M')}.csv",
bucket_name="trase-storage",
)
Setting up a sankey just to check results
from trase.tools.jupyter import observable
for_observable = out.merge(
sc.get("municipalities")[["geocode", "municipality"]],
how="left",
left_on="municipality",
right_on="geocode",
).rename(
columns={"municipality_x": "municipality"}
) # .drop('municipality_x', axis=1)
for_observable
observable.sankey(
for_observable,
value_column="tons",
categorical_columns=["municipality", "exporter", "country"],
)
# observable.notebook(
# "@trase/sankey",
# ["chart"],
# {
# "data": results_map.to_dict(orient="records"),
# "tooltipKey": "Results (TONS)"}
# )
# WE WILL GET RID OF THE FOLLOWING CODE FROM PREVIOUS MODEL, BUT KEEPING FOR FUTURE REFERENCE FOR NOW
## show flows without processing ownership; these are currently later excluded which we don't want
# flows_df[flows_df['has_processing_facility'] == False]
#
## get flows with ownership
# flows_with_facility = flows_df[flows_df['has_processing_facility'] == True]
# flows_with_facility.head
#
## set index to be the nit code; removes nit column from frame
# flows_with_facility = flows_with_facility.set_index("nit")
# flows_with_facility.head
#
##list of assets; index is nit_tax_id, column is unique_id
# assets_to_merge = sc.get("assets").set_index("nit_tax_id")["unique_id"]
# assets_to_merge
#
## adds unique_id column from assets
# flows_with_facility = flows_with_facility.join(assets_to_merge)
# flows_with_facility.head
#
## reset index, and nit column returned to frame, but with column name "index"
# flows_with_facility = flows_with_facility.reset_index()
# flows_with_facility.head
#
## rename nit column from "index" to "nit"
# flows_with_facility = flows_with_facility.rename(columns={'index':'nit'})
# flows_with_facility.head