Analyze results

View or edit on GitHub

This page is synchronized from trase/models/brazil/soy_supply_sheds/Analyze_results.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_port_shed(port_trase_id, buffer=1, product=None):

    df_with_trains = pd.read_csv("df_final.csv")
    df_no_trains = pd.read_csv("df_final_no_trains.csv")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df_mun_of_interest = df_mun[df_mun["trase_id"] == port_trase_id]
    mun_name = df_mun_of_interest["name"].unique()[0]

    df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    if product is not None:
        df_with_trains = df_with_trains[df_with_trains["product"] == product]
        df_no_trains = df_no_trains[df_no_trains["product"] == product]

    df_port_with_trains = df_with_trains[
        df_with_trains["municipality_port"] == port_trase_id
    ]
    df_port_no_trains = df_no_trains[df_no_trains["municipality_port"] == port_trase_id]

    df_shed_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_production")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")

    df_aggregators_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_aggregator")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
        "geometry"
    ].apply(lambda x: shapely.from_wkt(x))
    df_aggregators_with_trains = gpd.GeoDataFrame(
        df_aggregators_with_trains
    ).set_geometry("geometry")

    df_hubs_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_hub",
        right_on="trase_id",
    )
    df_hubs_with_trains["geometry"] = df_hubs_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_hubs_with_trains = gpd.GeoDataFrame(df_hubs_with_trains).set_geometry("geometry")

    df_shed_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")

    df_aggregators_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
        "geometry"
    )

    df_hubs_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_hub",
        right_on="trase_id",
    )
    df_hubs_no_trains["geometry"] = df_hubs_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_hubs_no_trains = gpd.GeoDataFrame(df_hubs_no_trains).set_geometry("geometry")

    x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds

    x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    print(x_min, x_max, y_min, y_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax1.set_xlim(x_min, x_max)
    ax1.set_ylim(y_min, y_max)

    x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    print(x_min, x_max, y_min, y_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax2.set_xlim(x_min, x_max)
    ax2.set_ylim(y_min, y_max)

    df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
    df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")
    df_hubs_with_trains.plot(ax=ax1, column="vol", color="Blue")

    df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
    df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")
    df_hubs_no_trains.plot(ax=ax2, column="vol", color="Blue")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    title = mun_name + " with trains"
    ax1.set_title(title, fontsize=30)

    title = mun_name + " without trains"
    ax2.set_title(title, fontsize=30)

    fig.savefig(
        ((product.replace(" ", "_") + "_") if product is not None else "")
        + f"PORT_{port_trase_id}_{mun_name.replace(' ', '_')}.png"
    )

    return


def plot_hub_shed(hub_trase_id, buffer=1, product=None):

    df_with_trains = pd.read_csv("df_final.csv")
    df_no_trains = pd.read_csv("df_final_no_trains.csv")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df_mun_of_interest = df_mun[df_mun["trase_id"] == hub_trase_id]
    mun_name = df_mun_of_interest["name"].unique()[0]

    df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    if product is not None:
        df_with_trains = df_with_trains[df_with_trains["product"] == product]
        df_no_trains = df_no_trains[df_no_trains["product"] == product]

    df_hub_with_trains = df_with_trains[
        df_with_trains["municipality_hub"] == hub_trase_id
    ]
    df_hub_no_trains = df_no_trains[df_no_trains["municipality_hub"] == hub_trase_id]

    df_shed_with_trains = pd.merge(
        df_hub_with_trains.groupby("municipality_production")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")

    df_aggregators_with_trains = pd.merge(
        df_hub_with_trains.groupby("municipality_aggregator")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
        "geometry"
    ].apply(lambda x: shapely.from_wkt(x))
    df_aggregators_with_trains = gpd.GeoDataFrame(
        df_aggregators_with_trains
    ).set_geometry("geometry")

    df_shed_no_trains = pd.merge(
        df_hub_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")

    df_aggregators_no_trains = pd.merge(
        df_hub_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
        "geometry"
    )

    x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds

    x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax1.set_xlim(x_min, x_max)
    ax1.set_ylim(y_min, y_max)

    x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax2.set_xlim(x_min, x_max)
    ax2.set_ylim(y_min, y_max)

    df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
    df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")

    df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
    df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    title = mun_name + " with trains"
    ax1.set_title(title, fontsize=30)

    title = mun_name + " without trains"
    ax2.set_title(title, fontsize=30)

    fig.savefig(
        ((product.replace(" ", "_") + "_") if product is not None else "")
        + f"HUB_{hub_trase_id}_{mun_name.replace(' ', '_')}.png"
    )

    return


df_with_trains = pd.read_csv("df_final.csv")
df_no_trains = pd.read_csv("df_final_no_trains.csv")

# all_ports = df_with_trains["municipality_port"].unique()
# for port_trase_id in all_ports:
#     plot_port_shed(port_trase_id)

all_hubs = df_with_trains["municipality_hub"].unique()
for hub_trase_id in all_hubs:
    plot_hub_shed(hub_trase_id, product="SOYBEAN CAKE")

Find busiest train station of origin

Train stations from which the most soy gets shipped are

Rondonopolis (Mato Grosso)

Uberaba (Minas Gerais)

Araguari (Minas Gerais)

Rio Verde (Goias)

Porto Nacional (Tocantins)

df_trains = pd.read_csv("2023/prepared/train_flows.csv", sep=";")

df_trains = df_trains[df_trains.pop("commodity") == "SOJA"]
df_busiest = (
    df_trains.groupby("origin_trase_id")["vol"]
    .sum()
    .reset_index()
    .sort_values("vol", ascending=False)
    .head(5)
)
busiest_stations_origin = df_busiest["origin_trase_id"].tolist()
busiest_stations_origin
pd.merge(
    df_busiest,
    df_mun[["trase_id", "name"]],
    left_on="origin_trase_id",
    right_on="trase_id",
)

	origin_trase_id	vol	trase_id	name
0	BR-5107602	6962792.0	BR-5107602	RONDONOPOLIS
1	BR-3170107	4936111.0	BR-3170107	UBERABA
2	BR-3103504	2882767.0	BR-3103504	ARAGUARI
3	BR-5218805	2717120.0	BR-5218805	RIO VERDE
4	BR-1718204	2675925.0	BR-1718204	PORTO NACIONA

Find suppy sheds of busiest stations

df_shed = plot_origins(busiest_stations_origin[0])

/tmp/ipykernel_4134/4044652755.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(



---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:


File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()


File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()


File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()


File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'municipality_demand'


The above exception was the direct cause of the following exception:


KeyError                                  Traceback (most recent call last)

Cell In[5], line 1
----> 1 df_shed = plot_origins(busiest_stations_origin[0])


Cell In[3], line 15, in plot_origins(trase_id, buffer)
     14 def plot_origins(trase_id, buffer=5):
---> 15     return plot_shed(
     16         trase_id=trase_id,
     17         var_1="municipality_demand",
     18         var_2="municipality_supply",
     19         buffer=buffer,
     20         title_prefix="SUPPLY SHED - ",
     21     )


Cell In[3], line 50, in plot_shed(trase_id, var_1, var_2, buffer, title_prefix)
     47 df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")
     48 df_mun_of_interest["geometry"].boundary.plot(ax=ax, color="Red")
---> 50 df_shed = df_solution[df_solution[var_1] == trase_id]
     51 df_shed = pd.merge(df_shed, df_mun, left_on=var_2, right_on="trase_id")
     52 df_shed["geometry"] = df_shed["geometry"].apply(lambda x: shapely.from_wkt(x))


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)


KeyError: 'municipality_demand'

png

df_shed = plot_origins(busiest_stations_origin[1], buffer=1)

df_shed.sort_values("vol", ascending=False).head(3)

Top municipality supplying Uberaba is Vitoria, which is one of the destinations of trains from Uberaba!

df_trains[df_trains["origin_trase_id"] == "BR-3170107"]

All municipalities supplied by Vitoria

df_shed = plot_destinations("BR-3205309")

df_shed = plot_origins(busiest_stations_origin[2])

df_shed = plot_origins(busiest_stations_origin[3])

df_shed = plot_origins(busiest_stations_origin[4])

df_shed

Crushing

import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_crushing_demand():
    df_crushing = pd.read_csv("2023/prepared/crushing_demand.csv", sep=";")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, ax = plt.subplots()
    fig.set_figheight(35)
    fig.set_figwidth(35)

    df_brazil.boundary.plot(ax=ax, color="Black")

    df_crushing = pd.merge(
        df_crushing, df_mun, left_on="municipality", right_on="trase_id"
    )
    df_crushing["geometry"] = df_crushing["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_crushing = gpd.GeoDataFrame(df_crushing).set_geometry("geometry")

    # df_crushing.plot(ax=ax, column="vol", cmap="Greens")
    df_crushing.plot(ax=ax, column="vol", color="Red")

    return df_crushing


plot_crushing_demand()

plot_origins("BR-4315602")

plot_destinations("BR-4315602")

import pandas as pd

df_supply = pd.read_csv("supply.csv").rename(columns={"vol": "vol_supply"})
df_demand = pd.read_csv("demand.csv").rename(columns={"vol": "vol_demand"})
df_lp = pd.read_csv("lp_solution.csv")

df_lp_supply = (
    df_lp.groupby("municipality_supply")["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_supply_lp"})
)
df_lp_demand = (
    df_lp.groupby("trase_id")["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_demand_lp"})
)

df = pd.merge(df_supply, df_lp_supply, on="municipality_supply")

df["diff"] = df["vol_supply"] - df["vol_supply_lp"]
df.sort_values("diff")

df = pd.merge(df_demand, df_lp_demand, on="trase_id")

df["diff"] = df["vol_demand"] - df["vol_demand_lp"]
df.sort_values("diff")

import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt

df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")


def plot_lp_1():
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df = pd.read_csv("lp_1_supply.csv")
    df = pd.merge(df, df_mun, left_on="municipality_production", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax1, column="vol", cmap="Greens")

    df_solution = pd.read_csv("lp_1_solution.csv")
    df = df_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()

    df = pd.merge(df, df_mun, left_on="municipality_aggregator", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Purples")


def plot_lp_2():
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df = pd.read_csv("lp_2_supply.csv")
    df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax1, column="vol", cmap="Purples")

    df_solution = pd.read_csv("lp_2_solution.csv")
    df = df_solution.groupby("municipality_demand")["vol"].sum().reset_index()

    df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Reds")

plot_lp_1()

plot_lp_2()

import pandas as pd

df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
df_lp_1_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()

import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes


def main():
    # --- Train flows between municipalities
    df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")

    # --- Arrival at destination station treated as a supply
    df_train_soybean_flows = df_train_flows[
        df_train_flows.pop("commodity") == "SOJA"
    ].rename(
        columns={
            "origin_trase_id": "station_origin",
            "destination_trase_id": "station_destination",
        }
    )

    df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
    df_lp_2_solution = pd.read_csv("lp_2_solution.csv")

    # --- Connect aggregators to train flows
    df_aggregators_trains = stitch_dataframes(
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "municipality_aggregator",
                "municipality_demand": "station_origin",
            }
        ),
        df_train_soybean_flows,
        volume_column="vol",
        indicator=True,
    )

    df_aggregators_to_trains = df_aggregators_trains[
        df_aggregators_trains["_matched"] == "both"
    ].drop(columns="_matched")
    df_aggregators_to_hubs = (
        df_aggregators_trains[df_aggregators_trains["_matched"] == "left_only"]
        .drop(columns=["_matched", "station_destination"])
        .rename(columns={"station_origin": "municipality_hub"})
    )

    # --- Connect aggregator-train flows to hubs
    df_aggregators_to_trains_to_hubs = stitch_dataframes(
        df_aggregators_to_trains,
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "station_destination",
                "municipality_demand": "municipality_hub",
            }
        ),
        volume_column="vol",
        indicator=True,
    )
    df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
        df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
    ]

    # --- Combine
    df = pd.concat([df_aggregators_to_trains_to_hubs, df_aggregators_to_hubs])

    return df

    # --- Find origins (aggregators) of soybeans arriving at train stations of destination
    df_aggregators_to_trains = stitch_dataframes(
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "municipality_aggregator",
                "municipality_demand": "station_origin",
            }
        ),
        df_train_soybean_flows,
        volume_column="vol",
        indicator=True,
    )

    df_aggregators_to_trains.to_csv("aggregators_to_trains.csv")

    df_aggregators_to_trains = df_aggregators_to_trains[
        df_aggregators_to_trains.pop("_matched") == "both"
    ]

    print(df_aggregators_to_trains)
    print(df_aggregators_to_trains["vol"].sum())

    # --- Find origin (train stations of destination) of soybeans arriving at hubs from trains
    df_aggregators_to_trains_to_hubs = stitch_dataframes(
        df_aggregators_to_trains,
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "station_destination",
                "municipality_demand": "municipality_hub",
            }
        ),
        volume_column="vol",
        indicator=True,
    )
    df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
        df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
    ]

    print(df_aggregators_to_trains_to_hubs)
    print(df_aggregators_to_trains_to_hubs["vol"].sum())

    return

    # --- Find origin of soybeans arriving at silos directly
    df_farms_to_hubs = stitch_dataframes(
        df_lp_1_solution.rename(
            columns={"municipality_aggregator": "municipality_supply"}
        ),
        df_lp_2_solution,
        volume_column="vol",
        indicator=True,
    )
    df_farms_to_hubs = df_farms_to_hubs[df_farms_to_hubs.pop("_matched") == "both"]

    df_solution = pd.concat(
        [
            df_trains_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
            df_farms_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
        ]
    )

    return df_solution


main()

import pandas as pd

df_lp_2 = pd.read_csv("lp_2_solution.csv")
df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")
df_train_soybean_flows = df_train_flows[
    df_train_flows.pop("commodity") == "SOJA"
].rename(
    columns={
        "origin_trase_id": "station_origin",
        "destination_trase_id": "station_destination",
    }
)
df_lp_2[df_lp_2["municipality_demand"] == "BR-1715705"]
# df_lp_2_demand[df_lp_2_demand["municipality_demand"] == "BR-1715705"]

import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes


import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_issues():

    df_lp_1_supply = pd.read_csv("lp_1_supply.csv")
    df_lp_1_demand = pd.read_csv("lp_1_demand.csv")
    df_lp_1_solution = pd.read_csv("df_lp_1_solution.csv")

    df_lp_2_supply = pd.read_csv("lp_2_supply.csv")
    df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
    df_lp_2_solution = pd.read_csv("df_lp_2_solution.csv")

    print(df_lp_1_supply["vol"].sum())
    print(df_lp_1_demand["vol"].sum())
    print(df_lp_1_solution["vol"].sum())

    print(df_lp_2_supply["vol"].sum())
    print(df_lp_2_demand["vol"].sum())
    print(df_lp_2_solution["vol"].sum())

    df = stitch_dataframes(
        df_lp_2_supply, df_lp_2_solution, volume_column="vol", indicator=True
    )
    df = df.rename(columns={"_matched": "_matched_supply"})
    df = stitch_dataframes(df, df_lp_2_demand, volume_column="vol", indicator=True)
    df = df.rename(columns={"_matched": "_matched_demand"})

    df = df[
        ((df["_matched_supply"] != "both") | (df["_matched_demand"] != "both"))
        & (df["vol"] > 0.01)
    ].sort_values("vol")

    display(df)

    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    # TODO: unallocated supply

    df = df[df["_matched_supply"] != "both"]

    df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    # df.plot(ax=ax1, column="vol", cmap="Greens")
    df.plot(ax=ax1, column="vol", color="Green")

    # TODO: unmet demand

    df = df[df["_matched_demand"] != "both"]

    df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Purples")


plot_issues()

import pandas as pd

df = pd.read_csv("df_final.csv")
df

/tmp/ipykernel_4206/98622176.py:3: DtypeWarning: Columns (6,7) have mixed types. Specify dtype option on import or set low_memory=False.
  df = pd.read_csv("df_final.csv")

	Unnamed: 0	municipality_production	municipality_aggregator	municipality_station_origin	municipality_station_destination	municipality_hub	municipality_station_origin_cake	municipality_station_destination_cake	municipality_port	product	vol
0	0	BR-1100031	BR-1100031	NaN	NaN	BR-1301902	NaN	NaN	BR-1302603	SOYBEAN CAKE	13943.666346
1	1	BR-1100049	BR-1100049	NaN	NaN	BR-1301902	NaN	NaN	BR-1302603	SOYBEAN CAKE	6996.841343
2	2	BR-1100809	BR-1100049	NaN	NaN	BR-1301902	NaN	NaN	BR-1302603	SOYBEAN CAKE	11212.801305
3	3	BR-1100908	BR-1100049	NaN	NaN	BR-1301902	NaN	NaN	BR-1302603	SOYBEAN CAKE	8309.281742
4	4	BR-1100056	BR-1100056	NaN	NaN	BR-1301902	NaN	NaN	BR-1302603	SOYBEAN CAKE	20915.499312
...	...	...	...	...	...	...	...	...	...	...	...
88568	112782	BR-5300108	BR-5222005	NaN	NaN	BR-5201108	NaN	NaN	UNKNOWN	SOYBEANS	0.000632
88569	112834	BR-5222054	BR-5222054	BR-5218805	BR-3518701	BR-3554102	NaN	NaN	UNKNOWN	SOYBEANS	0.000008
88570	112846	BR-5222054	BR-5222054	NaN	NaN	BR-5218805	NaN	NaN	UNKNOWN	SOYBEANS	0.000188
88571	112852	BR-5222302	BR-5222302	NaN	NaN	BR-5201108	NaN	NaN	UNKNOWN	SOYBEANS	0.000092
88572	112854	BR-5300108	BR-5300108	NaN	NaN	BR-5300108	NaN	NaN	UNKNOWN	SOYBEANS	0.001657

88573 rows × 11 columns