Skip to content

Analyze results

View or edit on GitHub

This page is synchronized from trase/models/brazil/soy_supply_sheds/Analyze_results.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_port_shed(port_trase_id, buffer=1, product=None):

    df_with_trains = pd.read_csv("df_final.csv")
    df_no_trains = pd.read_csv("df_final_no_trains.csv")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df_mun_of_interest = df_mun[df_mun["trase_id"] == port_trase_id]
    mun_name = df_mun_of_interest["name"].unique()[0]

    df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    if product is not None:
        df_with_trains = df_with_trains[df_with_trains["product"] == product]
        df_no_trains = df_no_trains[df_no_trains["product"] == product]

    df_port_with_trains = df_with_trains[
        df_with_trains["municipality_port"] == port_trase_id
    ]
    df_port_no_trains = df_no_trains[df_no_trains["municipality_port"] == port_trase_id]

    df_shed_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_production")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")

    df_aggregators_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_aggregator")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
        "geometry"
    ].apply(lambda x: shapely.from_wkt(x))
    df_aggregators_with_trains = gpd.GeoDataFrame(
        df_aggregators_with_trains
    ).set_geometry("geometry")

    df_hubs_with_trains = pd.merge(
        df_port_with_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_hub",
        right_on="trase_id",
    )
    df_hubs_with_trains["geometry"] = df_hubs_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_hubs_with_trains = gpd.GeoDataFrame(df_hubs_with_trains).set_geometry("geometry")

    df_shed_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")

    df_aggregators_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
        "geometry"
    )

    df_hubs_no_trains = pd.merge(
        df_port_no_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_hub",
        right_on="trase_id",
    )
    df_hubs_no_trains["geometry"] = df_hubs_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_hubs_no_trains = gpd.GeoDataFrame(df_hubs_no_trains).set_geometry("geometry")

    x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds

    x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    print(x_min, x_max, y_min, y_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax1.set_xlim(x_min, x_max)
    ax1.set_ylim(y_min, y_max)

    x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    print(x_min, x_max, y_min, y_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax2.set_xlim(x_min, x_max)
    ax2.set_ylim(y_min, y_max)

    df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
    df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")
    df_hubs_with_trains.plot(ax=ax1, column="vol", color="Blue")

    df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
    df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")
    df_hubs_no_trains.plot(ax=ax2, column="vol", color="Blue")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    title = mun_name + " with trains"
    ax1.set_title(title, fontsize=30)

    title = mun_name + " without trains"
    ax2.set_title(title, fontsize=30)

    fig.savefig(
        ((product.replace(" ", "_") + "_") if product is not None else "")
        + f"PORT_{port_trase_id}_{mun_name.replace(' ', '_')}.png"
    )

    return


def plot_hub_shed(hub_trase_id, buffer=1, product=None):

    df_with_trains = pd.read_csv("df_final.csv")
    df_no_trains = pd.read_csv("df_final_no_trains.csv")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df_mun_of_interest = df_mun[df_mun["trase_id"] == hub_trase_id]
    mun_name = df_mun_of_interest["name"].unique()[0]

    df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    if product is not None:
        df_with_trains = df_with_trains[df_with_trains["product"] == product]
        df_no_trains = df_no_trains[df_no_trains["product"] == product]

    df_hub_with_trains = df_with_trains[
        df_with_trains["municipality_hub"] == hub_trase_id
    ]
    df_hub_no_trains = df_no_trains[df_no_trains["municipality_hub"] == hub_trase_id]

    df_shed_with_trains = pd.merge(
        df_hub_with_trains.groupby("municipality_production")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")

    df_aggregators_with_trains = pd.merge(
        df_hub_with_trains.groupby("municipality_aggregator")["vol"]
        .sum()
        .reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
        "geometry"
    ].apply(lambda x: shapely.from_wkt(x))
    df_aggregators_with_trains = gpd.GeoDataFrame(
        df_aggregators_with_trains
    ).set_geometry("geometry")

    df_shed_no_trains = pd.merge(
        df_hub_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_production",
        right_on="trase_id",
    )
    df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")

    df_aggregators_no_trains = pd.merge(
        df_hub_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
        df_mun,
        left_on="municipality_aggregator",
        right_on="trase_id",
    )
    df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
        "geometry"
    )

    x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds

    x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax1.set_xlim(x_min, x_max)
    ax1.set_ylim(y_min, y_max)

    x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
    x_min = min(x_min, x_mun_min)
    x_max = max(x_max, x_mun_max)
    y_min = min(y_min, y_mun_min)
    y_max = max(y_max, y_mun_max)

    x_min -= buffer
    x_max += buffer
    y_min -= buffer
    y_max += buffer
    ax2.set_xlim(x_min, x_max)
    ax2.set_ylim(y_min, y_max)

    df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
    df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")

    df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
    df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")

    df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
    df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")

    title = mun_name + " with trains"
    ax1.set_title(title, fontsize=30)

    title = mun_name + " without trains"
    ax2.set_title(title, fontsize=30)

    fig.savefig(
        ((product.replace(" ", "_") + "_") if product is not None else "")
        + f"HUB_{hub_trase_id}_{mun_name.replace(' ', '_')}.png"
    )

    return


df_with_trains = pd.read_csv("df_final.csv")
df_no_trains = pd.read_csv("df_final_no_trains.csv")

# all_ports = df_with_trains["municipality_port"].unique()
# for port_trase_id in all_ports:
#     plot_port_shed(port_trase_id)

all_hubs = df_with_trains["municipality_hub"].unique()
for hub_trase_id in all_hubs:
    plot_hub_shed(hub_trase_id, product="SOYBEAN CAKE")

Find busiest train station of origin

Train stations from which the most soy gets shipped are

Rondonopolis (Mato Grosso)

Uberaba (Minas Gerais)

Araguari (Minas Gerais)

Rio Verde (Goias)

Porto Nacional (Tocantins)

df_trains = pd.read_csv("2023/prepared/train_flows.csv", sep=";")

df_trains = df_trains[df_trains.pop("commodity") == "SOJA"]
df_busiest = (
    df_trains.groupby("origin_trase_id")["vol"]
    .sum()
    .reset_index()
    .sort_values("vol", ascending=False)
    .head(5)
)
busiest_stations_origin = df_busiest["origin_trase_id"].tolist()
busiest_stations_origin
pd.merge(
    df_busiest,
    df_mun[["trase_id", "name"]],
    left_on="origin_trase_id",
    right_on="trase_id",
)
origin_trase_id vol trase_id name
0 BR-5107602 6962792.0 BR-5107602 RONDONOPOLIS
1 BR-3170107 4936111.0 BR-3170107 UBERABA
2 BR-3103504 2882767.0 BR-3103504 ARAGUARI
3 BR-5218805 2717120.0 BR-5218805 RIO VERDE
4 BR-1718204 2675925.0 BR-1718204 PORTO NACIONA

Find suppy sheds of busiest stations

df_shed = plot_origins(busiest_stations_origin[0])
/tmp/ipykernel_4134/4044652755.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(



---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:


File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()


File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()


File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()


File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'municipality_demand'


The above exception was the direct cause of the following exception:


KeyError                                  Traceback (most recent call last)

Cell In[5], line 1
----> 1 df_shed = plot_origins(busiest_stations_origin[0])


Cell In[3], line 15, in plot_origins(trase_id, buffer)
     14 def plot_origins(trase_id, buffer=5):
---> 15     return plot_shed(
     16         trase_id=trase_id,
     17         var_1="municipality_demand",
     18         var_2="municipality_supply",
     19         buffer=buffer,
     20         title_prefix="SUPPLY SHED - ",
     21     )


Cell In[3], line 50, in plot_shed(trase_id, var_1, var_2, buffer, title_prefix)
     47 df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")
     48 df_mun_of_interest["geometry"].boundary.plot(ax=ax, color="Red")
---> 50 df_shed = df_solution[df_solution[var_1] == trase_id]
     51 df_shed = pd.merge(df_shed, df_mun, left_on=var_2, right_on="trase_id")
     52 df_shed["geometry"] = df_shed["geometry"].apply(lambda x: shapely.from_wkt(x))


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
   4100 if self.columns.nlevels > 1:
   4101     return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
   4103 if is_integer(indexer):
   4104     indexer = [indexer]


File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)


KeyError: 'municipality_demand'

png

df_shed = plot_origins(busiest_stations_origin[1], buffer=1)
df_shed.sort_values("vol", ascending=False).head(3)

Top municipality supplying Uberaba is Vitoria, which is one of the destinations of trains from Uberaba!

df_trains[df_trains["origin_trase_id"] == "BR-3170107"]

All municipalities supplied by Vitoria

df_shed = plot_destinations("BR-3205309")
df_shed = plot_origins(busiest_stations_origin[2])
df_shed = plot_origins(busiest_stations_origin[3])
df_shed = plot_origins(busiest_stations_origin[4])
df_shed

Crushing

import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_crushing_demand():
    df_crushing = pd.read_csv("2023/prepared/crushing_demand.csv", sep=";")
    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, ax = plt.subplots()
    fig.set_figheight(35)
    fig.set_figwidth(35)

    df_brazil.boundary.plot(ax=ax, color="Black")

    df_crushing = pd.merge(
        df_crushing, df_mun, left_on="municipality", right_on="trase_id"
    )
    df_crushing["geometry"] = df_crushing["geometry"].apply(
        lambda x: shapely.from_wkt(x)
    )
    df_crushing = gpd.GeoDataFrame(df_crushing).set_geometry("geometry")

    # df_crushing.plot(ax=ax, column="vol", cmap="Greens")
    df_crushing.plot(ax=ax, column="vol", color="Red")

    return df_crushing


plot_crushing_demand()
plot_origins("BR-4315602")
plot_destinations("BR-4315602")

import pandas as pd

df_supply = pd.read_csv("supply.csv").rename(columns={"vol": "vol_supply"})
df_demand = pd.read_csv("demand.csv").rename(columns={"vol": "vol_demand"})
df_lp = pd.read_csv("lp_solution.csv")

df_lp_supply = (
    df_lp.groupby("municipality_supply")["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_supply_lp"})
)
df_lp_demand = (
    df_lp.groupby("trase_id")["vol"]
    .sum()
    .reset_index()
    .rename(columns={"vol": "vol_demand_lp"})
)

df = pd.merge(df_supply, df_lp_supply, on="municipality_supply")

df["diff"] = df["vol_supply"] - df["vol_supply_lp"]
df.sort_values("diff")

df = pd.merge(df_demand, df_lp_demand, on="trase_id")

df["diff"] = df["vol_demand"] - df["vol_demand_lp"]
df.sort_values("diff")
import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt

df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")


def plot_lp_1():
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df = pd.read_csv("lp_1_supply.csv")
    df = pd.merge(df, df_mun, left_on="municipality_production", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax1, column="vol", cmap="Greens")

    df_solution = pd.read_csv("lp_1_solution.csv")
    df = df_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()

    df = pd.merge(df, df_mun, left_on="municipality_aggregator", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Purples")


def plot_lp_2():
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    df = pd.read_csv("lp_2_supply.csv")
    df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax1, column="vol", cmap="Purples")

    df_solution = pd.read_csv("lp_2_solution.csv")
    df = df_solution.groupby("municipality_demand")["vol"].sum().reset_index()

    df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Reds")
plot_lp_1()
plot_lp_2()
import pandas as pd

df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
df_lp_1_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()
import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes


def main():
    # --- Train flows between municipalities
    df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")

    # --- Arrival at destination station treated as a supply
    df_train_soybean_flows = df_train_flows[
        df_train_flows.pop("commodity") == "SOJA"
    ].rename(
        columns={
            "origin_trase_id": "station_origin",
            "destination_trase_id": "station_destination",
        }
    )

    df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
    df_lp_2_solution = pd.read_csv("lp_2_solution.csv")

    # --- Connect aggregators to train flows
    df_aggregators_trains = stitch_dataframes(
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "municipality_aggregator",
                "municipality_demand": "station_origin",
            }
        ),
        df_train_soybean_flows,
        volume_column="vol",
        indicator=True,
    )

    df_aggregators_to_trains = df_aggregators_trains[
        df_aggregators_trains["_matched"] == "both"
    ].drop(columns="_matched")
    df_aggregators_to_hubs = (
        df_aggregators_trains[df_aggregators_trains["_matched"] == "left_only"]
        .drop(columns=["_matched", "station_destination"])
        .rename(columns={"station_origin": "municipality_hub"})
    )

    # --- Connect aggregator-train flows to hubs
    df_aggregators_to_trains_to_hubs = stitch_dataframes(
        df_aggregators_to_trains,
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "station_destination",
                "municipality_demand": "municipality_hub",
            }
        ),
        volume_column="vol",
        indicator=True,
    )
    df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
        df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
    ]

    # --- Combine
    df = pd.concat([df_aggregators_to_trains_to_hubs, df_aggregators_to_hubs])

    return df

    # --- Find origins (aggregators) of soybeans arriving at train stations of destination
    df_aggregators_to_trains = stitch_dataframes(
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "municipality_aggregator",
                "municipality_demand": "station_origin",
            }
        ),
        df_train_soybean_flows,
        volume_column="vol",
        indicator=True,
    )

    df_aggregators_to_trains.to_csv("aggregators_to_trains.csv")

    df_aggregators_to_trains = df_aggregators_to_trains[
        df_aggregators_to_trains.pop("_matched") == "both"
    ]

    print(df_aggregators_to_trains)
    print(df_aggregators_to_trains["vol"].sum())

    # --- Find origin (train stations of destination) of soybeans arriving at hubs from trains
    df_aggregators_to_trains_to_hubs = stitch_dataframes(
        df_aggregators_to_trains,
        df_lp_2_solution.rename(
            columns={
                "municipality_supply": "station_destination",
                "municipality_demand": "municipality_hub",
            }
        ),
        volume_column="vol",
        indicator=True,
    )
    df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
        df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
    ]

    print(df_aggregators_to_trains_to_hubs)
    print(df_aggregators_to_trains_to_hubs["vol"].sum())

    return

    # --- Find origin of soybeans arriving at silos directly
    df_farms_to_hubs = stitch_dataframes(
        df_lp_1_solution.rename(
            columns={"municipality_aggregator": "municipality_supply"}
        ),
        df_lp_2_solution,
        volume_column="vol",
        indicator=True,
    )
    df_farms_to_hubs = df_farms_to_hubs[df_farms_to_hubs.pop("_matched") == "both"]

    df_solution = pd.concat(
        [
            df_trains_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
            df_farms_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
        ]
    )

    return df_solution


main()
import pandas as pd

df_lp_2 = pd.read_csv("lp_2_solution.csv")
df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")
df_train_soybean_flows = df_train_flows[
    df_train_flows.pop("commodity") == "SOJA"
].rename(
    columns={
        "origin_trase_id": "station_origin",
        "destination_trase_id": "station_destination",
    }
)
df_lp_2[df_lp_2["municipality_demand"] == "BR-1715705"]
# df_lp_2_demand[df_lp_2_demand["municipality_demand"] == "BR-1715705"]
import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes


import pandas as pd
import geopandas as gpd
import shapely

from matplotlib import pyplot as plt


def plot_issues():

    df_lp_1_supply = pd.read_csv("lp_1_supply.csv")
    df_lp_1_demand = pd.read_csv("lp_1_demand.csv")
    df_lp_1_solution = pd.read_csv("df_lp_1_solution.csv")

    df_lp_2_supply = pd.read_csv("lp_2_supply.csv")
    df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
    df_lp_2_solution = pd.read_csv("df_lp_2_solution.csv")

    print(df_lp_1_supply["vol"].sum())
    print(df_lp_1_demand["vol"].sum())
    print(df_lp_1_solution["vol"].sum())

    print(df_lp_2_supply["vol"].sum())
    print(df_lp_2_demand["vol"].sum())
    print(df_lp_2_solution["vol"].sum())

    df = stitch_dataframes(
        df_lp_2_supply, df_lp_2_solution, volume_column="vol", indicator=True
    )
    df = df.rename(columns={"_matched": "_matched_supply"})
    df = stitch_dataframes(df, df_lp_2_demand, volume_column="vol", indicator=True)
    df = df.rename(columns={"_matched": "_matched_demand"})

    df = df[
        ((df["_matched_supply"] != "both") | (df["_matched_demand"] != "both"))
        & (df["vol"] > 0.01)
    ].sort_values("vol")

    display(df)

    df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
    df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
    df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
    df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figheight(35)
    fig.set_figwidth(70)

    df_brazil.boundary.plot(ax=ax1, color="Black")
    df_brazil.boundary.plot(ax=ax2, color="Black")

    # TODO: unallocated supply

    df = df[df["_matched_supply"] != "both"]

    df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    # df.plot(ax=ax1, column="vol", cmap="Greens")
    df.plot(ax=ax1, column="vol", color="Green")

    # TODO: unmet demand

    df = df[df["_matched_demand"] != "both"]

    df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
    df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
    df = gpd.GeoDataFrame(df).set_geometry("geometry")

    df.plot(ax=ax2, column="vol", cmap="Purples")


plot_issues()
import pandas as pd

df = pd.read_csv("df_final.csv")
df
/tmp/ipykernel_4206/98622176.py:3: DtypeWarning: Columns (6,7) have mixed types. Specify dtype option on import or set low_memory=False.
  df = pd.read_csv("df_final.csv")
Unnamed: 0 municipality_production municipality_aggregator municipality_station_origin municipality_station_destination municipality_hub municipality_station_origin_cake municipality_station_destination_cake municipality_port product vol
0 0 BR-1100031 BR-1100031 NaN NaN BR-1301902 NaN NaN BR-1302603 SOYBEAN CAKE 13943.666346
1 1 BR-1100049 BR-1100049 NaN NaN BR-1301902 NaN NaN BR-1302603 SOYBEAN CAKE 6996.841343
2 2 BR-1100809 BR-1100049 NaN NaN BR-1301902 NaN NaN BR-1302603 SOYBEAN CAKE 11212.801305
3 3 BR-1100908 BR-1100049 NaN NaN BR-1301902 NaN NaN BR-1302603 SOYBEAN CAKE 8309.281742
4 4 BR-1100056 BR-1100056 NaN NaN BR-1301902 NaN NaN BR-1302603 SOYBEAN CAKE 20915.499312
... ... ... ... ... ... ... ... ... ... ... ...
88568 112782 BR-5300108 BR-5222005 NaN NaN BR-5201108 NaN NaN UNKNOWN SOYBEANS 0.000632
88569 112834 BR-5222054 BR-5222054 BR-5218805 BR-3518701 BR-3554102 NaN NaN UNKNOWN SOYBEANS 0.000008
88570 112846 BR-5222054 BR-5222054 NaN NaN BR-5218805 NaN NaN UNKNOWN SOYBEANS 0.000188
88571 112852 BR-5222302 BR-5222302 NaN NaN BR-5201108 NaN NaN UNKNOWN SOYBEANS 0.000092
88572 112854 BR-5300108 BR-5300108 NaN NaN BR-5300108 NaN NaN UNKNOWN SOYBEANS 0.001657

88573 rows × 11 columns