Analyze results
View or edit on GitHub
This page is synchronized from trase/models/brazil/soy_supply_sheds/Analyze_results.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
import pandas as pd
import geopandas as gpd
import shapely
from matplotlib import pyplot as plt
def plot_port_shed(port_trase_id, buffer=1, product=None):
df_with_trains = pd.read_csv("df_final.csv")
df_no_trains = pd.read_csv("df_final_no_trains.csv")
df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(35)
fig.set_figwidth(70)
df_brazil.boundary.plot(ax=ax1, color="Black")
df_brazil.boundary.plot(ax=ax2, color="Black")
df_mun_of_interest = df_mun[df_mun["trase_id"] == port_trase_id]
mun_name = df_mun_of_interest["name"].unique()[0]
df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")
df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")
if product is not None:
df_with_trains = df_with_trains[df_with_trains["product"] == product]
df_no_trains = df_no_trains[df_no_trains["product"] == product]
df_port_with_trains = df_with_trains[
df_with_trains["municipality_port"] == port_trase_id
]
df_port_no_trains = df_no_trains[df_no_trains["municipality_port"] == port_trase_id]
df_shed_with_trains = pd.merge(
df_port_with_trains.groupby("municipality_production")["vol"]
.sum()
.reset_index(),
df_mun,
left_on="municipality_production",
right_on="trase_id",
)
df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")
df_aggregators_with_trains = pd.merge(
df_port_with_trains.groupby("municipality_aggregator")["vol"]
.sum()
.reset_index(),
df_mun,
left_on="municipality_aggregator",
right_on="trase_id",
)
df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
"geometry"
].apply(lambda x: shapely.from_wkt(x))
df_aggregators_with_trains = gpd.GeoDataFrame(
df_aggregators_with_trains
).set_geometry("geometry")
df_hubs_with_trains = pd.merge(
df_port_with_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_hub",
right_on="trase_id",
)
df_hubs_with_trains["geometry"] = df_hubs_with_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_hubs_with_trains = gpd.GeoDataFrame(df_hubs_with_trains).set_geometry("geometry")
df_shed_no_trains = pd.merge(
df_port_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_production",
right_on="trase_id",
)
df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")
df_aggregators_no_trains = pd.merge(
df_port_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_aggregator",
right_on="trase_id",
)
df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
"geometry"
)
df_hubs_no_trains = pd.merge(
df_port_no_trains.groupby("municipality_hub")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_hub",
right_on="trase_id",
)
df_hubs_no_trains["geometry"] = df_hubs_no_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_hubs_no_trains = gpd.GeoDataFrame(df_hubs_no_trains).set_geometry("geometry")
x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds
x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
x_min = min(x_min, x_mun_min)
x_max = max(x_max, x_mun_max)
y_min = min(y_min, y_mun_min)
y_max = max(y_max, y_mun_max)
print(x_min, x_max, y_min, y_max)
x_min -= buffer
x_max += buffer
y_min -= buffer
y_max += buffer
ax1.set_xlim(x_min, x_max)
ax1.set_ylim(y_min, y_max)
x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
x_min = min(x_min, x_mun_min)
x_max = max(x_max, x_mun_max)
y_min = min(y_min, y_mun_min)
y_max = max(y_max, y_mun_max)
print(x_min, x_max, y_min, y_max)
x_min -= buffer
x_max += buffer
y_min -= buffer
y_max += buffer
ax2.set_xlim(x_min, x_max)
ax2.set_ylim(y_min, y_max)
df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")
df_hubs_with_trains.plot(ax=ax1, column="vol", color="Blue")
df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")
df_hubs_no_trains.plot(ax=ax2, column="vol", color="Blue")
df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")
title = mun_name + " with trains"
ax1.set_title(title, fontsize=30)
title = mun_name + " without trains"
ax2.set_title(title, fontsize=30)
fig.savefig(
((product.replace(" ", "_") + "_") if product is not None else "")
+ f"PORT_{port_trase_id}_{mun_name.replace(' ', '_')}.png"
)
return
def plot_hub_shed(hub_trase_id, buffer=1, product=None):
df_with_trains = pd.read_csv("df_final.csv")
df_no_trains = pd.read_csv("df_final_no_trains.csv")
df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(35)
fig.set_figwidth(70)
df_brazil.boundary.plot(ax=ax1, color="Black")
df_brazil.boundary.plot(ax=ax2, color="Black")
df_mun_of_interest = df_mun[df_mun["trase_id"] == hub_trase_id]
mun_name = df_mun_of_interest["name"].unique()[0]
df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")
df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")
if product is not None:
df_with_trains = df_with_trains[df_with_trains["product"] == product]
df_no_trains = df_no_trains[df_no_trains["product"] == product]
df_hub_with_trains = df_with_trains[
df_with_trains["municipality_hub"] == hub_trase_id
]
df_hub_no_trains = df_no_trains[df_no_trains["municipality_hub"] == hub_trase_id]
df_shed_with_trains = pd.merge(
df_hub_with_trains.groupby("municipality_production")["vol"]
.sum()
.reset_index(),
df_mun,
left_on="municipality_production",
right_on="trase_id",
)
df_shed_with_trains["geometry"] = df_shed_with_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_shed_with_trains = gpd.GeoDataFrame(df_shed_with_trains).set_geometry("geometry")
df_aggregators_with_trains = pd.merge(
df_hub_with_trains.groupby("municipality_aggregator")["vol"]
.sum()
.reset_index(),
df_mun,
left_on="municipality_aggregator",
right_on="trase_id",
)
df_aggregators_with_trains["geometry"] = df_aggregators_with_trains[
"geometry"
].apply(lambda x: shapely.from_wkt(x))
df_aggregators_with_trains = gpd.GeoDataFrame(
df_aggregators_with_trains
).set_geometry("geometry")
df_shed_no_trains = pd.merge(
df_hub_no_trains.groupby("municipality_production")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_production",
right_on="trase_id",
)
df_shed_no_trains["geometry"] = df_shed_no_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_shed_no_trains = gpd.GeoDataFrame(df_shed_no_trains).set_geometry("geometry")
df_aggregators_no_trains = pd.merge(
df_hub_no_trains.groupby("municipality_aggregator")["vol"].sum().reset_index(),
df_mun,
left_on="municipality_aggregator",
right_on="trase_id",
)
df_aggregators_no_trains["geometry"] = df_aggregators_no_trains["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_aggregators_no_trains = gpd.GeoDataFrame(df_aggregators_no_trains).set_geometry(
"geometry"
)
x_mun_min, y_mun_min, x_mun_max, y_mun_max = df_mun_of_interest.total_bounds
x_min, y_min, x_max, y_max = df_shed_with_trains.total_bounds
x_min = min(x_min, x_mun_min)
x_max = max(x_max, x_mun_max)
y_min = min(y_min, y_mun_min)
y_max = max(y_max, y_mun_max)
x_min -= buffer
x_max += buffer
y_min -= buffer
y_max += buffer
ax1.set_xlim(x_min, x_max)
ax1.set_ylim(y_min, y_max)
x_min, y_min, x_max, y_max = df_shed_no_trains.total_bounds
x_min = min(x_min, x_mun_min)
x_max = max(x_max, x_mun_max)
y_min = min(y_min, y_mun_min)
y_max = max(y_max, y_mun_max)
x_min -= buffer
x_max += buffer
y_min -= buffer
y_max += buffer
ax2.set_xlim(x_min, x_max)
ax2.set_ylim(y_min, y_max)
df_shed_with_trains.plot(ax=ax1, column="vol", color="Green")
df_aggregators_with_trains.plot(ax=ax1, column="vol", color="Purple")
df_shed_no_trains.plot(ax=ax2, column="vol", color="Green")
df_aggregators_no_trains.plot(ax=ax2, column="vol", color="Purple")
df_mun_of_interest["geometry"].plot(ax=ax1, color="Red")
df_mun_of_interest["geometry"].plot(ax=ax2, color="Red")
title = mun_name + " with trains"
ax1.set_title(title, fontsize=30)
title = mun_name + " without trains"
ax2.set_title(title, fontsize=30)
fig.savefig(
((product.replace(" ", "_") + "_") if product is not None else "")
+ f"HUB_{hub_trase_id}_{mun_name.replace(' ', '_')}.png"
)
return
df_with_trains = pd.read_csv("df_final.csv")
df_no_trains = pd.read_csv("df_final_no_trains.csv")
# all_ports = df_with_trains["municipality_port"].unique()
# for port_trase_id in all_ports:
# plot_port_shed(port_trase_id)
all_hubs = df_with_trains["municipality_hub"].unique()
for hub_trase_id in all_hubs:
plot_hub_shed(hub_trase_id, product="SOYBEAN CAKE")
Find busiest train station of origin
Train stations from which the most soy gets shipped are
Rondonopolis (Mato Grosso)
Uberaba (Minas Gerais)
Araguari (Minas Gerais)
Rio Verde (Goias)
Porto Nacional (Tocantins)
df_trains = pd.read_csv("2023/prepared/train_flows.csv", sep=";")
df_trains = df_trains[df_trains.pop("commodity") == "SOJA"]
df_busiest = (
df_trains.groupby("origin_trase_id")["vol"]
.sum()
.reset_index()
.sort_values("vol", ascending=False)
.head(5)
)
busiest_stations_origin = df_busiest["origin_trase_id"].tolist()
busiest_stations_origin
pd.merge(
df_busiest,
df_mun[["trase_id", "name"]],
left_on="origin_trase_id",
right_on="trase_id",
)
| origin_trase_id | vol | trase_id | name | |
|---|---|---|---|---|
| 0 | BR-5107602 | 6962792.0 | BR-5107602 | RONDONOPOLIS |
| 1 | BR-3170107 | 4936111.0 | BR-3170107 | UBERABA |
| 2 | BR-3103504 | 2882767.0 | BR-3103504 | ARAGUARI |
| 3 | BR-5218805 | 2717120.0 | BR-5218805 | RIO VERDE |
| 4 | BR-1718204 | 2675925.0 | BR-1718204 | PORTO NACIONA |
Find suppy sheds of busiest stations
df_shed = plot_origins(busiest_stations_origin[0])
/tmp/ipykernel_4134/4044652755.py:44: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_mun_of_interest["geometry"] = df_mun_of_interest["geometry"].apply(
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'municipality_demand'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[5], line 1
----> 1 df_shed = plot_origins(busiest_stations_origin[0])
Cell In[3], line 15, in plot_origins(trase_id, buffer)
14 def plot_origins(trase_id, buffer=5):
---> 15 return plot_shed(
16 trase_id=trase_id,
17 var_1="municipality_demand",
18 var_2="municipality_supply",
19 buffer=buffer,
20 title_prefix="SUPPLY SHED - ",
21 )
Cell In[3], line 50, in plot_shed(trase_id, var_1, var_2, buffer, title_prefix)
47 df_mun_of_interest = gpd.GeoDataFrame(df_mun_of_interest).set_geometry("geometry")
48 df_mun_of_interest["geometry"].boundary.plot(ax=ax, color="Red")
---> 50 df_shed = df_solution[df_solution[var_1] == trase_id]
51 df_shed = pd.merge(df_shed, df_mun, left_on=var_2, right_on="trase_id")
52 df_shed["geometry"] = df_shed["geometry"].apply(lambda x: shapely.from_wkt(x))
File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/frame.py:4102, in DataFrame.__getitem__(self, key)
4100 if self.columns.nlevels > 1:
4101 return self._getitem_multilevel(key)
-> 4102 indexer = self.columns.get_loc(key)
4103 if is_integer(indexer):
4104 indexer = [indexer]
File ~/.conda/envs/trase-env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'municipality_demand'

df_shed = plot_origins(busiest_stations_origin[1], buffer=1)
df_shed.sort_values("vol", ascending=False).head(3)
Top municipality supplying Uberaba is Vitoria, which is one of the destinations of trains from Uberaba!
df_trains[df_trains["origin_trase_id"] == "BR-3170107"]
All municipalities supplied by Vitoria
df_shed = plot_destinations("BR-3205309")
df_shed = plot_origins(busiest_stations_origin[2])
df_shed = plot_origins(busiest_stations_origin[3])
df_shed = plot_origins(busiest_stations_origin[4])
df_shed
Crushing
import pandas as pd
import geopandas as gpd
import shapely
from matplotlib import pyplot as plt
def plot_crushing_demand():
df_crushing = pd.read_csv("2023/prepared/crushing_demand.csv", sep=";")
df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")
fig, ax = plt.subplots()
fig.set_figheight(35)
fig.set_figwidth(35)
df_brazil.boundary.plot(ax=ax, color="Black")
df_crushing = pd.merge(
df_crushing, df_mun, left_on="municipality", right_on="trase_id"
)
df_crushing["geometry"] = df_crushing["geometry"].apply(
lambda x: shapely.from_wkt(x)
)
df_crushing = gpd.GeoDataFrame(df_crushing).set_geometry("geometry")
# df_crushing.plot(ax=ax, column="vol", cmap="Greens")
df_crushing.plot(ax=ax, column="vol", color="Red")
return df_crushing
plot_crushing_demand()
plot_origins("BR-4315602")
plot_destinations("BR-4315602")
import pandas as pd
df_supply = pd.read_csv("supply.csv").rename(columns={"vol": "vol_supply"})
df_demand = pd.read_csv("demand.csv").rename(columns={"vol": "vol_demand"})
df_lp = pd.read_csv("lp_solution.csv")
df_lp_supply = (
df_lp.groupby("municipality_supply")["vol"]
.sum()
.reset_index()
.rename(columns={"vol": "vol_supply_lp"})
)
df_lp_demand = (
df_lp.groupby("trase_id")["vol"]
.sum()
.reset_index()
.rename(columns={"vol": "vol_demand_lp"})
)
df = pd.merge(df_supply, df_lp_supply, on="municipality_supply")
df["diff"] = df["vol_supply"] - df["vol_supply_lp"]
df.sort_values("diff")
df = pd.merge(df_demand, df_lp_demand, on="trase_id")
df["diff"] = df["vol_demand"] - df["vol_demand_lp"]
df.sort_values("diff")
import pandas as pd
import geopandas as gpd
import shapely
from matplotlib import pyplot as plt
df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")
def plot_lp_1():
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(35)
fig.set_figwidth(70)
df_brazil.boundary.plot(ax=ax1, color="Black")
df_brazil.boundary.plot(ax=ax2, color="Black")
df = pd.read_csv("lp_1_supply.csv")
df = pd.merge(df, df_mun, left_on="municipality_production", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
df.plot(ax=ax1, column="vol", cmap="Greens")
df_solution = pd.read_csv("lp_1_solution.csv")
df = df_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()
df = pd.merge(df, df_mun, left_on="municipality_aggregator", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
df.plot(ax=ax2, column="vol", cmap="Purples")
def plot_lp_2():
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(35)
fig.set_figwidth(70)
df_brazil.boundary.plot(ax=ax1, color="Black")
df_brazil.boundary.plot(ax=ax2, color="Black")
df = pd.read_csv("lp_2_supply.csv")
df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
df.plot(ax=ax1, column="vol", cmap="Purples")
df_solution = pd.read_csv("lp_2_solution.csv")
df = df_solution.groupby("municipality_demand")["vol"].sum().reset_index()
df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
df.plot(ax=ax2, column="vol", cmap="Reds")
plot_lp_1()
plot_lp_2()
import pandas as pd
df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
df_lp_1_solution.groupby("municipality_aggregator")["vol"].sum().reset_index()
import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes
def main():
# --- Train flows between municipalities
df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")
# --- Arrival at destination station treated as a supply
df_train_soybean_flows = df_train_flows[
df_train_flows.pop("commodity") == "SOJA"
].rename(
columns={
"origin_trase_id": "station_origin",
"destination_trase_id": "station_destination",
}
)
df_lp_1_solution = pd.read_csv("lp_1_solution.csv")
df_lp_2_solution = pd.read_csv("lp_2_solution.csv")
# --- Connect aggregators to train flows
df_aggregators_trains = stitch_dataframes(
df_lp_2_solution.rename(
columns={
"municipality_supply": "municipality_aggregator",
"municipality_demand": "station_origin",
}
),
df_train_soybean_flows,
volume_column="vol",
indicator=True,
)
df_aggregators_to_trains = df_aggregators_trains[
df_aggregators_trains["_matched"] == "both"
].drop(columns="_matched")
df_aggregators_to_hubs = (
df_aggregators_trains[df_aggregators_trains["_matched"] == "left_only"]
.drop(columns=["_matched", "station_destination"])
.rename(columns={"station_origin": "municipality_hub"})
)
# --- Connect aggregator-train flows to hubs
df_aggregators_to_trains_to_hubs = stitch_dataframes(
df_aggregators_to_trains,
df_lp_2_solution.rename(
columns={
"municipality_supply": "station_destination",
"municipality_demand": "municipality_hub",
}
),
volume_column="vol",
indicator=True,
)
df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
]
# --- Combine
df = pd.concat([df_aggregators_to_trains_to_hubs, df_aggregators_to_hubs])
return df
# --- Find origins (aggregators) of soybeans arriving at train stations of destination
df_aggregators_to_trains = stitch_dataframes(
df_lp_2_solution.rename(
columns={
"municipality_supply": "municipality_aggregator",
"municipality_demand": "station_origin",
}
),
df_train_soybean_flows,
volume_column="vol",
indicator=True,
)
df_aggregators_to_trains.to_csv("aggregators_to_trains.csv")
df_aggregators_to_trains = df_aggregators_to_trains[
df_aggregators_to_trains.pop("_matched") == "both"
]
print(df_aggregators_to_trains)
print(df_aggregators_to_trains["vol"].sum())
# --- Find origin (train stations of destination) of soybeans arriving at hubs from trains
df_aggregators_to_trains_to_hubs = stitch_dataframes(
df_aggregators_to_trains,
df_lp_2_solution.rename(
columns={
"municipality_supply": "station_destination",
"municipality_demand": "municipality_hub",
}
),
volume_column="vol",
indicator=True,
)
df_aggregators_to_trains_to_hubs = df_aggregators_to_trains_to_hubs[
df_aggregators_to_trains_to_hubs.pop("_matched") == "both"
]
print(df_aggregators_to_trains_to_hubs)
print(df_aggregators_to_trains_to_hubs["vol"].sum())
return
# --- Find origin of soybeans arriving at silos directly
df_farms_to_hubs = stitch_dataframes(
df_lp_1_solution.rename(
columns={"municipality_aggregator": "municipality_supply"}
),
df_lp_2_solution,
volume_column="vol",
indicator=True,
)
df_farms_to_hubs = df_farms_to_hubs[df_farms_to_hubs.pop("_matched") == "both"]
df_solution = pd.concat(
[
df_trains_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
df_farms_to_hubs[["municipality_supply", "municipality_demand", "vol"]],
]
)
return df_solution
main()
import pandas as pd
df_lp_2 = pd.read_csv("lp_2_solution.csv")
df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
df_train_flows = pd.read_csv("2023/prepared/train_flows.csv", sep=";")
df_train_soybean_flows = df_train_flows[
df_train_flows.pop("commodity") == "SOJA"
].rename(
columns={
"origin_trase_id": "station_origin",
"destination_trase_id": "station_destination",
}
)
df_lp_2[df_lp_2["municipality_demand"] == "BR-1715705"]
# df_lp_2_demand[df_lp_2_demand["municipality_demand"] == "BR-1715705"]
import pandas as pd
from trase.tools.sps import solve_transportation_problem, stitch_dataframes
import pandas as pd
import geopandas as gpd
import shapely
from matplotlib import pyplot as plt
def plot_issues():
df_lp_1_supply = pd.read_csv("lp_1_supply.csv")
df_lp_1_demand = pd.read_csv("lp_1_demand.csv")
df_lp_1_solution = pd.read_csv("df_lp_1_solution.csv")
df_lp_2_supply = pd.read_csv("lp_2_supply.csv")
df_lp_2_demand = pd.read_csv("lp_2_demand.csv")
df_lp_2_solution = pd.read_csv("df_lp_2_solution.csv")
print(df_lp_1_supply["vol"].sum())
print(df_lp_1_demand["vol"].sum())
print(df_lp_1_solution["vol"].sum())
print(df_lp_2_supply["vol"].sum())
print(df_lp_2_demand["vol"].sum())
print(df_lp_2_solution["vol"].sum())
df = stitch_dataframes(
df_lp_2_supply, df_lp_2_solution, volume_column="vol", indicator=True
)
df = df.rename(columns={"_matched": "_matched_supply"})
df = stitch_dataframes(df, df_lp_2_demand, volume_column="vol", indicator=True)
df = df.rename(columns={"_matched": "_matched_demand"})
df = df[
((df["_matched_supply"] != "both") | (df["_matched_demand"] != "both"))
& (df["vol"] > 0.01)
].sort_values("vol")
display(df)
df_mun = pd.read_csv("2023/prepared/municipality.csv", sep=";")
df_brazil = pd.read_csv("2023/prepared/brazil.csv", sep=";")
df_brazil["geometry"] = df_brazil["geometry"].apply(lambda x: shapely.from_wkt(x))
df_brazil = gpd.GeoDataFrame(df_brazil).set_geometry("geometry")
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(35)
fig.set_figwidth(70)
df_brazil.boundary.plot(ax=ax1, color="Black")
df_brazil.boundary.plot(ax=ax2, color="Black")
# TODO: unallocated supply
df = df[df["_matched_supply"] != "both"]
df = pd.merge(df, df_mun, left_on="municipality_supply", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
# df.plot(ax=ax1, column="vol", cmap="Greens")
df.plot(ax=ax1, column="vol", color="Green")
# TODO: unmet demand
df = df[df["_matched_demand"] != "both"]
df = pd.merge(df, df_mun, left_on="municipality_demand", right_on="trase_id")
df["geometry"] = df["geometry"].apply(lambda x: shapely.from_wkt(x))
df = gpd.GeoDataFrame(df).set_geometry("geometry")
df.plot(ax=ax2, column="vol", cmap="Purples")
plot_issues()
import pandas as pd
df = pd.read_csv("df_final.csv")
df
/tmp/ipykernel_4206/98622176.py:3: DtypeWarning: Columns (6,7) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv("df_final.csv")
| Unnamed: 0 | municipality_production | municipality_aggregator | municipality_station_origin | municipality_station_destination | municipality_hub | municipality_station_origin_cake | municipality_station_destination_cake | municipality_port | product | vol | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | BR-1100031 | BR-1100031 | NaN | NaN | BR-1301902 | NaN | NaN | BR-1302603 | SOYBEAN CAKE | 13943.666346 |
| 1 | 1 | BR-1100049 | BR-1100049 | NaN | NaN | BR-1301902 | NaN | NaN | BR-1302603 | SOYBEAN CAKE | 6996.841343 |
| 2 | 2 | BR-1100809 | BR-1100049 | NaN | NaN | BR-1301902 | NaN | NaN | BR-1302603 | SOYBEAN CAKE | 11212.801305 |
| 3 | 3 | BR-1100908 | BR-1100049 | NaN | NaN | BR-1301902 | NaN | NaN | BR-1302603 | SOYBEAN CAKE | 8309.281742 |
| 4 | 4 | BR-1100056 | BR-1100056 | NaN | NaN | BR-1301902 | NaN | NaN | BR-1302603 | SOYBEAN CAKE | 20915.499312 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 88568 | 112782 | BR-5300108 | BR-5222005 | NaN | NaN | BR-5201108 | NaN | NaN | UNKNOWN | SOYBEANS | 0.000632 |
| 88569 | 112834 | BR-5222054 | BR-5222054 | BR-5218805 | BR-3518701 | BR-3554102 | NaN | NaN | UNKNOWN | SOYBEANS | 0.000008 |
| 88570 | 112846 | BR-5222054 | BR-5222054 | NaN | NaN | BR-5218805 | NaN | NaN | UNKNOWN | SOYBEANS | 0.000188 |
| 88571 | 112852 | BR-5222302 | BR-5222302 | NaN | NaN | BR-5201108 | NaN | NaN | UNKNOWN | SOYBEANS | 0.000092 |
| 88572 | 112854 | BR-5300108 | BR-5300108 | NaN | NaN | BR-5300108 | NaN | NaN | UNKNOWN | SOYBEANS | 0.001657 |
88573 rows × 11 columns