View or edit on GitHub
This page is synchronized from trase/data/brazil/logistics/cost_matrix/cost_matrix_v2/qa/freight_cost_exploration.ipynb. Last modified on 2026-02-03 10:30 CET by Jason J. Benedict.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
import pandas as pd
from trase.tools.aws.aws_helpers_cached import get_pandas_df_once
from trase.tools.aws.aws_helpers import read_geojson
"""
Config Session
"""
PATH_FREIGHT = 'brazil/logistics/freight/freight_prices_conab_{}.csv'
PATH_LOCATIONS_CITIES = 'brazil/spatial/boundaries/ibge/old/municipalities_headquarts_ibge_2015.geojson'
YEARS = list(range(2016, 2026))
df = pd.concat([
get_pandas_df_once(key=PATH_FREIGHT.format(x), bucket='trase-storage', sep=';')
for x in YEARS
])
df_municipalities_locations = read_geojson(PATH_LOCATIONS_CITIES, bucket='trase-storage')
df.columns
Index(['source', 'municipality_origin', 'id_ibge_origin', 'state_origin',
'municipality_destination', 'id_ibge_destination', 'state_destination',
'year', 'month', 'distance_km', 'price_ton', 'price_ton_km',
'trase_id_origin', 'trase_id_destination'],
dtype='object')
df.dtypes
source object
municipality_origin object
id_ibge_origin int64
state_origin object
municipality_destination object
id_ibge_destination int64
state_destination object
year int64
month int64
distance_km int64
price_ton float64
price_ton_km float64
trase_id_origin object
trase_id_destination object
dtype: object
df_municipalities_locations
| NAME | GEOCODIGO_ | NOME_MUNIC | UF | geometry | |
|---|---|---|---|---|---|
| 0 | ALTA FLORESTA D'OESTE | 1100015 | ALTA FLORESTA D'OESTE | ro | POINT (-61.99982 -11.93554) |
| 1 | ARIQUEMES | 1100023 | ARIQUEMES | ro | POINT (-63.03327 -9.90846) |
| 2 | CABIXI | 1100031 | CABIXI | ro | POINT (-60.54431 -13.49976) |
| 3 | CACOAL | 1100049 | CACOAL | ro | POINT (-61.44294 -11.43386) |
| 4 | CEREJEIRAS | 1100056 | CEREJEIRAS | ro | POINT (-60.81843 -13.19503) |
| ... | ... | ... | ... | ... | ... |
| 5565 | MOJUÍ DOS CAMPOS | 1504752 | MOJUÍ DOS CAMPOS | pa | POINT (-54.65062 -26.82213) |
| 5566 | PESCARIA BRAVA | 4212650 | PESCARIA BRAVA | sc | POINT (-48.866 -28.38671) |
| 5567 | BALNEÁRIO RINCÃO | 4220000 | BALNEÁRIO RINCÃO | sc | POINT (-49.24067 -28.80429) |
| 5568 | PINTO BANDEIRA | 4314548 | PINTO BANDEIRA | rs | POINT (-51.45222 -29.09692) |
| 5569 | PARAÍSO DAS ÁGUAS | 5006275 | PARAÍSO DAS ÁGUAS | ms | POINT (-53.01307 -19.01739) |
5570 rows × 5 columns
How freight prices changes over time ?
import seaborn as sns
from matplotlib import pyplot as plt
df['time'] = pd.to_datetime(pd.DataFrame({
'year': df['year'],
'month': df['month'],
'day': 1
}))
sns.relplot(
data=df, kind="line", legend=False,
x="time", y="price_ton", size=3, aspect=1.7
)
plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

import seaborn as sns
sns.relplot(
data=df.loc[df['time'].dt.year > 2018],
kind="line",
x="time",
y="price_ton",
col='year',
col_wrap=2,
legend=False,
facet_kws={'sharey': False, 'sharex': False}, # Added missing comma here
height=5, # Note: 'height' is the standard param; 'size' is often deprecated for facet height
aspect=1.1
)
plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

How many unique combinations origin-destination does it have?
origin_destination = df[['municipality_origin', 'municipality_destination', 'year']].drop_duplicates()
count_combinations_by_year = (
origin_destination
.groupby('year')['municipality_destination']
.count().reset_index().rename(columns={'municipality_destination': 'count'})
)
count_combinations_by_year
| year | count | |
|---|---|---|
| 0 | 2016 | 111 |
| 1 | 2017 | 138 |
| 2 | 2018 | 158 |
| 3 | 2019 | 92 |
| 4 | 2020 | 91 |
| 5 | 2021 | 160 |
| 6 | 2022 | 172 |
| 7 | 2023 | 223 |
| 8 | 2024 | 248 |
| 9 | 2025 | 261 |
# 1. Group the data by 'year'
grouped_by_year = origin_destination.groupby('year')
# 2. Perform the aggregation on the grouped object
result = grouped_by_year.agg(
count_origin=pd.NamedAgg(column='municipality_origin', aggfunc='nunique'),
count_destination=pd.NamedAgg(column='municipality_destination', aggfunc='nunique')
)
result
| count_origin | count_destination | |
|---|---|---|
| year | ||
| 2016 | 10 | 65 |
| 2017 | 15 | 70 |
| 2018 | 17 | 73 |
| 2019 | 15 | 56 |
| 2020 | 7 | 64 |
| 2021 | 27 | 68 |
| 2022 | 45 | 67 |
| 2023 | 65 | 82 |
| 2024 | 60 | 87 |
| 2025 | 68 | 93 |