Skip to content

View or edit on GitHub

This page is synchronized from trase/data/brazil/logistics/cost_matrix/cost_matrix_v2/qa/freight_cost_exploration.ipynb. Last modified on 2026-02-03 10:30 CET by Jason J. Benedict. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

import pandas as pd

from trase.tools.aws.aws_helpers_cached import get_pandas_df_once

from trase.tools.aws.aws_helpers import read_geojson

"""

Config Session

"""

PATH_FREIGHT = 'brazil/logistics/freight/freight_prices_conab_{}.csv'

PATH_LOCATIONS_CITIES = 'brazil/spatial/boundaries/ibge/old/municipalities_headquarts_ibge_2015.geojson'

YEARS = list(range(2016, 2026))
df = pd.concat([
    get_pandas_df_once(key=PATH_FREIGHT.format(x), bucket='trase-storage', sep=';')
        for x in YEARS
])

df_municipalities_locations = read_geojson(PATH_LOCATIONS_CITIES, bucket='trase-storage')

df.columns
Index(['source', 'municipality_origin', 'id_ibge_origin', 'state_origin',
       'municipality_destination', 'id_ibge_destination', 'state_destination',
       'year', 'month', 'distance_km', 'price_ton', 'price_ton_km',
       'trase_id_origin', 'trase_id_destination'],
      dtype='object')
df.dtypes
source                       object
municipality_origin          object
id_ibge_origin                int64
state_origin                 object
municipality_destination     object
id_ibge_destination           int64
state_destination            object
year                          int64
month                         int64
distance_km                   int64
price_ton                   float64
price_ton_km                float64
trase_id_origin              object
trase_id_destination         object
dtype: object
df_municipalities_locations
NAME GEOCODIGO_ NOME_MUNIC UF geometry
0 ALTA FLORESTA D'OESTE 1100015 ALTA FLORESTA D'OESTE ro POINT (-61.99982 -11.93554)
1 ARIQUEMES 1100023 ARIQUEMES ro POINT (-63.03327 -9.90846)
2 CABIXI 1100031 CABIXI ro POINT (-60.54431 -13.49976)
3 CACOAL 1100049 CACOAL ro POINT (-61.44294 -11.43386)
4 CEREJEIRAS 1100056 CEREJEIRAS ro POINT (-60.81843 -13.19503)
... ... ... ... ... ...
5565 MOJUÍ DOS CAMPOS 1504752 MOJUÍ DOS CAMPOS pa POINT (-54.65062 -26.82213)
5566 PESCARIA BRAVA 4212650 PESCARIA BRAVA sc POINT (-48.866 -28.38671)
5567 BALNEÁRIO RINCÃO 4220000 BALNEÁRIO RINCÃO sc POINT (-49.24067 -28.80429)
5568 PINTO BANDEIRA 4314548 PINTO BANDEIRA rs POINT (-51.45222 -29.09692)
5569 PARAÍSO DAS ÁGUAS 5006275 PARAÍSO DAS ÁGUAS ms POINT (-53.01307 -19.01739)

5570 rows × 5 columns

How freight prices changes over time ?

import seaborn as sns
from matplotlib import pyplot as plt

df['time'] = pd.to_datetime(pd.DataFrame({
    'year': df['year'],
    'month': df['month'],
    'day': 1
}))


sns.relplot(
    data=df, kind="line", legend=False,
    x="time", y="price_ton", size=3, aspect=1.7
)

plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

png

import seaborn as sns

sns.relplot(
    data=df.loc[df['time'].dt.year > 2018], 
    kind="line", 
    x="time", 
    y="price_ton", 
    col='year', 
    col_wrap=2,
    legend=False, 
    facet_kws={'sharey': False, 'sharex': False}, # Added missing comma here
    height=5, # Note: 'height' is the standard param; 'size' is often deprecated for facet height
    aspect=1.1
)


plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

png

How many unique combinations origin-destination does it have?

origin_destination = df[['municipality_origin', 'municipality_destination', 'year']].drop_duplicates()

count_combinations_by_year = (
    origin_destination
    .groupby('year')['municipality_destination']
    .count().reset_index().rename(columns={'municipality_destination': 'count'})
)

count_combinations_by_year
year count
0 2016 111
1 2017 138
2 2018 158
3 2019 92
4 2020 91
5 2021 160
6 2022 172
7 2023 223
8 2024 248
9 2025 261
# 1. Group the data by 'year'
grouped_by_year = origin_destination.groupby('year')

# 2. Perform the aggregation on the grouped object
result = grouped_by_year.agg(
    count_origin=pd.NamedAgg(column='municipality_origin', aggfunc='nunique'),
    count_destination=pd.NamedAgg(column='municipality_destination', aggfunc='nunique')
)

result
count_origin count_destination
year
2016 10 65
2017 15 70
2018 17 73
2019 15 56
2020 7 64
2021 27 68
2022 45 67
2023 65 82
2024 60 87
2025 68 93