View or edit on GitHub

This page is synchronized from trase/data/brazil/logistics/cost_matrix/cost_matrix_v2/qa/freight_cost_exploration.ipynb. Last modified on 2026-02-03 10:30 CET by Jason J. Benedict. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

import pandas as pd

from trase.tools.aws.aws_helpers_cached import get_pandas_df_once

from trase.tools.aws.aws_helpers import read_geojson

"""

Config Session

"""

PATH_FREIGHT = 'brazil/logistics/freight/freight_prices_conab_{}.csv'

PATH_LOCATIONS_CITIES = 'brazil/spatial/boundaries/ibge/old/municipalities_headquarts_ibge_2015.geojson'

YEARS = list(range(2016, 2026))

df = pd.concat([
    get_pandas_df_once(key=PATH_FREIGHT.format(x), bucket='trase-storage', sep=';')
        for x in YEARS
])

df_municipalities_locations = read_geojson(PATH_LOCATIONS_CITIES, bucket='trase-storage')

df.columns

Index(['source', 'municipality_origin', 'id_ibge_origin', 'state_origin',
       'municipality_destination', 'id_ibge_destination', 'state_destination',
       'year', 'month', 'distance_km', 'price_ton', 'price_ton_km',
       'trase_id_origin', 'trase_id_destination'],
      dtype='object')

df.dtypes

source                       object
municipality_origin          object
id_ibge_origin                int64
state_origin                 object
municipality_destination     object
id_ibge_destination           int64
state_destination            object
year                          int64
month                         int64
distance_km                   int64
price_ton                   float64
price_ton_km                float64
trase_id_origin              object
trase_id_destination         object
dtype: object

df_municipalities_locations

	NAME	GEOCODIGO_	NOME_MUNIC	UF	geometry
0	ALTA FLORESTA D'OESTE	1100015	ALTA FLORESTA D'OESTE	ro	POINT (-61.99982 -11.93554)
1	ARIQUEMES	1100023	ARIQUEMES	ro	POINT (-63.03327 -9.90846)
2	CABIXI	1100031	CABIXI	ro	POINT (-60.54431 -13.49976)
3	CACOAL	1100049	CACOAL	ro	POINT (-61.44294 -11.43386)
4	CEREJEIRAS	1100056	CEREJEIRAS	ro	POINT (-60.81843 -13.19503)
...	...	...	...	...	...
5565	MOJUÍ DOS CAMPOS	1504752	MOJUÍ DOS CAMPOS	pa	POINT (-54.65062 -26.82213)
5566	PESCARIA BRAVA	4212650	PESCARIA BRAVA	sc	POINT (-48.866 -28.38671)
5567	BALNEÁRIO RINCÃO	4220000	BALNEÁRIO RINCÃO	sc	POINT (-49.24067 -28.80429)
5568	PINTO BANDEIRA	4314548	PINTO BANDEIRA	rs	POINT (-51.45222 -29.09692)
5569	PARAÍSO DAS ÁGUAS	5006275	PARAÍSO DAS ÁGUAS	ms	POINT (-53.01307 -19.01739)

5570 rows × 5 columns

How freight prices changes over time ?

import seaborn as sns
from matplotlib import pyplot as plt

df['time'] = pd.to_datetime(pd.DataFrame({
    'year': df['year'],
    'month': df['month'],
    'day': 1
}))


sns.relplot(
    data=df, kind="line", legend=False,
    x="time", y="price_ton", size=3, aspect=1.7
)

plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

png

import seaborn as sns

sns.relplot(
    data=df.loc[df['time'].dt.year > 2018], 
    kind="line", 
    x="time", 
    y="price_ton", 
    col='year', 
    col_wrap=2,
    legend=False, 
    facet_kws={'sharey': False, 'sharex': False}, # Added missing comma here
    height=5, # Note: 'height' is the standard param; 'size' is often deprecated for facet height
    aspect=1.1
)


plt.title('Freight Prices per Ton')
plt.xlabel('')
plt.ylabel('Price Ton/Km')
plt.show()

png

How many unique combinations origin-destination does it have?

origin_destination = df[['municipality_origin', 'municipality_destination', 'year']].drop_duplicates()

count_combinations_by_year = (
    origin_destination
    .groupby('year')['municipality_destination']
    .count().reset_index().rename(columns={'municipality_destination': 'count'})
)

count_combinations_by_year

	year	count
0	2016	111
1	2017	138
2	2018	158
3	2019	92
4	2020	91
5	2021	160
6	2022	172
7	2023	223
8	2024	248
9	2025	261

# 1. Group the data by 'year'
grouped_by_year = origin_destination.groupby('year')

# 2. Perform the aggregation on the grouped object
result = grouped_by_year.agg(
    count_origin=pd.NamedAgg(column='municipality_origin', aggfunc='nunique'),
    count_destination=pd.NamedAgg(column='municipality_destination', aggfunc='nunique')
)

result

	count_origin	count_destination
year
2016	10	65
2017	15	70
2018	17	73
2019	15	56
2020	7	64
2021	27	68
2022	45	67
2023	65	82
2024	60	87
2025	68	93