Skip to content

Qa phase2 compare versions

View or edit on GitHub

This page is synchronized from trase/models/brazil/beef/qa_beef/archive/qa_phase2_compare_versions.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

%%HTML 
<script>
    /**
     * Collapse a single code cell and add a right-aligned "[show code]" link
     * directly after it. A cell that already carries the `luc21893` flag has
     * been processed before and is left untouched.
     */
    function luc21893_refresh_cell(cell) {
        if( cell.luc21893 ) {
            return;
        }
        cell.luc21893 = true;
        console.debug('New code cell found...' );

        // Wrapper that holds the toggle link, placed right after the cell.
        var wrapper = document.createElement('DIV');
        cell.parentNode.insertBefore( wrapper, cell.nextSibling );
        wrapper.style.textAlign = 'right';

        // The link keeps a reference to its cell so the toggle handler can find it.
        var link = document.createElement('A');
        wrapper.appendChild(link);
        link.href = '#';
        link.luc21893 = cell;
        link.setAttribute( 'onclick', "luc21893_toggle(this); return false;" );

        // Start collapsed: invisible and taken out of the layout flow.
        Object.assign( cell.style, { visibility: 'hidden', position: 'absolute' } );
        link.innerHTML = '[show code]';
    }
    /**
     * Locate the notebook's code cells and collapse the selected ones behind
     * a "[show code]" link.
     *
     * Two page layouts are handled:
     *  - exported HTML (no `.code_cell .input` element): the first
     *    `.jp-InputArea` is hidden outright, and the parent of every
     *    even-indexed input area from index 2 onwards gets a toggle link;
     *    the function is also registered as `window.onload`.
     *  - Jupyter editor: the inputs at positions 1, 3, 4, 5 and 7 get a
     *    toggle link, and the function re-schedules itself every second.
     */
    function luc21893_refresh() {
        var codeCells;
        if( document.querySelector('.code_cell .input') == null ) {
            // It appears that we are in an exported HTML page.
            codeCells = document.querySelectorAll('.jp-InputArea');
            // Hide the first input area without a toggle link. Guarded so
            // that a page with no `.jp-InputArea` does not throw before
            // `window.onload` is registered below.
            if( codeCells.length > 0 ) {
                codeCells[0].style.visibility = 'hidden';
                codeCells[0].style.position = 'absolute';
            }
            // Only even-indexed input areas (2, 4, 6, ...) are collapsed.
            for( var i = 2; i < codeCells.length; i += 2 ) {
                luc21893_refresh_cell(codeCells[i].parentNode);
            }
            window.onload = luc21893_refresh;
        }
        else {
            // It appears that we are in a Jupyter editor.
            codeCells = document.querySelectorAll('.code_cell .input');
            for( var j = 0; j < codeCells.length; j++ ) {
                // Hard-coded positions of the cells whose code is collapsed.
                if( [1,3,4,5,7].includes(j) ) {
                    luc21893_refresh_cell(codeCells[j]);
                }
            }
            // Run again in one second; cells already handled are skipped
            // by luc21893_refresh_cell.
            window.setTimeout( luc21893_refresh, 1000 );
        }
    }

    /**
     * Click handler for a toggle link: flip the linked cell between hidden
     * and visible, and update the link caption to match.
     */
    function luc21893_toggle(a) {
        var cellStyle = a.luc21893.style;
        var wasHidden = cellStyle.visibility == 'hidden';

        // A hidden cell is also positioned absolutely so it takes no space.
        cellStyle.visibility = wasHidden ? 'visible' : 'hidden';
        cellStyle.position = wasHidden ? '' : 'absolute';
        a.innerHTML = wasHidden ? '[hide code]' : '[show code]';
    }

    luc21893_refresh()
</script>

Difference between version 2.1 and 2.2

from trase.tools import sps

from trase.models.brazil.beef.qa_beef.imports.reader import (
    load_downloaded_data_db,
    load_mun,
    load_downloaded_data_s3,
)
from trase.models.brazil.beef.qa_beef.imports.functions import (
    create_plot_comparision_versions,
    compare_versions_sei_pcs,
)
from trase.models.brazil.beef.qa_beef.imports.plots_specific import (
    plot_grouped_by_dumbbell_comparision,
)
from trase.models.brazil.beef.qa_beef.imports.plots_general import (
    plot_comparision_versions_sei_pcs,
)

import plotly.io as pio

pio.renderers.default = "plotly_mimetype+notebook"

# Load the two model versions that are compared in this notebook.
dfs_DB = load_downloaded_data_db()
sei_pcs_db = dfs_DB["sei_pcs_db"]
sei_old_db = dfs_DB["sei_old_db"]


mun = sps.get_pandas_df_once("brazil/metadata/ID_MUN_MASTER_A_NEW.csv", sep=";")
# Create a dictionary with the Logistic Hubs - municipalities of the new version.
# A hub is matched to a municipality when the hub label contains
# " <municipality name> SLAUGHTERHOUSE".
# NOTE(review): when several municipality names match the same hub, the last
# one in `mun["name"]` wins -- confirm that this tie-break is intended.
dict_lh_mun = {
    hub: mun_name
    for hub in set(sei_pcs_db.LVL6_TRASE_ID_LH)
    for mun_name in mun["name"]
    if f" {mun_name} SLAUGHTERHOUSE" in hub
}
# Fail early, naming every hub that could not be matched, instead of raising
# a KeyError for only the first missing hub during the transformation below.
unmatched_hubs = set(sei_pcs_db["LVL6_TRASE_ID_LH"]).difference(dict_lh_mun)
if unmatched_hubs:
    raise KeyError(
        f"No municipality found for logistic hub(s): {sorted(unmatched_hubs)}"
    )
# Transform the LH of the new version to be the municipality.
sei_pcs_db["LVL6_TRASE_ID_LH"] = sei_pcs_db["LVL6_TRASE_ID_LH"].map(dict_lh_mun)

# # Nanxu: WIP
# dfs_DB = load_downloaded_data_s3()
# sei_pcs_db = dfs_DB['sei_pcs_s3']
# sei_old_db = dfs_DB["sei_old_s3"]

# mun = sps.get_pandas_df_once("brazil/metadata/ID_MUN_MASTER_A_NEW.csv", sep=";")
# print(mun)
# # Create a dictionary with the Logistic Hubs - municipalities of the new version.
# dict_lh_mun = {element: el for element in set(sei_pcs_db.LVL6_TRASE_ID_LH) for el in mun["name"] if element.__contains__("".join([" ",el," SLAUGHTERHOUSE"]))}
# print(dict_lh_mun)
# # transfom the LH of the new version to be the municipality
# sei_pcs_db["LVL6_TRASE_ID_LH"] = sei_pcs_db["LVL6_TRASE_ID_LH"].apply(dict_lh_mun.__getitem__)

# dfs_DB['sei_pcs_db'] = dfs_DB['sei_pcs_s3']
# dfs_DB['sei_old_db'] = dfs_DB['sei_old_s3']

Difference in Trader - Logistic Hub volumes between the two versions

We grouped the volume by Exporter Group and Logistic Hub, and calculated the absolute percentage difference between the volume on the two versions, divided by the minimum volume between the two versions.

For example, if in the previous version of the model JBS was sourcing 100k tonnes of beef from the Logistic Hub of Bage and in the new version the volume is 60k tonnes, the percentage difference between the versions is calculated as follows: |100 - 60| / min(100, 60) = 40 / 60 ≈ 67%

If the percentage is less than or equal to 10%, we consider the two volumes to be the same; if it is greater than 10%, we consider them to be different.

# Column used for this comparison: the logistic hub.
key = "LVL6_TRASE_ID_LH"
# Compare the versions held in dfs_DB on that column, then plot the result.
plot_df = compare_versions_sei_pcs(dfs_DB, key)
plot_comparision_versions_sei_pcs(plot_df, key)

Dumbbell plot comparison

We used dumbbell plots to compare the volume attributed to each logistic hub, municipality of production, exporter, country of import and biome between the two versions of the SEI-PCS model. The two versions are expected to have the same volumes.

A dumbbell plot, also known as a connected dot plot, is a type of graph used to compare two related groups or sets of data. It is called a dumbbell plot because the graph consists of a series of dots or circles connected by a line that looks like a dumbbell. Each dot in the plot represents a value from one of the two groups being compared, and the line connecting the dots represents the difference between those two values.

Difference in volume per logistic hub per year between the two versions

# Dumbbell comparison of VOLUME_RAW per logistic hub, new version vs. old version.
# The new version's hub column was replaced by municipality names earlier in
# this notebook; presumably the old version already stores hubs that way -- TODO confirm.
plot_grouped_by_dumbbell_comparision(
    sei_pcs_db,
    sei_old_db,
    "LVL6_TRASE_ID_LH",
    value="VOLUME_RAW",
    years=range(2015, 2018),  # 2015, 2016 and 2017 (upper bound is exclusive)
    labels=["sei_new", "sei_old"],  # presumably in the same order as the two frames -- TODO confirm
)

Difference in volume per municipality of production per year between the two versions

# Dumbbell comparison of VOLUME_RAW per municipality of production,
# new version vs. old version.
plot_grouped_by_dumbbell_comparision(
    sei_pcs_db,
    sei_old_db,
    "LVL6_TRASE_ID_PROD",
    value="VOLUME_RAW",
    years=range(2015, 2018),  # 2015, 2016 and 2017 (upper bound is exclusive)
    labels=["sei_new", "sei_old"],  # presumably in the same order as the two frames -- TODO confirm
)

Difference in volumes per Exporter per year between the two versions

# Dumbbell comparison of VOLUME_RAW per exporter group, new version vs. old version.
plot_grouped_by_dumbbell_comparision(
    sei_pcs_db,
    sei_old_db,
    "EXPORTER_GROUP",
    value="VOLUME_RAW",
    years=range(2015, 2018),  # 2015, 2016 and 2017 (upper bound is exclusive)
    labels=["sei_new", "sei_old"],  # presumably in the same order as the two frames -- TODO confirm
)

Difference in volume per country of import between the two versions

# Dumbbell comparison of VOLUME_RAW per country of import, new version vs. old version.
plot_grouped_by_dumbbell_comparision(
    sei_pcs_db,
    sei_old_db,
    "COUNTRY_OF_IMPORT",
    value="VOLUME_RAW",
    years=range(2015, 2018),  # 2015, 2016 and 2017 (upper bound is exclusive)
    labels=["sei_new", "sei_old"],  # presumably in the same order as the two frames -- TODO confirm
)

Difference in volume per biome between the two versions

# Dumbbell comparison of VOLUME_RAW per biome, new version vs. old version.
plot_grouped_by_dumbbell_comparision(
    sei_pcs_db,
    sei_old_db,
    "BIOME",
    value="VOLUME_RAW",
    years=range(2015, 2018),  # 2015, 2016 and 2017 (upper bound is exclusive)
    labels=["sei_new", "sei_old"],  # presumably in the same order as the two frames -- TODO confirm
)