Skip to content

View or edit on GitHub

This page is synchronized from trase/models/brazil/beef/2018_beef_input_data.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin. Please view or edit the original file there; changes should be reflected here after a midnight build (CET time), or manually triggering it with a GitHub action (link).

2018 Brazil Beef Input Data Review

from trase.tools.sps import *
import plotly.express as px
import pandas as pd

BEEF_HS6 = [
    "010221",  # Cattle; live, pure-bred breeding animals
    "010229",  # Cattle; live, other than pure-bred breeding animals
    "010290",  # Bovine animals; live, other than cattle and buffalo
    "020120",  # Meat; of bovine animals, cuts with bone in, fresh or chilled
    "020130",  # Meat; of bovine animals, boneless cuts, fresh or chilled
    "020220",  # Meat; of bovine animals, cuts with bone in, frozen
    "020230",  # Meat; of bovine animals, boneless cuts, frozen
    "020610",  # Offal, edible; of bovine animals, fresh or chilled
    "020621",  # Offal, edible; of bovine animals, tongues, frozen
    "020622",  # Offal, edible; of bovine animals, livers, frozen
    "020629",  # Offal, edible; of bovine animals, (other than tongues & livers), frozen
    "021020",  # Meat; salted, in brine, dried or smoked, of bovine animals
    "160250",  # Meat preparations; of bovine animals, meat or offal, prepared/preserved
]
BEEF_HS4 = [h[0:4] for h in BEEF_HS6]

MDIC (Port)

df_mdic = (
    concat(
        [
            get_pandas_df_once(
                "brazil/trade/mdic/port/brazil_mdic_port_2018.csv",
                dtype=str,
                na_filter=False,
            )
        ]
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="MDIC")
)
df_mdic = df_mdic[df_mdic["hs6"].isin(BEEF_HS6)]
df_mdic.sample(1).T

Total volume

print("MDIC Volume", format_float(df_mdic["vol"].sum(), 2))
MDIC Volume 1,800,000,000

Breakdown by HS4

print_report_by_attribute(df_mdic, "vol", "hs6")
                 sum percentage
hs6                            
020230 1,100,000,000        62%
010229   200,000,000        11%
020130   190,000,000        10%
020629   130,000,000         7%
160250   100,000,000         6%
010290    30,000,000         2%
020220    16,000,000         1%
020621    13,000,000         1%
020120     3,800,000         0%
021020     2,800,000         0%
010221     1,500,000         0%
020622       850,000         0%
020610       310,000         0%

Brazil BoL

df_bol = (
    get_pandas_df_once(
        "brazil/trade/bol/2018/BRAZIL_BOL_2018.csv", dtype=str, na_filter=False
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="BOL")
)
df_bol = df_bol[df_bol["hs4"].isin(BEEF_HS4)]
df_bol["hs6_known"] = ~df_bol["hs6"].str.contains("X")
df_bol["exporter_municipality_known"] = ~df_bol[
    "exporter.municipality.trase_id"
].str.contains("X")
df_bol["exporter_cnpj_known"] = df_bol["exporter.cnpj"] != "UNKNOWN"

df_bol.sample(1).T
142374
date 2018-01-03
exporter.label BRF
country_of_destination.label NETHERLANDS
vol 23152.5
hs_description 0210: MEAT AND EDIBLE MEAT OFFAL, SALTED, IN B...
exporter.cnpj 01838723016988
exporter.activity_portuguese COMERCIO ATACADISTA DE CARNES BOVINAS E SUINAS...
importer.label BRF GLOBAL
vessel.label MAERSK LINS
voyage_number 1802
hs 0210
port_of_origin.label ITAJAI
port_of_arrival.label ROTTERDAM
port_of_import.label ALGECIRAS
world_region EUROPA
place_of_loading APM TERMINALS
source DATAMYNE
hs4 0210
hs6 XXXX
hs8 XXXX
carrier_datamar.label UNKNOWN
description UNKNOWN
port_of_discharge UNKNOWN
importer.type UNKNOWN
port_of_maritime_origin.country UNKNOWN
port_of_maritime_destination UNKNOWN
port_of_maritime_origin UNKNOWN
carrier.label UNKNOWN
port_of_export.country UNKNOWN
exporter.type cnpj
port_of_maritime_destination.country UNKNOWN
port_of_receipt.country UNKNOWN
port_of_destination UNKNOWN
port_of_receipt UNKNOWN
vessel.type UNKNOWN
port_of_discharge.country UNKNOWN
datamar_id UNKNOWN
port_of_export.name ITAJAI
country_of_destination.name NETHERLANDS
country_of_destination.trase_id NL
exporter.state.trase_id BR-42
exporter.municipality.trase_id BR-4208203
exporter.state.name SANTA CATARINA
exporter.municipality.name ITAJAI
hs5 XXXX
month 1
datasource BOL
hs6_known False
exporter_municipality_known True
exporter_cnpj_known True

There is about 2 million tons in BoL when we filter by HS4 (maybe also includes pork & chicken??)

print("BOL Volume", format_float(df_bol["vol"].sum(), 2))
BOL Volume 2,100,000,000

Most data comes from datamyne

print_report_by_attribute(df_bol, "vol", ["source"])
                    sum percentage
source                            
DATAMYNE  1,700,000,000        80%
DATALINER   430,000,000        20%

About 77% of data has unknown hs6 - all from datamyne

print_report_by_attribute(df_bol, "vol", ["source", "hs6_known"])
                              sum percentage
source    hs6_known                         
DATAMYNE  False     1,600,000,000        77%
DATALINER True        430,000,000        20%
DATAMYNE  True         62,000,000         3%

The municipality is known for Datamyne, and unknown for Dataliner (==live cattle)

print_report_by_attribute(df_bol, "vol", ["source", "exporter_municipality_known"])
                                                sum percentage
source    exporter_municipality_known                         
DATAMYNE  True                        1,700,000,000        80%
DATALINER True                          240,000,000        11%
          False                         190,000,000         9%
DATAMYNE  False                              38,000         0%
print_report_by_attribute(df_bol, "vol", ["source", "hs6_known"])
                              sum percentage
source    hs6_known                         
DATAMYNE  False     1,600,000,000        77%
DATALINER True        430,000,000        20%
DATAMYNE  True         62,000,000         3%

CD (Concat)

months = {
    "Mai": 5,
    "Abr": 4,
    "Mar": 3,
    "Fev": 2,  # ???
    "Jan": 1,
    "Jun": 6,
    "Jul": 7,
    "Feb": 2,
}
df_cd = (
    get_pandas_df_once(
        "brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv",
        version_id="43UAhIXtUNwaU83tXzz9sBf6lJ4ew9Ut",
        dtype=str,
        na_filter=False,
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="CD_CONCAT")
)
df_cd = df_cd[df_cd["hs4"].isin(BEEF_HS4)]
df_cd["port.name"] = df_cd["port_of_export.name"]

df_cd["month"] = df_cd["date"].apply(lambda l: months[l.split(" ")[1]])
cd_max_month = df_cd["month"].max()

df_cd.sample(1).T
Failed to fetch ETag for s3://trase-storage/brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv, skipping cache
Traceback (most recent call last):
  File "/opt/tljh/user/lib/python3.8/trase/tools/aws/aws_helpers_cached.py", line 41, in get_pandas_df_once
    etag = get_etag(client, bucket, key)
  File "/opt/tljh/user/lib/python3.8/trase/tools/aws/s3_helpers.py", line 81, in get_etag
    response = head_object(client, bucket, key, version_id)
  File "/opt/tljh/user/lib/python3.8/trase/tools/aws/s3_helpers.py", line 86, in head_object
    return client.head_object(
  File "/opt/tljh/user/lib/python3.8/site-packages/botocore/client.py", line 508, in _api_call
    return self._make_api_call(operation_name, kwargs)
  File "/opt/tljh/user/lib/python3.8/site-packages/botocore/client.py", line 915, in _make_api_call
    raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
28446
date 20 Mar 2018
hs6 020629
VMLE.DOLAR.BAL.EXP 1297.5
vol 838.794
exporter.cnpj 02916265008225
NOME.IMPDR.EXPDR JBS S/A
country_of_destination.label HONG KONG
port_of_export.label ALF-PORTO DE SANTOS
NOME.IMPORTADOR.ESTRANGEIRO MASTER ELITE INTERNATIONAL HOLDINGS LTD
NUM.DDE 21855051494
NUM.RE 180394882005
hs4 0206
month 3
exporter.municipality.trase_id BR-1100189
port_of_export.name SANTOS
country_of_destination.name CHINA (HONG KONG)
datasource CD_CONCAT
port.name SANTOS

Total volume is 1 million - correctly half

print_report_by_attribute(df_cd, "vol", "hs4")
print("CD Volume", format_float(df_cd["vol"].sum(), 2))
             sum percentage
hs4                        
0202 420,000,000        42%
0102 390,000,000        39%
0201  83,000,000         8%
0206  69,000,000         7%
1602  37,000,000         4%
0210   2,700,000         0%
CD Volume 1,000,000,000

HS6 is complete (no unknowns)

print_report_by_attribute(df_cd, "vol", "hs6")
               sum percentage
hs6                          
020230 400,000,000        40%
010229 380,000,000        38%
020130  74,000,000         7%
020629  55,000,000         5%
160250  37,000,000         4%
020220  18,000,000         2%
020621  13,000,000         1%
020120   9,200,000         1%
010290   6,500,000         1%
010221   4,900,000         0%
021020   2,700,000         0%
020610     570,000         0%
020622     310,000         0%

We have about 40% 0102 (live cattle) which is much more than MDIC's 13%

print_report_by_attribute(df_cd, "vol", "hs4")
             sum percentage
hs4                        
0202 420,000,000        42%
0102 390,000,000        39%
0201  83,000,000         8%
0206  69,000,000         7%
1602  37,000,000         4%
0210   2,700,000         0%

Customs declarations have 26% unknowns in the port

print_report_by_attribute(df_cd, "vol", "port_of_export.name")
                                             sum percentage
port_of_export.name                                        
SANTOS                               350,000,000        35%
UNKNOWN PORT BRAZIL                  260,000,000        26%
RIO GRANDE                            87,000,000         9%
SAO FRANCISCO DO SUL                  69,000,000         7%
PARANAGUA                             68,000,000         7%
SAO SEBASTIAO                         51,000,000         5%
DIONISIO CERQUEIRA                    40,000,000         4%
ITAJAI                                19,000,000         2%
BARCARENA                             12,000,000         1%
IMBITUBA                               9,400,000         1%
SAO BORJA                              9,100,000         1%
BELEM                                  8,000,000         1%
RIO DE JANEIRO                         7,900,000         1%
PONTA PORA                             4,000,000         0%
JAGUARAO                               3,500,000         0%
FOZ DO IGUACU                          2,100,000         0%
GUAIRA                                 1,100,000         0%
SALVADOR                                 880,000         0%
SAO PAULO                                530,000         0%
GUAJARA-MIRIM                            410,000         0%
SANTANA DO LIVRAMENTO                    370,000         0%
URUGUAIANA                               290,000         0%
CHUI                                     230,000         0%
CORUMBA                                  190,000         0%
FORTALEZA                                 74,000         0%
ASSIS BRASIL                              72,000         0%
AEROPORTO INTERNACIONAL DE VIRACOPOS      68,000         0%
PECEM                                     36,000         0%
print_report_by_attribute(df_cd, "vol", ["NOME.IMPORTADOR.ESTRANGEIRO"])

CD (Dashboard)

df_dashboard = (
    get_pandas_df_once(
        "brazil/trade/cd/dashboard/out/CD_DASHBOARD_BRAZIL_2018.csv",
        dtype=str,
        na_filter=False,
    )
    .astype({"volume": float, "month": int})
    .assign(datasource="CD_DASHBOARD")
    .rename(columns={"volume": "vol"})
)
df_dashboard = df_dashboard[df_dashboard["hs6"].isin(BEEF_HS6)]
df_dashboard.sample(1).T
544395
date 2018-02-01
hs8 02062990
hs_description OFFAL OF BOVINE ANIMALS, EDIBLE, OTHER, FROZEN
country_of_destination.label HONG KONG
fob 65449
exporter.label FRIGOSUL - FRIGORIFICO SUL LTDA
exporter.municipality.label APARECIDA DO TABOADO
vol 18430.0
exporter.cnpj 02591772000170
exporter.zip 79570-000
via 01
via.name MARITIME
year 2018
month 2
hs6 020629
hs4 0206
exporter.cnpj.valid valid_cnpj
port_of_export.name PARANAGUA
state.name PARANA
state.trase_id BR-41
check 49523
exporter.municipality.name APARECIDA DO TABOADO
exporter.municipality.trase_id BR-5001003
exporter.municipality.state.trase_id BR-50
country_of_destination.name CHINA (HONG KONG)
country_of_destination.trase_id HK
datasource CD_DASHBOARD
print("Total volume", format_float(df_dashboard["vol"].sum(), 2))
Total volume 580,000,000

CD (Third Party)

df_cd_thirdparty = (
    get_pandas_df_once(
        "brazil/trade/cd/datamyne/2018/BRAZIL_BEEF_DATAMYNE_CD_2018.csv",
        dtype=str,
        na_filter=False,
    )
    .assign(datasource="CD_THIRD_PARTY")
    .astype({"vol": float})
)

df_cd_thirdparty.sample(1).T
3440
num_dde 21800300662
registration_date 26 Jan 2018
disembarkation_date 27 Jan 2018
exporter.cnpj 67620377000467
exporter.label MINERVA S.A.
hs8 02013000
country_of_import.label CHILE
port_of_export.label DIONISIO CERQUEIRA
port_of_export.state.uf SC
importer.label FRIGORIFICO DE OSORNO S.A.
fob 97542.89
vol 21505.02
num_re 180121365001
day 27
month 1
year 2018
hs6 020130
hs4 0201
via 07
via.name ROAD
port_of_export.name DIONISIO CERQUEIRA
country_of_import.name CHILE
country_of_import.trase_id CL
exporter.cnpj.valid True
exporter.municipality.trase_id BR-5215702
exporter.municipality.name PALMEIRAS DE GOIAS
datasource CD_THIRD_PARTY

CD (Disaggregated)

df_cd_disaggregated = (
    get_pandas_df_once(
        "brazil/beef/trade/cd/disaggregated/CD_DISAGGREGATED_BEEF_2018.csv",
        dtype=str,
        na_filter=False,
    )
    .astype({"vol": float})
    .assign(datasource="CD Disaggregated")
)

df_cd_disaggregated.sample(1).T
LIVE_BOVINE_ANIMALS_HS4 = "0102"

# filter to rows for beef
hs6_is_unknown = df_cd_disaggregated["hs6"] == "XXXXXX"
accept = df_cd_disaggregated["hs6"].isin(BEEF_HS6)
hs4_is_live_cattle = df_cd_disaggregated["hs4"] == LIVE_BOVINE_ANIMALS_HS4
accept |= hs6_is_unknown & hs4_is_live_cattle
df_cd_disaggregated = df_cd_disaggregated[accept]

df_cd_disaggregated.sample(1).T
31903
index_mdic -1
index_cd_bol CD-8178
vol 460.916
vol_mdic 0.0
matching_stage N/A
state.trase_id BR-XX
success N/A
message UNKNOWN
via 01
hs4 0206
hs5 02062
hs6 020629
hs8 02062990
exporter.cnpj 02916265018530
exporter.label JBS S/A
port_of_export.name SANTOS
port_of_export.group SANTOS
exporter.type CNPJ
exporter.municipality.trase_id BR-5106752
importer.label ORIENTAL PARTNERS LTD
country_of_destination.name CHINA (HONG KONG)
country_of_destination.trase_id HK
country_of_destination.group CHINA
matched FALSE
fob 1176.5987236888966
year 2018
exporter_geocode 5106752
state_of_production UNKNOWN STATE
datasource CD Disaggregated
print("Total volume")
print(format_float(df_cd_disaggregated["vol"].sum(), 2))

print("How much of the CD was matched")
print_report_by_attribute(df_cd_disaggregated, "vol", ["matched", "matching_stage"])

print("How much of the MDIC was solved")
print_report_by_attribute(df_cd_disaggregated, "vol", ["success", "message"])

print("How much of the dataset comes from mdic")
print_report_by_attribute(
    df_cd_disaggregated.assign(is_mdic=(df_cd_disaggregated["index_mdic"] != "-1")),
    "vol",
    ["is_mdic"],
)

print("How much state of production & exporter is known")
df_cd_disaggregated["state_known"] = df_cd_disaggregated["state.trase_id"] != "BR-XX"
df_cd_disaggregated["exporter_known"] = ~df_cd_disaggregated[
    "exporter.label"
].str.contains("UNKNOWN")
df_cd_disaggregated["exporter_geocode_known"] = (
    df_cd_disaggregated["exporter.municipality.trase_id"] != "BR-XXXXXXX"
)
print_report_by_attribute(
    df_cd_disaggregated,
    "vol",
    ["state_known", "exporter_known", "exporter_geocode_known"],
)
Total volume
750,000,000
How much of the CD was matched
                                                                                   sum percentage
matched matching_stage                                                                           
TRUE    1 - COUNTRY, EXPORTER MUNICIPALITY, HS8, PORT, MONTH               380,000,000        51%
FALSE   N/A                                                                190,000,000        26%
        PADDING                                                            130,000,000        17%
TRUE    4 - COUNTRY, EXPORTER MUNICIPALITY, HS8, MONTH                      35,000,000         5%
        2 - COUNTRY, EXPORTER MUNICIPALITY, HS6, PORT, WITHIN THREE MONTHS   6,400,000         1%
        3 - COUNTRY, EXPORTER MUNICIPALITY, HS4, PORT                        3,200,000         0%
How much of the MDIC was solved
                                                                                                                                                                                                                                                                                                              sum percentage
success message                                                                                                                                                                                                                                                                                                             
TRUE    SOLVED WITH {'SOLVER': 'MOSEK'}                                                                                                                                                                                                                                                               340,000,000        45%
N/A     UNKNOWN                                                                                                                                                                                                                                                                                       330,000,000        43%
TRUE    SOLVED WITH {'SOLVER': 'ECOS', 'MAX_ITERS': 500}{'SOLVER': 'MOSEK'} FAILED - SOLVER 'MOSEK' FAILED. TRY ANOTHER SOLVER, OR SOLVE WITH VERBOSE=TRUE FOR MORE INFORMATION. - NONE{} FAILED - SOLVER 'MOSEK' FAILED. TRY ANOTHER SOLVER, OR SOLVE WITH VERBOSE=TRUE FOR MORE INFORMATION. - NONE  75,000,000        10%
        PRESOLVED                                                                                                                                                                                                                                                                                      14,000,000         2%
FALSE   PROBLEM UNDER-DETERMINED DUE TO LACK OF MARITIME TRADE                                                                                                                                                                                                                                             52,000         0%
How much of the dataset comes from mdic
                sum percentage
is_mdic                       
True    430,000,000        57%
False   330,000,000        43%
How much state of production & exporter is known
                                                          sum percentage
state_known exporter_known exporter_geocode_known                       
True        True           True                   430,000,000        57%
False       False          False                  170,000,000        23%
            True           True                   150,000,000        19%
                           False                    4,500,000         1%
True        False          False                       52,000         0%
False       False          True                        28,000         0%

CD (Disaggregated), New

df_cd_disaggregated_new = (
    get_pandas_df_once(
        "brazil/beef/trade/cd/disaggregated/CD_DISAGGREGATED_BEEF_2018_NEW.csv",
        dtype=str,
        na_filter=False,
    )
    .astype({"vol": float})
    .assign(datasource="CD Disaggregated New")
)

df_cd_disaggregated_new.sample(1).T
4659
year 2018
hs6 020230
hs4 0202
exporter_name JBS S/A
exporter_cnpj 02916265020004
state_of_production MATO GROSSO DO SUL
exporter_geocode 5220454
country CHINA (HONG KONG)
importer_name MASTER ELITE INTERNATIONAL HOLDINGS LTD
port SANTOS
cnpj8 02916265
parent_cnpj8 13171927
exporter_state GOIAS
cnpj_is_valid valid
vol 873.955097
fob 3626.477261919983
cwe 1277.4822241259785
datasource CD Disaggregated New
print("Total volume")
print(format_float(df_cd_disaggregated_new["vol"].sum(), 2))

print("How much state of production & exporter is known")
df_cd_disaggregated_new["state_known"] = (
    df_cd_disaggregated_new["state_of_production"] != "UNKNOWN STATE"
)
df_cd_disaggregated_new["exporter_known"] = ~df_cd_disaggregated_new[
    "exporter_name"
].str.contains("UNKNOWN")
df_cd_disaggregated_new["exporter_geocode_known"] = (
    df_cd_disaggregated_new["exporter_geocode"] != "XXXXXXX"
)
print_report_by_attribute(
    df_cd_disaggregated_new,
    "vol",
    ["state_known", "exporter_known", "exporter_geocode_known"],
)
Total volume
2,300,000,000
How much state of production & exporter is known
                                                          sum percentage
state_known exporter_known exporter_geocode_known                       
False       True           True                   940,000,000        41%
True        True           True                   890,000,000        39%
False       False          False                  290,000,000        13%
True        False          False                   93,000,000         4%
False       True           False                   51,000,000         2%
            False          True                     5,600,000         0%
True        False          True                     2,700,000         0%

Comparison of total volumes per HS code

import plotly.express as px

data = pd.concat(
    [
        consolidate(df, ["vol"], ["datasource", "hs4"])
        for df in [df_mdic, df_bol, df_cd_disaggregated, df_cd_disaggregated_new]
    ]
)

fig = px.bar(
    data,
    x="hs4",
    y="vol",
    color="datasource",
    barmode="group",
    title="Comparison of total volume per HS4 (whole year)",
)
fig.show("png")

png

import plotly.express as px
import pandas as pd

data = pd.concat(
    [
        consolidate(
            df[df["month"] <= cd_max_month],
            ["vol"],
            ["datasource", "hs4"],
        )
        for df in [df_mdic, df_bol, df_cd, df_dashboard]
    ]
)

fig = px.bar(
    data,
    x="hs4",
    y="vol",
    color="datasource",
    barmode="group",
    title="Comparison of total volume per HS4 (first half of year)",
)
fig.show("png")

png

When looking at the first half of the year, CD has far far more live cattle (0102) than MDIC, and less 0202 (frozen meat)

How well does CD match with MDIC?

import pandas as pd

# CD
df_cd = (
    get_pandas_df_once(
        "brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv",
        version_id="43UAhIXtUNwaU83tXzz9sBf6lJ4ew9Ut",
        dtype=str,
        na_filter=False,
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="CD")
)
df_cd = df_cd[df_cd["hs4"].isin(BEEF_HS4)]
df_cd["month"] = df_cd["date"].apply(lambda l: months[l.split(" ")[1]])
df_cd["port.name"] = df_cd["port_of_export.name"]
cd_max_month = df_cd["month"].max()


# MDIC (Port)
df_mdic = (
    concat(
        [
            get_pandas_df_once(
                "brazil/trade/mdic/port/brazil_mdic_port_2018_redownload.csv",
                version_id="bpT1NuPBqrwjHbkIIZKml5UB.eCGsGdl",
                dtype=str,
                na_filter=False,
            )
        ]
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="MDIC")
)
df_mdic = df_mdic[df_mdic["hs4"].isin(BEEF_HS4)]
df_mdic = df_mdic[df_mdic["month"] <= cd_max_month]
# df_mdic = pd.merge(df_mdic, df_cd["hs6"].drop_duplicates())
df_mdic = df_mdic[df_mdic["via"] == "01"]

# MDIC (Municipality)
df_mdic_mun = (
    concat(
        [
            get_pandas_df_once(
                "brazil/trade/mdic/municipality/brazil_mdic_municipality_2018_redownload.csv",
                version_id="fyRyzt9XiiQITxiivNNeuf8MPiKMrFUT",
                dtype=str,
                na_filter=False,
            )
        ]
    )
    .astype({"vol": float, "month": int})
    .assign(datasource="MDIC Municipality")
)
df_mdic_mun = df_mdic[df_mdic["hs4"].isin(BEEF_HS4)]
df_mdic_mun = df_mdic_mun[df_mdic_mun["month"] <= cd_max_month]
with pandas_display_context(significant_digits=3):
    df = compare_dataframes_single(
        df_cd,
        df_mdic,
        "vol",
        # set(df_cd) & set(df_mdic) - {"vol", "datasource"},
        ["hs4"],
        suffixes=("_cd", "_mdic"),
    )
    df["comparison"] *= 100
    display(df)
total_cd total_mdic comparison
hs4
0202 420,000,000 543,000,000 29
0206 69,000,000 115,000,000 66
0201 83,100,000 40,700,000 -104
1602 36,800,000 116,000,000 214
0102 392,000,000 121,000,000 -223
0210 2,660,000 46,100,000 1,630
print_report_by_attribute(df_mdic, "vol", ["via"])
            sum percentage
via                       
01  980,000,000       100%
df_cd["vol"].sum() / df_mdic["vol"].sum()
1.0227705738982642

Input CD Data

df_cd1 = get_pandas_df_once(
    "brazil/trade/cd/datamyne/2018/BRAZIL_BEEF_DATAMYNE_CD_2018.csv",
    dtype=str,
    na_filter=False,
)
df_cd1.sample(1).T
3877
num_dde 21801096988
registration_date 29 Mar 2018
disembarkation_date 02 Abr 2018
exporter.cnpj 67620377000467
exporter.label MINERVA S.A.
hs8 02023000
country_of_import.label CHILE
port_of_export.label DIONISIO CERQUEIRA
port_of_export.state.uf SC
importer.label PROCARNE COMERCIAL S.A.
fob 45910.2
vol 23013.2
num_re 180508474001
day 2
month 4
year 2018
hs6 020230
hs4 0202
via 07
via.name ROAD
port_of_export.name DIONISIO CERQUEIRA
country_of_import.name CHILE
country_of_import.trase_id CL
exporter.cnpj.valid True
exporter.municipality.trase_id BR-5215702
exporter.municipality.name PALMEIRAS DE GOIAS
df_cd2 = get_pandas_df_once(
    "brazil/trade/cd/dashboard/out/CD_BRAZIL_2018.csv",
    dtype=str,
    na_filter=False,
)
df_cd2.sample(1).T
200080
Date..Month. 2018-02-01
Exporter.CNJP 59104422001806
Exporter.Name VOLKSWAGEN DO BRASIL INDUSTRIA DE VEICULOS AUT...
Product.HS 73181900
Country.of.Destination BOLIVIA
Port.of.Departure NAO DECLARADO
FOB.Value..US.. 80
Net.Weight 2
Year 2018