View or edit on GitHub
This page is synchronized from trase/models/brazil/beef/2018_beef_input_data.ipynb. Last modified on 2026-03-21 22:30 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after a midnight build (CET time),
or manually triggering it with a GitHub action (link).
2018 Brazil Beef Input Data Review
from trase.tools.sps import *
import plotly.express as px
import pandas as pd
BEEF_HS6 = [
"010221", # Cattle; live, pure-bred breeding animals
"010229", # Cattle; live, other than pure-bred breeding animals
"010290", # Bovine animals; live, other than cattle and buffalo
"020120", # Meat; of bovine animals, cuts with bone in, fresh or chilled
"020130", # Meat; of bovine animals, boneless cuts, fresh or chilled
"020220", # Meat; of bovine animals, cuts with bone in, frozen
"020230", # Meat; of bovine animals, boneless cuts, frozen
"020610", # Offal, edible; of bovine animals, fresh or chilled
"020621", # Offal, edible; of bovine animals, tongues, frozen
"020622", # Offal, edible; of bovine animals, livers, frozen
"020629", # Offal, edible; of bovine animals, (other than tongues & livers), frozen
"021020", # Meat; salted, in brine, dried or smoked, of bovine animals
"160250", # Meat preparations; of bovine animals, meat or offal, prepared/preserved
]
BEEF_HS4 = [h[0:4] for h in BEEF_HS6]
MDIC (Port)
df_mdic = (
concat(
[
get_pandas_df_once(
"brazil/trade/mdic/port/brazil_mdic_port_2018.csv",
dtype=str,
na_filter=False,
)
]
)
.astype({"vol": float, "month": int})
.assign(datasource="MDIC")
)
df_mdic = df_mdic[df_mdic["hs6"].isin(BEEF_HS6)]
df_mdic.sample(1).T
Total volume
print("MDIC Volume", format_float(df_mdic["vol"].sum(), 2))
MDIC Volume 1,800,000,000
Breakdown by HS4
print_report_by_attribute(df_mdic, "vol", "hs6")
sum percentage
hs6
020230 1,100,000,000 62%
010229 200,000,000 11%
020130 190,000,000 10%
020629 130,000,000 7%
160250 100,000,000 6%
010290 30,000,000 2%
020220 16,000,000 1%
020621 13,000,000 1%
020120 3,800,000 0%
021020 2,800,000 0%
010221 1,500,000 0%
020622 850,000 0%
020610 310,000 0%
Brazil BoL
df_bol = (
get_pandas_df_once(
"brazil/trade/bol/2018/BRAZIL_BOL_2018.csv", dtype=str, na_filter=False
)
.astype({"vol": float, "month": int})
.assign(datasource="BOL")
)
df_bol = df_bol[df_bol["hs4"].isin(BEEF_HS4)]
df_bol["hs6_known"] = ~df_bol["hs6"].str.contains("X")
df_bol["exporter_municipality_known"] = ~df_bol[
"exporter.municipality.trase_id"
].str.contains("X")
df_bol["exporter_cnpj_known"] = df_bol["exporter.cnpj"] != "UNKNOWN"
df_bol.sample(1).T
| 142374 | |
|---|---|
| date | 2018-01-03 |
| exporter.label | BRF |
| country_of_destination.label | NETHERLANDS |
| vol | 23152.5 |
| hs_description | 0210: MEAT AND EDIBLE MEAT OFFAL, SALTED, IN B... |
| exporter.cnpj | 01838723016988 |
| exporter.activity_portuguese | COMERCIO ATACADISTA DE CARNES BOVINAS E SUINAS... |
| importer.label | BRF GLOBAL |
| vessel.label | MAERSK LINS |
| voyage_number | 1802 |
| hs | 0210 |
| port_of_origin.label | ITAJAI |
| port_of_arrival.label | ROTTERDAM |
| port_of_import.label | ALGECIRAS |
| world_region | EUROPA |
| place_of_loading | APM TERMINALS |
| source | DATAMYNE |
| hs4 | 0210 |
| hs6 | XXXX |
| hs8 | XXXX |
| carrier_datamar.label | UNKNOWN |
| description | UNKNOWN |
| port_of_discharge | UNKNOWN |
| importer.type | UNKNOWN |
| port_of_maritime_origin.country | UNKNOWN |
| port_of_maritime_destination | UNKNOWN |
| port_of_maritime_origin | UNKNOWN |
| carrier.label | UNKNOWN |
| port_of_export.country | UNKNOWN |
| exporter.type | cnpj |
| port_of_maritime_destination.country | UNKNOWN |
| port_of_receipt.country | UNKNOWN |
| port_of_destination | UNKNOWN |
| port_of_receipt | UNKNOWN |
| vessel.type | UNKNOWN |
| port_of_discharge.country | UNKNOWN |
| datamar_id | UNKNOWN |
| port_of_export.name | ITAJAI |
| country_of_destination.name | NETHERLANDS |
| country_of_destination.trase_id | NL |
| exporter.state.trase_id | BR-42 |
| exporter.municipality.trase_id | BR-4208203 |
| exporter.state.name | SANTA CATARINA |
| exporter.municipality.name | ITAJAI |
| hs5 | XXXX |
| month | 1 |
| datasource | BOL |
| hs6_known | False |
| exporter_municipality_known | True |
| exporter_cnpj_known | True |
There is about 2 million tons in BoL when we filter by HS4 (maybe also includes pork & chicken??)
print("BOL Volume", format_float(df_bol["vol"].sum(), 2))
BOL Volume 2,100,000,000
Most data comes from datamyne
print_report_by_attribute(df_bol, "vol", ["source"])
sum percentage
source
DATAMYNE 1,700,000,000 80%
DATALINER 430,000,000 20%
About 77% of data has unknown hs6 - all from datamyne
print_report_by_attribute(df_bol, "vol", ["source", "hs6_known"])
sum percentage
source hs6_known
DATAMYNE False 1,600,000,000 77%
DATALINER True 430,000,000 20%
DATAMYNE True 62,000,000 3%
The municipality is known for Datamyne, and unknown for Dataliner (==live cattle)
print_report_by_attribute(df_bol, "vol", ["source", "exporter_municipality_known"])
sum percentage
source exporter_municipality_known
DATAMYNE True 1,700,000,000 80%
DATALINER True 240,000,000 11%
False 190,000,000 9%
DATAMYNE False 38,000 0%
print_report_by_attribute(df_bol, "vol", ["source", "hs6_known"])
sum percentage
source hs6_known
DATAMYNE False 1,600,000,000 77%
DATALINER True 430,000,000 20%
DATAMYNE True 62,000,000 3%
CD (Concat)
months = {
"Mai": 5,
"Abr": 4,
"Mar": 3,
"Fev": 2, # ???
"Jan": 1,
"Jun": 6,
"Jul": 7,
"Feb": 2,
}
df_cd = (
get_pandas_df_once(
"brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv",
version_id="43UAhIXtUNwaU83tXzz9sBf6lJ4ew9Ut",
dtype=str,
na_filter=False,
)
.astype({"vol": float, "month": int})
.assign(datasource="CD_CONCAT")
)
df_cd = df_cd[df_cd["hs4"].isin(BEEF_HS4)]
df_cd["port.name"] = df_cd["port_of_export.name"]
df_cd["month"] = df_cd["date"].apply(lambda l: months[l.split(" ")[1]])
cd_max_month = df_cd["month"].max()
df_cd.sample(1).T
Failed to fetch ETag for s3://trase-storage/brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv, skipping cache
Traceback (most recent call last):
File "/opt/tljh/user/lib/python3.8/trase/tools/aws/aws_helpers_cached.py", line 41, in get_pandas_df_once
etag = get_etag(client, bucket, key)
File "/opt/tljh/user/lib/python3.8/trase/tools/aws/s3_helpers.py", line 81, in get_etag
response = head_object(client, bucket, key, version_id)
File "/opt/tljh/user/lib/python3.8/trase/tools/aws/s3_helpers.py", line 86, in head_object
return client.head_object(
File "/opt/tljh/user/lib/python3.8/site-packages/botocore/client.py", line 508, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/opt/tljh/user/lib/python3.8/site-packages/botocore/client.py", line 915, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
| 28446 | |
|---|---|
| date | 20 Mar 2018 |
| hs6 | 020629 |
| VMLE.DOLAR.BAL.EXP | 1297.5 |
| vol | 838.794 |
| exporter.cnpj | 02916265008225 |
| NOME.IMPDR.EXPDR | JBS S/A |
| country_of_destination.label | HONG KONG |
| port_of_export.label | ALF-PORTO DE SANTOS |
| NOME.IMPORTADOR.ESTRANGEIRO | MASTER ELITE INTERNATIONAL HOLDINGS LTD |
| NUM.DDE | 21855051494 |
| NUM.RE | 180394882005 |
| hs4 | 0206 |
| month | 3 |
| exporter.municipality.trase_id | BR-1100189 |
| port_of_export.name | SANTOS |
| country_of_destination.name | CHINA (HONG KONG) |
| datasource | CD_CONCAT |
| port.name | SANTOS |
Total volume is 1 million - correctly half
print_report_by_attribute(df_cd, "vol", "hs4")
print("CD Volume", format_float(df_cd["vol"].sum(), 2))
sum percentage
hs4
0202 420,000,000 42%
0102 390,000,000 39%
0201 83,000,000 8%
0206 69,000,000 7%
1602 37,000,000 4%
0210 2,700,000 0%
CD Volume 1,000,000,000
HS6 is complete (no unknowns)
print_report_by_attribute(df_cd, "vol", "hs6")
sum percentage
hs6
020230 400,000,000 40%
010229 380,000,000 38%
020130 74,000,000 7%
020629 55,000,000 5%
160250 37,000,000 4%
020220 18,000,000 2%
020621 13,000,000 1%
020120 9,200,000 1%
010290 6,500,000 1%
010221 4,900,000 0%
021020 2,700,000 0%
020610 570,000 0%
020622 310,000 0%
We have about 40% 0102 (live cattle) which is much more than MDIC's 13%
print_report_by_attribute(df_cd, "vol", "hs4")
sum percentage
hs4
0202 420,000,000 42%
0102 390,000,000 39%
0201 83,000,000 8%
0206 69,000,000 7%
1602 37,000,000 4%
0210 2,700,000 0%
Customs declarations have 26% unknowns in the port
print_report_by_attribute(df_cd, "vol", "port_of_export.name")
sum percentage
port_of_export.name
SANTOS 350,000,000 35%
UNKNOWN PORT BRAZIL 260,000,000 26%
RIO GRANDE 87,000,000 9%
SAO FRANCISCO DO SUL 69,000,000 7%
PARANAGUA 68,000,000 7%
SAO SEBASTIAO 51,000,000 5%
DIONISIO CERQUEIRA 40,000,000 4%
ITAJAI 19,000,000 2%
BARCARENA 12,000,000 1%
IMBITUBA 9,400,000 1%
SAO BORJA 9,100,000 1%
BELEM 8,000,000 1%
RIO DE JANEIRO 7,900,000 1%
PONTA PORA 4,000,000 0%
JAGUARAO 3,500,000 0%
FOZ DO IGUACU 2,100,000 0%
GUAIRA 1,100,000 0%
SALVADOR 880,000 0%
SAO PAULO 530,000 0%
GUAJARA-MIRIM 410,000 0%
SANTANA DO LIVRAMENTO 370,000 0%
URUGUAIANA 290,000 0%
CHUI 230,000 0%
CORUMBA 190,000 0%
FORTALEZA 74,000 0%
ASSIS BRASIL 72,000 0%
AEROPORTO INTERNACIONAL DE VIRACOPOS 68,000 0%
PECEM 36,000 0%
print_report_by_attribute(df_cd, "vol", ["NOME.IMPORTADOR.ESTRANGEIRO"])
CD (Dashboard)
df_dashboard = (
get_pandas_df_once(
"brazil/trade/cd/dashboard/out/CD_DASHBOARD_BRAZIL_2018.csv",
dtype=str,
na_filter=False,
)
.astype({"volume": float, "month": int})
.assign(datasource="CD_DASHBOARD")
.rename(columns={"volume": "vol"})
)
df_dashboard = df_dashboard[df_dashboard["hs6"].isin(BEEF_HS6)]
df_dashboard.sample(1).T
| 544395 | |
|---|---|
| date | 2018-02-01 |
| hs8 | 02062990 |
| hs_description | OFFAL OF BOVINE ANIMALS, EDIBLE, OTHER, FROZEN |
| country_of_destination.label | HONG KONG |
| fob | 65449 |
| exporter.label | FRIGOSUL - FRIGORIFICO SUL LTDA |
| exporter.municipality.label | APARECIDA DO TABOADO |
| vol | 18430.0 |
| exporter.cnpj | 02591772000170 |
| exporter.zip | 79570-000 |
| via | 01 |
| via.name | MARITIME |
| year | 2018 |
| month | 2 |
| hs6 | 020629 |
| hs4 | 0206 |
| exporter.cnpj.valid | valid_cnpj |
| port_of_export.name | PARANAGUA |
| state.name | PARANA |
| state.trase_id | BR-41 |
| check | 49523 |
| exporter.municipality.name | APARECIDA DO TABOADO |
| exporter.municipality.trase_id | BR-5001003 |
| exporter.municipality.state.trase_id | BR-50 |
| country_of_destination.name | CHINA (HONG KONG) |
| country_of_destination.trase_id | HK |
| datasource | CD_DASHBOARD |
print("Total volume", format_float(df_dashboard["vol"].sum(), 2))
Total volume 580,000,000
CD (Third Party)
df_cd_thirdparty = (
get_pandas_df_once(
"brazil/trade/cd/datamyne/2018/BRAZIL_BEEF_DATAMYNE_CD_2018.csv",
dtype=str,
na_filter=False,
)
.assign(datasource="CD_THIRD_PARTY")
.astype({"vol": float})
)
df_cd_thirdparty.sample(1).T
| 3440 | |
|---|---|
| num_dde | 21800300662 |
| registration_date | 26 Jan 2018 |
| disembarkation_date | 27 Jan 2018 |
| exporter.cnpj | 67620377000467 |
| exporter.label | MINERVA S.A. |
| hs8 | 02013000 |
| country_of_import.label | CHILE |
| port_of_export.label | DIONISIO CERQUEIRA |
| port_of_export.state.uf | SC |
| importer.label | FRIGORIFICO DE OSORNO S.A. |
| fob | 97542.89 |
| vol | 21505.02 |
| num_re | 180121365001 |
| day | 27 |
| month | 1 |
| year | 2018 |
| hs6 | 020130 |
| hs4 | 0201 |
| via | 07 |
| via.name | ROAD |
| port_of_export.name | DIONISIO CERQUEIRA |
| country_of_import.name | CHILE |
| country_of_import.trase_id | CL |
| exporter.cnpj.valid | True |
| exporter.municipality.trase_id | BR-5215702 |
| exporter.municipality.name | PALMEIRAS DE GOIAS |
| datasource | CD_THIRD_PARTY |
CD (Disaggregated)
df_cd_disaggregated = (
get_pandas_df_once(
"brazil/beef/trade/cd/disaggregated/CD_DISAGGREGATED_BEEF_2018.csv",
dtype=str,
na_filter=False,
)
.astype({"vol": float})
.assign(datasource="CD Disaggregated")
)
df_cd_disaggregated.sample(1).T
LIVE_BOVINE_ANIMALS_HS4 = "0102"
# filter to rows for beef
hs6_is_unknown = df_cd_disaggregated["hs6"] == "XXXXXX"
accept = df_cd_disaggregated["hs6"].isin(BEEF_HS6)
hs4_is_live_cattle = df_cd_disaggregated["hs4"] == LIVE_BOVINE_ANIMALS_HS4
accept |= hs6_is_unknown & hs4_is_live_cattle
df_cd_disaggregated = df_cd_disaggregated[accept]
df_cd_disaggregated.sample(1).T
| 31903 | |
|---|---|
| index_mdic | -1 |
| index_cd_bol | CD-8178 |
| vol | 460.916 |
| vol_mdic | 0.0 |
| matching_stage | N/A |
| state.trase_id | BR-XX |
| success | N/A |
| message | UNKNOWN |
| via | 01 |
| hs4 | 0206 |
| hs5 | 02062 |
| hs6 | 020629 |
| hs8 | 02062990 |
| exporter.cnpj | 02916265018530 |
| exporter.label | JBS S/A |
| port_of_export.name | SANTOS |
| port_of_export.group | SANTOS |
| exporter.type | CNPJ |
| exporter.municipality.trase_id | BR-5106752 |
| importer.label | ORIENTAL PARTNERS LTD |
| country_of_destination.name | CHINA (HONG KONG) |
| country_of_destination.trase_id | HK |
| country_of_destination.group | CHINA |
| matched | FALSE |
| fob | 1176.5987236888966 |
| year | 2018 |
| exporter_geocode | 5106752 |
| state_of_production | UNKNOWN STATE |
| datasource | CD Disaggregated |
print("Total volume")
print(format_float(df_cd_disaggregated["vol"].sum(), 2))
print("How much of the CD was matched")
print_report_by_attribute(df_cd_disaggregated, "vol", ["matched", "matching_stage"])
print("How much of the MDIC was solved")
print_report_by_attribute(df_cd_disaggregated, "vol", ["success", "message"])
print("How much of the dataset comes from mdic")
print_report_by_attribute(
df_cd_disaggregated.assign(is_mdic=(df_cd_disaggregated["index_mdic"] != "-1")),
"vol",
["is_mdic"],
)
print("How much state of production & exporter is known")
df_cd_disaggregated["state_known"] = df_cd_disaggregated["state.trase_id"] != "BR-XX"
df_cd_disaggregated["exporter_known"] = ~df_cd_disaggregated[
"exporter.label"
].str.contains("UNKNOWN")
df_cd_disaggregated["exporter_geocode_known"] = (
df_cd_disaggregated["exporter.municipality.trase_id"] != "BR-XXXXXXX"
)
print_report_by_attribute(
df_cd_disaggregated,
"vol",
["state_known", "exporter_known", "exporter_geocode_known"],
)
Total volume
750,000,000
How much of the CD was matched
sum percentage
matched matching_stage
TRUE 1 - COUNTRY, EXPORTER MUNICIPALITY, HS8, PORT, MONTH 380,000,000 51%
FALSE N/A 190,000,000 26%
PADDING 130,000,000 17%
TRUE 4 - COUNTRY, EXPORTER MUNICIPALITY, HS8, MONTH 35,000,000 5%
2 - COUNTRY, EXPORTER MUNICIPALITY, HS6, PORT, WITHIN THREE MONTHS 6,400,000 1%
3 - COUNTRY, EXPORTER MUNICIPALITY, HS4, PORT 3,200,000 0%
How much of the MDIC was solved
sum percentage
success message
TRUE SOLVED WITH {'SOLVER': 'MOSEK'} 340,000,000 45%
N/A UNKNOWN 330,000,000 43%
TRUE SOLVED WITH {'SOLVER': 'ECOS', 'MAX_ITERS': 500}{'SOLVER': 'MOSEK'} FAILED - SOLVER 'MOSEK' FAILED. TRY ANOTHER SOLVER, OR SOLVE WITH VERBOSE=TRUE FOR MORE INFORMATION. - NONE{} FAILED - SOLVER 'MOSEK' FAILED. TRY ANOTHER SOLVER, OR SOLVE WITH VERBOSE=TRUE FOR MORE INFORMATION. - NONE 75,000,000 10%
PRESOLVED 14,000,000 2%
FALSE PROBLEM UNDER-DETERMINED DUE TO LACK OF MARITIME TRADE 52,000 0%
How much of the dataset comes from mdic
sum percentage
is_mdic
True 430,000,000 57%
False 330,000,000 43%
How much state of production & exporter is known
sum percentage
state_known exporter_known exporter_geocode_known
True True True 430,000,000 57%
False False False 170,000,000 23%
True True 150,000,000 19%
False 4,500,000 1%
True False False 52,000 0%
False False True 28,000 0%
CD (Disaggregated), New
df_cd_disaggregated_new = (
get_pandas_df_once(
"brazil/beef/trade/cd/disaggregated/CD_DISAGGREGATED_BEEF_2018_NEW.csv",
dtype=str,
na_filter=False,
)
.astype({"vol": float})
.assign(datasource="CD Disaggregated New")
)
df_cd_disaggregated_new.sample(1).T
| 4659 | |
|---|---|
| year | 2018 |
| hs6 | 020230 |
| hs4 | 0202 |
| exporter_name | JBS S/A |
| exporter_cnpj | 02916265020004 |
| state_of_production | MATO GROSSO DO SUL |
| exporter_geocode | 5220454 |
| country | CHINA (HONG KONG) |
| importer_name | MASTER ELITE INTERNATIONAL HOLDINGS LTD |
| port | SANTOS |
| cnpj8 | 02916265 |
| parent_cnpj8 | 13171927 |
| exporter_state | GOIAS |
| cnpj_is_valid | valid |
| vol | 873.955097 |
| fob | 3626.477261919983 |
| cwe | 1277.4822241259785 |
| datasource | CD Disaggregated New |
print("Total volume")
print(format_float(df_cd_disaggregated_new["vol"].sum(), 2))
print("How much state of production & exporter is known")
df_cd_disaggregated_new["state_known"] = (
df_cd_disaggregated_new["state_of_production"] != "UNKNOWN STATE"
)
df_cd_disaggregated_new["exporter_known"] = ~df_cd_disaggregated_new[
"exporter_name"
].str.contains("UNKNOWN")
df_cd_disaggregated_new["exporter_geocode_known"] = (
df_cd_disaggregated_new["exporter_geocode"] != "XXXXXXX"
)
print_report_by_attribute(
df_cd_disaggregated_new,
"vol",
["state_known", "exporter_known", "exporter_geocode_known"],
)
Total volume
2,300,000,000
How much state of production & exporter is known
sum percentage
state_known exporter_known exporter_geocode_known
False True True 940,000,000 41%
True True True 890,000,000 39%
False False False 290,000,000 13%
True False False 93,000,000 4%
False True False 51,000,000 2%
False True 5,600,000 0%
True False True 2,700,000 0%
Comparison of total volumes per HS code
import plotly.express as px
data = pd.concat(
[
consolidate(df, ["vol"], ["datasource", "hs4"])
for df in [df_mdic, df_bol, df_cd_disaggregated, df_cd_disaggregated_new]
]
)
fig = px.bar(
data,
x="hs4",
y="vol",
color="datasource",
barmode="group",
title="Comparison of total volume per HS4 (whole year)",
)
fig.show("png")

import plotly.express as px
import pandas as pd
data = pd.concat(
[
consolidate(
df[df["month"] <= cd_max_month],
["vol"],
["datasource", "hs4"],
)
for df in [df_mdic, df_bol, df_cd, df_dashboard]
]
)
fig = px.bar(
data,
x="hs4",
y="vol",
color="datasource",
barmode="group",
title="Comparison of total volume per HS4 (first half of year)",
)
fig.show("png")

When looking at the first half of the year, CD has far far more live cattle (0102) than MDIC, and less 0202 (frozen meat)
How well does CD match with MDIC?
import pandas as pd
# CD
df_cd = (
get_pandas_df_once(
"brazil/trade/cd/concat/BRAZIL_CD_CONCAT_BEEF_2018_CLEANED.csv",
version_id="43UAhIXtUNwaU83tXzz9sBf6lJ4ew9Ut",
dtype=str,
na_filter=False,
)
.astype({"vol": float, "month": int})
.assign(datasource="CD")
)
df_cd = df_cd[df_cd["hs4"].isin(BEEF_HS4)]
df_cd["month"] = df_cd["date"].apply(lambda l: months[l.split(" ")[1]])
df_cd["port.name"] = df_cd["port_of_export.name"]
cd_max_month = df_cd["month"].max()
# MDIC (Port)
df_mdic = (
concat(
[
get_pandas_df_once(
"brazil/trade/mdic/port/brazil_mdic_port_2018_redownload.csv",
version_id="bpT1NuPBqrwjHbkIIZKml5UB.eCGsGdl",
dtype=str,
na_filter=False,
)
]
)
.astype({"vol": float, "month": int})
.assign(datasource="MDIC")
)
df_mdic = df_mdic[df_mdic["hs4"].isin(BEEF_HS4)]
df_mdic = df_mdic[df_mdic["month"] <= cd_max_month]
# df_mdic = pd.merge(df_mdic, df_cd["hs6"].drop_duplicates())
df_mdic = df_mdic[df_mdic["via"] == "01"]
# MDIC (Municipality)
df_mdic_mun = (
concat(
[
get_pandas_df_once(
"brazil/trade/mdic/municipality/brazil_mdic_municipality_2018_redownload.csv",
version_id="fyRyzt9XiiQITxiivNNeuf8MPiKMrFUT",
dtype=str,
na_filter=False,
)
]
)
.astype({"vol": float, "month": int})
.assign(datasource="MDIC Municipality")
)
df_mdic_mun = df_mdic[df_mdic["hs4"].isin(BEEF_HS4)]
df_mdic_mun = df_mdic_mun[df_mdic_mun["month"] <= cd_max_month]
with pandas_display_context(significant_digits=3):
df = compare_dataframes_single(
df_cd,
df_mdic,
"vol",
# set(df_cd) & set(df_mdic) - {"vol", "datasource"},
["hs4"],
suffixes=("_cd", "_mdic"),
)
df["comparison"] *= 100
display(df)
| total_cd | total_mdic | comparison | |
|---|---|---|---|
| hs4 | |||
| 0202 | 420,000,000 | 543,000,000 | 29 |
| 0206 | 69,000,000 | 115,000,000 | 66 |
| 0201 | 83,100,000 | 40,700,000 | -104 |
| 1602 | 36,800,000 | 116,000,000 | 214 |
| 0102 | 392,000,000 | 121,000,000 | -223 |
| 0210 | 2,660,000 | 46,100,000 | 1,630 |
print_report_by_attribute(df_mdic, "vol", ["via"])
sum percentage
via
01 980,000,000 100%
df_cd["vol"].sum() / df_mdic["vol"].sum()
1.0227705738982642
Input CD Data
df_cd1 = get_pandas_df_once(
"brazil/trade/cd/datamyne/2018/BRAZIL_BEEF_DATAMYNE_CD_2018.csv",
dtype=str,
na_filter=False,
)
df_cd1.sample(1).T
| 3877 | |
|---|---|
| num_dde | 21801096988 |
| registration_date | 29 Mar 2018 |
| disembarkation_date | 02 Abr 2018 |
| exporter.cnpj | 67620377000467 |
| exporter.label | MINERVA S.A. |
| hs8 | 02023000 |
| country_of_import.label | CHILE |
| port_of_export.label | DIONISIO CERQUEIRA |
| port_of_export.state.uf | SC |
| importer.label | PROCARNE COMERCIAL S.A. |
| fob | 45910.2 |
| vol | 23013.2 |
| num_re | 180508474001 |
| day | 2 |
| month | 4 |
| year | 2018 |
| hs6 | 020230 |
| hs4 | 0202 |
| via | 07 |
| via.name | ROAD |
| port_of_export.name | DIONISIO CERQUEIRA |
| country_of_import.name | CHILE |
| country_of_import.trase_id | CL |
| exporter.cnpj.valid | True |
| exporter.municipality.trase_id | BR-5215702 |
| exporter.municipality.name | PALMEIRAS DE GOIAS |
df_cd2 = get_pandas_df_once(
"brazil/trade/cd/dashboard/out/CD_BRAZIL_2018.csv",
dtype=str,
na_filter=False,
)
df_cd2.sample(1).T
| 200080 | |
|---|---|
| Date..Month. | 2018-02-01 |
| Exporter.CNJP | 59104422001806 |
| Exporter.Name | VOLKSWAGEN DO BRASIL INDUSTRIA DE VEICULOS AUT... |
| Product.HS | 73181900 |
| Country.of.Destination | BOLIVIA |
| Port.of.Departure | NAO DECLARADO |
| FOB.Value..US.. | 80 |
| Net.Weight | 2 |
| Year | 2018 |