Diff Path
View or edit on GitHub
This page is synchronized from trase/models/Diff - Path.ipynb. Last modified on 2025-12-10 18:30 CET by Trase Admin.
Please view or edit the original file there; changes should be reflected here after the midnight build (CET),
or after manually triggering a build with a GitHub action (link).
import sys
from io import BytesIO
from pathlib import PurePosixPath
import pandas as pd
from boto3 import client
from trase.tools.aws import get_pandas_df
from trase.tools.jupyter.diff import print_difference, read_csv
# Load the two Ecuador shrimp 2019 tables that are compared below:
# df_b via read_csv (POST results file) and df_a via get_pandas_df
# (the v1.0.0 SEI-PCS key).
df_b = read_csv("Ecuador/Shrimp/2019/POST/results.csv")
df_a = get_pandas_df("ecuador/shrimp/sei_pcs/v1.0.0/SEIPCS_ECUADOR_SHRIMP_2019.csv")
# Alternative source for df_b and a sanity check, both currently disabled:
# df_b = get_pandas_df("ecuador/shrimp/sei_pcs/SEIPCS_ECUADOR_SHRIMP_2019.csv")
# assert 30616 not in list(df_b.HS6.unique())

# Print 32000 as a percentage of the summed VOLUME_RAW column of df_b.
# NOTE(review): the origin and units of the 32000 figure are not stated
# in this notebook — confirm before relying on the printed value.
raw_volume_total = df_b.VOLUME_RAW.sum()
print((32000 / raw_volume_total) * 100)

# Report the differences between the two tables.
print_difference(df_a, df_b)
from trase.tools.aws import get_pandas_df
from trase.diff import print_difference, read_csv
# Load the Brazil coffee 2015 table and its post-processed counterpart.
df_a = get_pandas_df("brazil/coffee/sei_pcs/qa_ed/SEIPCS_BRAZIL_COFFEE_2015.csv")
df_b = get_pandas_df(
    "brazil/coffee/sei_pcs/qa_ed/post-processed/SEIPCS_BRAZIL_COFFEE_2015.csv"
)

# Columns selected from the post-processed frame before it is compared.
# NOTE(review): "COMODITY" is spelled with a single "M"; it is kept as-is
# because it must match the column label in the CSV — confirm that label.
columns_to_compare = [
    "COUNTRY_OF_ORIGIN",
    "COMODITY",
    "YEAR",
    "EXPORTER",
    "EXPORTER_TAX_ID",
    "LVL3_NAME_PROD",
    "PORT_OF_EXPORT",
    "COUNTRY_OF_DESTINATION",
    "IMPORTER",
    "VOLUME_RAW",
    "VOLUME_NORM",
    "FOB",
    "BRANCH",
    "PRODUCT",
    "STATUS",
    "LVL6_GEOCODE_PROD",
    "LVL6_NAME_PROD",
    "LVL6_GEOCODE_LH",
    "LVL6_NAME_LH",
    "QA_EXPORT_MUN",
]

# Report the differences between df_a and the column-restricted df_b.
print_difference(df_a, df_b[columns_to_compare])
import pandas as pd
# Read the semicolon-separated POST results file and list its columns.
df = pd.read_csv("Ecuador/Shrimp/2019/POST/results.csv", sep=";")
print(df.columns)

# Distinct BRANCH values for rows whose EXPORTER starts with
# "LABORATORIO BAMAR" (left as the last expression so a notebook displays it).
is_bamar_exporter = df.EXPORTER.str.startswith("LABORATORIO BAMAR")
df[is_bamar_exporter].BRANCH.unique()
# SODERAL SOCIEDAD DE DESTILACION DE ALCOHOLES S.A.
# SODERAL SOCIEDAD DE DESTILACION DE ALCOHOLES S.A.
from trase.tools.ingest.metadata import (
get_dataset_file_id_list,
get_dataset_file_metadata,
)
# Look up the file ids returned for 7 (presumably a dataset id — confirm),
# fetch the metadata of the first file in that list, and print each entry
# of its "nodes" collection on its own line.
file_ids = get_dataset_file_id_list(7)
metadata = get_dataset_file_metadata(7, file_ids[0])
node_list = metadata["nodes"]
# Disabled alternative: print the nodes ordered by their 'node_position'.
# print(sorted(node_list, key=lambda i: i['node_position']))
for node_data in node_list:
    # The loop body must be indented; without it the cell does not parse.
    print(node_data)
# Show the column labels of df_b (displayed as the notebook cell's output).
df_b.columns