Skip to content

Brazil Logistics Silo Map V2 Final

s3://trase-storage/brazil/logistics/silos/silo_map_v2/silo_map_v2_final.geojson

Dbt path: trase_production.main_brazil.brazil_logistics_silo_map_v2_final

Explore on Metabase: Full table; summary statistics

Containing yaml file link: trase/data_pipeline/models/brazil/logistics/silos/silo_map_v2/_schema.yml

Model file link: trase/data_pipeline/models/brazil/logistics/silos/silo_map_v2/brazil_logistics_silo_map_v2_final.py

Calls script: trase/data/brazil/logistics/silos/silos_map_v2/4. silos_v2_types.R

Dbt test runs & lineage: Test results · Lineage

Full dbt_docs page: Open in dbt docs (includes the lineage graph at the bottom right, tests, and downstream dependencies)

Tags: mock_model


brazil_logistics_silo_map_v2_final

Description

No description


Details

Column Type Description
json JSON

Models / Seeds

  • model.trase_duckdb.brazil_logistics_silo_map_v2_post_validation_deduplication_annotation
  • model.trase_duckdb.brazil_logistics_sicarm_out_sicarm_2024
library(tidyverse)
# library(gemini.R)
# library(tictoc)
# library(future)
library(aws.s3)
# library(magick)
# library(furrr)
# library(rstac)
# library(terra)
library(sf)


###### DISCLAIMER 
## CURRENTLY TYPES ARE ASSIGNED MANUALLY THROUGH VISUAL INSPECTION 

# Load the post-validation, de-duplicated and annotated silo map (GeoJSON)
# from S3 with sf::read_sf. `opts` carries extra options for the underlying
# aws.s3 request.
silos_v2 <- s3read_using(
  FUN = read_sf,
  object = "brazil/logistics/silos/silo_map_v2/silo_map_v2_post_validation_deduplication_annotation.geojson",
  bucket = "trase-storage",
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  opts = c("check_region" = TRUE)
)

## The silo map lacks the capacity field from SICARM, so reload SICARM here to join it back in
# Load the SICARM 2024 table (semicolon-delimited CSV) from S3 and convert it
# to point features in EPSG:4326.
sicarm <- s3read_using(
  FUN = read_delim,
  delim = ";",
  object = "brazil/logistics/sicarm/out/sicarm_2024.csv",
  bucket = "trase-storage",
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  opts = c("check_region" = TRUE)
) %>%
  # The `geometry` column is text of the form "c(<lng>, <lat>)"; split it into
  # two columns. `convert = TRUE` turns the captured strings into numbers.
  # Rows whose text does not match the pattern get NA in both columns.
  tidyr::extract(
    geometry,
    into = c("lng", "lat"),
    regex = "^c\\((-?[0-9.]+),\\s*(-?[0-9.]+)\\)$",
    convert = TRUE
  ) %>%
  # Drop rows without usable coordinates (st_as_sf cannot build points from NA).
  filter(!is.na(lat)) %>%
  # `remove = FALSE` keeps lng/lat as ordinary columns; `lng` is used as a join
  # key further down. The CRS is set here, so no st_transform() is needed.
  st_as_sf(
    coords = c("lng", "lat"),
    crs = 4326,
    remove = FALSE
  )




# Attach SICARM capacity and CDA to the silo map. Every row of `silos_v2` is
# kept (all.x = TRUE); silos with no SICARM match get NA in the new columns.
# NOTE(review): the join key includes `lng`, a floating-point coordinate, so
# matches rely on both sources holding exactly the same value -- confirm.
v2_ready <- silos_v2 %>%
  merge(
    # Only the join keys and the two attributes are needed, as a plain table.
    y = select(st_drop_geometry(sicarm), cnpj, lng, `Capacidade (t)`, CDA),
    by = c("cnpj", "lng"),
    all.x = TRUE
  ) %>%
  rename(capacity = `Capacidade (t)`, sicarm_cda = CDA)
# Write the enriched silo map back to S3 as GeoJSON using sf::write_sf.
s3write_using(
  v2_ready,
  FUN = write_sf,
  object = "brazil/logistics/silos/silo_map_v2/silo_map_v2_final.geojson",
  bucket = "trase-storage",
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  opts = c("check_region" = TRUE)
)
# View(v2_ready)
# Quick inspection of the distinct values present in the output.
unique(v2_ready$type)
unique(v2_ready$local_facility)
##------------------------------------------##


# This process introduces a new field called types to identify what type of facility
# each record is classified as

#It reruns the vision process for the verified facilities but for a larger AOI and using Sentinel images

##-- Preparing the assets and credentials --##
# setAPI(Sys.getenv("MASTER_API_KEY"))

# ##-- Preparing the AOI and requesting the image --##
# dir.create('temp')
# centroid_projected <- st_transform(silos_v2, 32618)

# # Define half-width and half-height for the box in meters
# half_width <- 1000
# half_height <- 1000

# for(i in 1:nrow(centroid_projected)){
#   print(paste0('Progress: ',round(i/nrow(centroid_projected),3)*100,'%'))
#   # Get the coordinates of the transformed point
#   point_coords <- st_coordinates(centroid_projected[i,])
#   x_center <- point_coords[1]
#   y_center <- point_coords[2]

#   # Calculate the corner coordinates of the box
#   min_x <- x_center - half_width
#   max_x <- x_center + half_width
#   min_y <- y_center - half_height
#   max_y <- y_center + half_height

#   # Create a simple feature polygon for the box
#   box_coords <- matrix(
#       c(min_x, min_y,  # Bottom-left
#         max_x, min_y,  # Bottom-right
#         max_x, max_y,  # Top-right
#         min_x, max_y,  # Top-left
#         min_x, min_y), # Close the polygon
#       ncol = 2, byrow = TRUE
#     )

#   box_polygon <- st_polygon(list(box_coords))
#   box_sf <- st_sfc(box_polygon, crs = 32618)
#   aoi <- st_transform(box_sf, 4326)

#   aoi_bbox <- aoi |>
#       sf::st_transform(4326) |>
#       sf::st_bbox()

#   stac_query <- stac(
#       "https://planetarycomputer.microsoft.com/api/stac/v1"
#     )

#   stac_search_results <- stac_query |>
#       stac_search(
#         collections = "sentinel-2-l2a",
#         bbox = as.vector(aoi_bbox),
#         datetime = "2024-01-01/2024-12-31"
#       )

#   filtered_query <- stac_search_results |>
#       ext_filter(
#         `eo:cloud_cover` < 10
#       )

#   final_results <- filtered_query |>
#       post_request()

#   make_vsicurl_url <- function(base_url) {
#       paste0(
#         "/vsicurl", 
#         "?pc_url_signing=yes",
#         "&pc_collection=sentinel-2-l2a",
#         "&url=",
#         base_url
#       )
#     }

#   url <- make_vsicurl_url(rstac::assets_url(final_results, c("visual")))

#   out_file <- paste0('temp/sentinel2_visual_',i,'.tif')

#   sf::gdal_utils(
#       "warp",
#       source = url,
#       destination = out_file,
#       options = c(
#         "-t_srs", sf::st_crs(aoi)$wkt,
#         "-te", sf::st_bbox(aoi),
#         "-overwrite"
#       )
#     )
# }

# for(i in 1:nrow(centroid_projected)){
# print(i)
# aoi_sentinel<-rast(paste0('temp/tif/sentinel2_visual_',i,'.tif')) # read the raster
# png(paste0('temp/sentinel2_visual_',i,'.png'), width=6, height=5, units="in", res=1200) # save it as png
# plot(aoi_sentinel)
# dev.off()
# }
# ##------------------------------------------##

# ##-- All images saved, now we run the AI verification for facility type --##
# for(i in 1:nrow(centroid_projected)){
# print(i)
# combined_img <- image_append(c(image_read('temp/ref_image.png'), image_scale(image_read(paste0('temp/png/sentinel2_visual_',i,'.png')),1200)), stack = FALSE)
# image_write(combined_img, path = paste0('temp/combined/combined_image_',i,'.png'))
# }

# prompt_type <- "
#      Analyze the provided satellite image located in the first image from right to left. Classify the primary visible infrastructure and surrounding landscape as one of the following categories: 'agriculture' 'port' or 'none'
#      Consider the following characteristics for each category, allowing for natural variations:
#      Agriculture: As exemplified by the first, second, third and fourth image from left to right, look for large expanses of cultivated land, often characterized by geometric patterns (fields, irrigation), and potential agricultural infrastructure like storage facilities (e.g., silos, barns, square structures in fields). The color of the landscape can vary significantly (e.g., green, brown, yellow) depending on the season, crop type, or soil, and clouds may be present.
#      Port: As exemplified by the 5th and 6th image from left to right, look for a prominent body of water (ocean, sea, large lake, or major river) directly adjacent to a significant concentration of man-made structures. Key features include docks, piers, shipping containers, numerous buildings, industrial facilities, and potentially vessels (ships, boats) in the water or at anchor. The specific color of the water or land may vary, but the fundamental presence of water-side industrial/shipping activity should be evident.
#      None: If the image does not predominantly display the characteristics of either agriculture or a port, or if the features are too ambiguous to confidently classify into the other two categories.
#      Provide only the classification word ('agriculture', 'port', 'none') as your answer
# "

# # Be careful not to exceed the API's rate limits! Start with the default.
# plan(multisession)

# process_one_image <- function(image_path, model_name, prompt_text) {
#   out <- gemini_image(image = image_path, model = model_name, prompt = prompt_text)
#   validation_code <- gsub(':| ', '', str_sub(out, -4, -1))
#   return(list(full_result = out, validation_code = validation_code))
# }

# safe_process_one_image <- safely(process_one_image, otherwise = NULL)
# image_paths <- paste0(paste0('temp/combined/combined_image_',100:200,'.png'))

# tic("Parallel Processing")
# parallel_results <- future_map(image_paths, ~ safe_process_one_image(
#   image_path = .x,
#   model_name = '2.5-flash',
#   prompt_text = prompt_type
# ), .options = furrr_options(seed = TRUE)) 
# toc()

# plan(sequential)

# combined_results <- do.call(rbind, lapply(parallel_results, function(x) x$result$full_result))
import pandas as pd


def model(dbt, session):
    """Placeholder dbt Python model for ``brazil_logistics_silo_map_v2_final``.

    Configures the model as ``external``, declares its two upstream
    references, and then raises ``NotImplementedError``. No data is produced
    by this function.

    Args:
        dbt: dbt context object; used here for ``config`` and ``ref``.
        session: not used in this function.

    Raises:
        NotImplementedError: always.
    """
    dbt.config(materialized="external")

    # Reference the two upstream models; the returned objects are discarded.
    dbt.ref("brazil_logistics_silo_map_v2_post_validation_deduplication_annotation")
    dbt.ref("brazil_logistics_sicarm_out_sicarm_2024")

    raise NotImplementedError()

    # Unreachable: the raise above always fires first.
    # NOTE(review): presumably kept so the function still ends in a `return`
    # of a single DataFrame for dbt's static model parsing -- confirm before
    # removing.
    return pd.DataFrame({"hello": ["world"]})