Indonesia Kabupaten Boundaries Simplified 2023
ST_READ('s3://trase-storage/indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_simplified_2023.geojson', ignore_geometry := true)
Dbt path: trase_production.main.indonesia_kabupaten_boundaries_simplified_2023
Explore on Metabase: Full table; summary statistics
Containing yaml file link: trase/data_pipeline/models/indonesia/spatial/boundaries/auriga/out/_schema.yml
Model file link: trase/data_pipeline/models/indonesia/spatial/boundaries/auriga/out/indonesia_kabupaten_boundaries_simplified_2023.py
Calls script: trase/data/indonesia/spatial/boundaries/auriga/out/kabupaten_boundaries_simplified_2023.R
Dbt test runs & lineage: Test results · Lineage
Full dbt_docs page: Open in dbt docs (includes lineage graph -at the bottom right-, tests, and downstream dependencies)
Tags: mock_model, indonesia, spatial, kabupaten
indonesia_kabupaten_boundaries_simplified_2023
Description
No description
Details
| Column | Type | Description |
|---|---|---|
| type | VARCHAR | |
| prov | VARCHAR | |
| prov_code | DOUBLE | |
| kab | VARCHAR | |
| kab_code | INTEGER | |
| geom | GEOMETRY | |
Models / Seeds
source.trase_duckdb.trase-storage-raw.indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase
Sources
['trase-storage-raw', 'indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase']
## ---------------------------------------------------------
##
## Project: Trase Indonesia wood pulp context
##
## Purpose of script: Clean and simplify kabupaten boundary data from Auriga
##
## Author: Jason Jon Benedict
##
## Date Created: 2023-03-15
##
## ---------------------------------------------------------
##
## Notes:
##
##
##
##
##
## ---------------------------------------------------------
library(tidyverse)
library(readxl)
library(tidylog)
library(data.table)
library(janitor)
library(lubridate)
library(sf)
library(aws.s3)
library(fs)
library(rmapshaper)
library(testthat)
library(dlookr)
# Printing preferences: favour fixed over scientific notation, 4 significant digits
options(digits = 4, scipen = 6)
# AWS setup -----------------------------------------------------------------
# Bucket name used when fetching the raw input below
bucket <- "trase-storage"
# Load AWS credentials via aws.signature, then set the default region
aws.signature::use_credentials()
Sys.setenv(AWS_DEFAULT_REGION = "eu-west-1")
# read data ---------------------------------------------------------------
# Raw kabupaten boundaries from Auriga: fetch the GeoJSON object from S3,
# decode the raw bytes to text and parse that text as a spatial layer
obj <- get_object(
  object = "/indonesia/spatial/BOUNDARIES/auriga/in/administrative_boundaries_trase.geojson",
  bucket = bucket
)
kab <- obj %>%
  rawToChar() %>%
  read_sf()
# clean attributes -------------------------------------------------------
# Steps, in order:
#   1. keep and rename the administrative attribute columns
#   2. upper-case every character column
#   3. overwrite prov_code for the four new Papua provinces
#   4. drop Z/M dimensions from the geometries
kab_clean <- kab %>%
  select(
    type = kota,
    prov = prov_INA,
    prov_code = id_prov,
    kab = dist_INA,
    kab_code = id_dist
  ) %>%
  mutate(across(where(is.character), toupper)) %>%
  mutate(
    # note: dummy codes added since BIG has not yet assigned new province
    # codes to these 4 new provinces in Papua
    prov_code = case_when(
      prov == "PAPUA SELATAN" ~ 93,
      prov == "PAPUA TENGAH" ~ 94,
      prov == "PAPUA PEGUNUNGAN" ~ 95,
      prov == "PAPUA BARAT DAYA" ~ 96,
      TRUE ~ prov_code
    )
  ) %>%
  st_zm(drop = TRUE, what = "ZM")
# simplify data -----------------------------------------------------------
# 1. simplify the cleaned boundaries (keep = 0.1, weighting = 0.8)
kab_simp <- ms_simplify(kab_clean, keep = 0.1, weighting = 0.8, sys = TRUE)
# 2. dissolve on kab_code, carrying the other attribute columns along
kab_simp <- ms_dissolve(
  kab_simp,
  field = "kab_code",
  copy_fields = c("type", "prov", "prov_code", "kab"),
  sys = TRUE
)
# 3. repair any geometries left invalid by the previous steps
kab_simp <- st_make_valid(kab_simp)
# data checks --------------------------------------------------------------
# Column-level diagnostics of the attribute table (geometry dropped first)
kab_simp %>%
  st_drop_geometry() %>%
  diagnose()

# Distinct province name/code pairs (printed for visual inspection)
provinces <- kab_simp %>%
  select(prov, prov_code) %>%
  st_drop_geometry() %>%
  distinct() %>%
  print()

# Distinct kabupaten name/code pairs (printed for visual inspection)
kabupaten <- kab_simp %>%
  select(kab, kab_code) %>%
  st_drop_geometry() %>%
  distinct() %>%
  print()

# Names and codes must map one-to-one. Equal counts of unique names and unique
# codes are not sufficient on their own: a many-to-many mapping can produce
# matching counts. Each table holds distinct (name, code) pairs, so also
# requiring the row count to equal the number of unique names guarantees that
# every name has exactly one code and every code exactly one name.
expect_equal(nrow(provinces), length(unique(provinces$prov)))
expect_equal(length(unique(provinces$prov)), length(unique(provinces$prov_code)))
expect_equal(nrow(kabupaten), length(unique(kabupaten$kab)))
expect_equal(length(unique(kabupaten$kab)), length(unique(kabupaten$kab_code)))
# write data --------------------------------------------------------------
## Cleaned (non-simplified) boundaries: write GeoJSON to a fresh temporary
## directory, then upload that file to S3
tmp <- dir_create(file_temp())
clean_geojson <- path(tmp, "kabupaten_boundaries_2023.geojson")
st_write(st_as_sf(kab_clean), clean_geojson)
aws.s3::put_object(
  file = clean_geojson,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_2023.geojson",
  bucket = "trase-storage"
)
## Simplified boundaries: write GeoJSON to a new temporary directory, then
## upload that file to S3
tmp <- dir_create(file_temp())
simplified_geojson <- path(tmp, "kabupaten_boundaries_simplified_2023.geojson")
st_write(st_as_sf(kab_simp), simplified_geojson)
aws.s3::put_object(
  file = simplified_geojson,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_simplified_2023.geojson",
  bucket = "trase-storage"
)
## additional export of the cleaned boundaries as a zipped shapefile for GEE
kab_export <- kab_clean %>%
  st_as_sf()

# Write the shapefile into its own fresh directory. Everything in that
# directory is then, by construction, a component file of this shapefile, so
# the archive cannot pick up unrelated files (another export, or a zip left
# over from an earlier run) the way an unanchored file-name pattern could.
shp_dir <- dir_create(file_temp())
st_write(kab_export, path(shp_dir, "kabupaten_boundaries_2023.shp"), delete_dsn = TRUE)

# List the component files before the archive itself is created
files_to_zip <- list.files(shp_dir, full.names = TRUE)
zipfile <- file.path(shp_dir, "kabupaten_boundaries_2023.zip")

# "-j" stores the files without their directory paths
zip_status <- zip(zipfile, files = files_to_zip, flags = "-j")
# Fail here with a clear message rather than at upload time
if (zip_status != 0 || !file.exists(zipfile)) {
  stop(
    "Failed to create ", zipfile, " (zip exit status ", zip_status, ")",
    call. = FALSE
  )
}

aws.s3::put_object(
  file = zipfile,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_2023.zip",
  bucket = "trase-storage"
)
import pandas as pd
def model(dbt, cursor):
    """Placeholder dbt Python model; it does not produce any data.

    References the raw Auriga administrative-boundaries source and then
    raises ``NotImplementedError``. ``cursor`` is not used.

    Raises:
        NotImplementedError: always, once the source has been referenced.
    """
    # Reference the upstream source; the return value is discarded.
    # NOTE(review): presumably this call exists so dbt records the lineage
    # to the raw input - confirm.
    dbt.source(
        "trase-storage-raw",
        "indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase",
    )
    raise NotImplementedError()
    # Unreachable: the raise above always fires first.
    # NOTE(review): presumably kept because dbt expects model() to end in a
    # return statement - confirm before removing.
    return pd.DataFrame({"hello": ["world"]})