Skip to content

Indonesia Kabupaten Boundaries 2023

ST_READ('s3://trase-storage/indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_2023.geojson')

Dbt path: trase_production.main.indonesia_kabupaten_boundaries_2023

Explore on Metabase: Full table; summary statistics

Containing yaml file link: trase/data_pipeline/models/indonesia/spatial/boundaries/auriga/out/_schema.yml

Model file link: trase/data_pipeline/models/indonesia/spatial/boundaries/auriga/out/indonesia_kabupaten_boundaries_2023.py

Calls script: trase/data/indonesia/spatial/boundaries/auriga/out/kabupaten_boundaries_simplified_2023.R

Dbt test runs & lineage: Test results · Lineage

Full dbt_docs page: Open in dbt docs (includes the lineage graph at the bottom right, tests, and downstream dependencies)

Tags: mock_model, indonesia, spatial, kabupaten


indonesia_kabupaten_boundaries_2023

Description

No description


Details

Column Type Description

Models / Seeds

  • source.trase_duckdb.trase-storage-raw.indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase

Sources

  • ['trase-storage-raw', 'indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase']
## ---------------------------------------------------------
##
## Project: Trase Indonesia wood pulp context
##
## Purpose of script: Clean and simplify kabupaten boundary data from Auriga
##
## Author: Jason Jon Benedict
##
## Date Created: 2023-03-15
##
## ---------------------------------------------------------
##
## Notes:
##
##
##
##
##
## ---------------------------------------------------------

library(tidyverse)
library(readxl)
library(tidylog)
library(data.table)
library(janitor)
library(lubridate)
library(sf)
library(aws.s3)
library(fs)
library(rmapshaper)
library(testthat)
library(dlookr)

# Print numbers with 4 significant digits and bias output away from
# scientific notation
options(digits = 4, scipen = 6)

# credentials -------------------------------------------------------------

# Set up AWS credentials through aws.signature, then fix the region and the
# bucket name used below
aws.signature::use_credentials()
Sys.setenv(AWS_DEFAULT_REGION = "eu-west-1")
bucket <- "trase-storage"

# read data ---------------------------------------------------------------

# kabupaten (raw from Auriga): download the GeoJSON object, turn the raw
# bytes into a character string and let sf parse that string
kab_geojson_key <- "/indonesia/spatial/BOUNDARIES/auriga/in/administrative_boundaries_trase.geojson"
kab_geojson_raw <- get_object(kab_geojson_key, bucket)
kab <- read_sf(rawToChar(kab_geojson_raw))

# clean attributes  -------------------------------------------------------

# Steps, in order:
#   1. keep only the administrative attributes, renamed to short names
#   2. upper-case every character column
#   3. overwrite the province code of the four new Papua provinces
#   4. drop any Z/M dimensions from the geometries
kab_clean <- kab %>%
  select(
    type = kota,
    prov = prov_INA,
    prov_code = id_prov,
    kab = dist_INA,
    kab_code = id_dist
  ) %>%
  mutate(across(where(is.character), toupper)) %>%
  mutate(
    # note: dummy codes added since BIG has not yet assigned new province
    # codes to these 4 new provinces in Papua; every other province keeps
    # the code it came with
    prov_code = case_when(
      prov == "PAPUA SELATAN" ~ 93,
      prov == "PAPUA TENGAH" ~ 94,
      prov == "PAPUA PEGUNUNGAN" ~ 95,
      prov == "PAPUA BARAT DAYA" ~ 96,
      TRUE ~ prov_code
    )
  ) %>%
  st_zm(drop = TRUE, what = "ZM")

# simplify data -----------------------------------------------------------

# Attributes carried over onto each dissolved kabupaten polygon
kab_copy_fields <- c("type", "prov", "prov_code", "kab")

# Simplify the geometries, merge all features sharing a kab_code into one
# feature, then repair any geometries left invalid by those two steps.
# NOTE(review): sys = TRUE is passed to both rmapshaper calls; per the
# rmapshaper docs this uses a system mapshaper installation - confirm it is
# available wherever this script runs.
kab_simp <- kab_clean %>%
  ms_simplify(
    keep = 0.1,
    weighting = 0.8,
    sys = TRUE
  ) %>%
  ms_dissolve(
    field = "kab_code",
    copy_fields = kab_copy_fields,
    sys = TRUE
  ) %>%
  st_make_valid()

# data checks --------------------------------------------------------------

# Column-level diagnostics (dlookr) on the simplified attribute table
kab_simp %>%
  st_drop_geometry() %>%
  diagnose()

# Distinct (province name, province code) pairs, printed for visual inspection
provinces <- kab_simp %>%
  select(prov, prov_code) %>%
  st_drop_geometry() %>%
  distinct() %>%
  print()

# Distinct (kabupaten name, kabupaten code) pairs, printed for visual inspection
kabupaten <- kab_simp %>%
  select(kab, kab_code) %>%
  st_drop_geometry() %>%
  distinct() %>%
  print()

# Names and codes must map one-to-one. `provinces` and `kabupaten` hold
# distinct (name, code) pairs, so a one-to-one mapping means:
#   number of pairs == number of distinct names == number of distinct codes.
# Comparing the two distinct counts alone is not enough: one name with two
# codes plus two names sharing one code would still give equal counts.
expect_equal(
  length(unique(provinces$prov)),
  length(unique(provinces$prov_code)),
  info = "number of distinct province names differs from number of distinct province codes"
)
expect_equal(
  nrow(provinces),
  length(unique(provinces$prov_code)),
  info = "a province code is paired with more than one province name"
)
expect_equal(
  length(unique(kabupaten$kab)),
  length(unique(kabupaten$kab_code)),
  info = "number of distinct kabupaten names differs from number of distinct kabupaten codes"
)
expect_equal(
  nrow(kabupaten),
  length(unique(kabupaten$kab_code)),
  info = "a kabupaten code is paired with more than one kabupaten name"
)

# write data --------------------------------------------------------------
## write cleaned up boundaries to s3 (non simplified)
tmp <- dir_create(file_temp())

# Build the local path once so the file written and the file uploaded cannot
# drift apart
kab_clean_file <- path(tmp, "kabupaten_boundaries_2023.geojson")

kab_clean %>%
  st_as_sf() %>%
  st_write(kab_clean_file)

# Upload to the bucket defined in the credentials section rather than
# repeating the bucket name as a literal
aws.s3::put_object(
  file = kab_clean_file,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_2023.geojson",
  bucket = bucket
)

## write cleaned up boundaries to s3 (simplified)
tmp <- dir_create(file_temp())

# Build the local path once so the file written and the file uploaded cannot
# drift apart
kab_simp_file <- path(tmp, "kabupaten_boundaries_simplified_2023.geojson")

kab_simp %>%
  st_as_sf() %>%
  st_write(kab_simp_file)

# Upload to the bucket defined in the credentials section rather than
# repeating the bucket name as a literal
aws.s3::put_object(
  file = kab_simp_file,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_simplified_2023.geojson",
  bucket = bucket
)

## additionally export the non-simplified boundaries as a zipped shapefile for GEE
kab_export <- kab_clean %>%
  st_as_sf()

# Write the shapefile into its own fresh directory so that the zip can only
# ever pick up the shapefile's component files, never other outputs of this
# script
shp_dir <- dir_create(file_temp())
st_write(kab_export, path(shp_dir, "kabupaten_boundaries_2023.shp"), delete_dsn = TRUE)

# A shapefile is several sibling files sharing one base name; collect them all
files_to_zip <- list.files(
  shp_dir,
  pattern = "^kabupaten_boundaries_2023\\.",
  full.names = TRUE
)
if (length(files_to_zip) == 0) {
  stop("No shapefile components found in ", shp_dir, call. = FALSE)
}

# "-j" stores the files without their directory paths. utils::zip() returns the
# exit status of the external zip command, so fail here if it did not succeed
# instead of carrying on to the upload with a missing or partial archive.
zipfile <- file.path(shp_dir, "kabupaten_boundaries_2023.zip")
zip_status <- utils::zip(zipfile, files = files_to_zip, flags = "-j")
if (!identical(as.integer(zip_status), 0L)) {
  stop("Zipping the shapefile failed with status ", zip_status, call. = FALSE)
}

# Upload to the bucket defined in the credentials section rather than
# repeating the bucket name as a literal
aws.s3::put_object(
  file = zipfile,
  object = "indonesia/spatial/BOUNDARIES/auriga/out/kabupaten_boundaries_2023.zip",
  bucket = bucket
)
import pandas as pd


def model(dbt, cursor):
    """Placeholder dbt Python model for indonesia_kabupaten_boundaries_2023.

    Calls ``dbt.source`` for the raw Auriga administrative-boundaries source
    and then raises, so invoking this function never returns a DataFrame.

    Args:
        dbt: Object supplied to the model; only ``dbt.source`` is used here.
        cursor: Second model argument; not used in this function.

    Raises:
        NotImplementedError: Always.
    """
    # The return value is discarded - presumably this call exists only to
    # declare the upstream dependency for dbt lineage (confirm).
    dbt.source(
        "trase-storage-raw",
        "indonesia_spatial_boundaries_auriga_in_administrative_boundaries_trase",
    )

    raise NotImplementedError()
    # NOTE(review): unreachable. Presumably kept because dbt expects the model
    # function to end with a return of a single DataFrame - confirm before
    # removing.
    return pd.DataFrame({"hello": ["world"]})