In-class Exercise 3

In this in-class exercise, I learn about analytical mapping.

Nor Aisyah https://www.linkedin.com/in/nor-aisyah/
08-30-2021

1. Installing and loading required packages

packages = c('sf', 'tmap', 'tidyverse')
for (p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p,character.only = T)
}

2. Import Data

mpszpop2020 <- read_rds("data/rds/mpszpop2020.rds")

3. Plot a quantile map of initial data

4. Visualise Extreme Values

4.1 Box plot

4.2 Percentile Map

4.2.1 Extract a variable from sf data frame

4.2.1.1 Exclude records with NA

mpszpop2020a <- mpszpop2020 %>%
  drop_na()

4.2.1.2 See range of DEPENDENCY (Extra: For my understanding)

range(mpszpop2020a$DEPENDENCY)
[1]  0 19

4.2.1.3 See quartiles of DEPENDENCY (Extra: For my understanding)

quantile(mpszpop2020a$DEPENDENCY)
        0%        25%        50%        75%       100% 
 0.0000000  0.6518626  0.7024793  0.7645065 19.0000000 

4.2.1.4 Defining a different set of quartiles

percent <- c(0, .01, .1, .5, .9, .99, 1)
var <- mpszpop2020a["DEPENDENCY"] %>% 
  st_set_geometry(NULL)
quantile(var[,1], percent)
        0%         1%        10%        50%        90%        99% 
 0.0000000  0.1377778  0.5686120  0.7024793  0.8474114  1.2100000 
      100% 
19.0000000 

4.2.2 Creating the get.var function

get.var<-function(vname, df) {
  v <- df[vname]%>%
  st_set_geometry(NULL)
  v <- unname(v[,1])
  return(v)
}

4.2.3 Plotting a percentile map using tmap functions

percent <- c(0, .01, .1, .5, .9, .99, 1)
var <- get.var("DEPENDENCY", mpszpop2020a)
bperc <- quantile(var, percent)
tm_shape(mpszpop2020a) + 
  tm_polygons() + 
  tm_shape(mpszpop2020a) + 
  tm_fill("DEPENDENCY", 
          breaks=bperc, 
          palette="Blues",
          labels=c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
  tm_borders() +
  tm_layout(title = "Percentile Map", title.position = c("right", "bottom"))

4.2.4 Plotting a percentile map using a function

percentmap <- function(vnam, df, legtitle=NA, mtitle="Percentile Map"){
  percent <- c(0,.01, .1,.5,.9,.99,1)
  var <- get.var(vnam,df)
  bperc <- quantile (var, percent)
  tm_shape (mpszpop2020) + 
    tm_polygons() + 
    tm_shape(df)+
    tm_fill(vnam, title=legtitle, breaks=bperc, palette="Blues", 
            labels = c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
     tm_borders() +
    tm_layout(title = mtitle, title.position=c("right", "bottom"))
    
}

4.2.4.1 Test the percentile mapping function - AGED

percentmap("AGED", mpszpop2020a)

4.2.4.2 Test the percentile mapping function - YOUNG

percentmap("YOUNG", mpszpop2020a)

Just for my understanding (to see the difference in map after choosing a different palette): Please ignore this section

A percentile mapping function (PRGn palette)

## Plot as a polygon first 
# then fill function so that you wont get the awkward spaces after removing NA values
percentmap <- function(vnam, df, legtitle=NA, mtitle="Percentile Map"){
  percent <- c(0,.01, .1,.5,.9,.99,1)
  var <- get.var(vnam,df)
  bperc <- quantile (var, percent)
  tm_shape (mpszpop2020) + 
    tm_polygons() + 
    tm_shape(df)+
    tm_fill(vnam, title=legtitle, breaks=bperc, palette="PRGn", 
            labels = c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
     tm_borders() +
    tm_layout(title = mtitle, title.position=c("right", "bottom"))
    
}

Test function

percentmap("AGED", mpszpop2020a)

4.3 Box Map

4.3.1 Creating the boxbreaks function

  # identify outliers using upper and lower percentile
boxbreaks <- function(v,mult=1.5) {
  qv <- unname(quantile(v))
  iqr <- qv[4] - qv[2]
  upfence <- qv[4] + mult * iqr
  lofence <- qv[2] - mult * iqr
  # initialize break points vector
  bb <- vector(mode="numeric",length=7)
  # logic for lower and upper fences
  if (lofence < qv[1]) {  # no lower outliers
    bb[1] <- lofence
    bb[2] <- floor(qv[1])
  } else {
    bb[2] <- lofence
    bb[1] <- qv[1]
  }
  if (upfence > qv[5]) { # no upper outliers
    bb[7] <- upfence
    bb[6] <- ceiling(qv[5])
  } else {
    bb[6] <- upfence
    bb[7] <- qv[5]
  }
  bb[3:5] <- qv[2:4]
  return(bb)
}

4.3.2 Test the boxbreak function

var <- get.var("AGED", mpszpop2020a) 
boxbreaks(var)

4.3.3 Ensure that AGED doesn’t contain 0 (although already removed previously)

mpszpop2020a <- mpszpop2020 %>%
  filter(AGED>=0)
var <- get.var("AGED", mpszpop2020a)
boxbreaks(var)
[1] -4330     0   515  2080  3745  8590 20240

4.3.4 Creating Boxmap function (This will have holes)

boxmap <- function(vnam, df, 
                   legtitle=NA,
                   mtitle="Box Map",
                   mult=1.5){
  var <- get.var(vnam,df)
  bb <- boxbreaks(var)
  tm_shape(df) +
     tm_fill(vnam,title=legtitle,
             breaks=bb,
             palette="Blues",
          labels = c("lower outlier", 
                     "< 25%", 
                     "25% - 50%", 
                     "50% - 75%",
                     "> 75%", 
                     "upper outlier"))  +
  tm_borders() +
  tm_layout(title = mtitle, 
            title.position = c("right",
                               "bottom"))
}

4.3.5 Plot box map - ECONOMY ACTIVE (Have holes)

4.3.6 Tidy version of box map (No holes, nicer to see)

4.3.7 Plot box map - ECONOMY ACTIVE (Have holes)

4.4 Choropleth Map for Rates

4.4.1 Comparing distribution of total and aged population, 2020

4.4.1.1 TOTAL

4.4.1.2 AGED

4.5 Raw Rate map

mpszpop2020a <- mpszpop2020 %>%
  mutate(`AGED%` = (`AGED`/`TOTAL`) * 100) %>%
  filter (`AGED%` >= 0)

4.5.1 Plot Raw Rate map

[1] -2.17276  0.00000 11.28169 16.48199 20.25132 33.70576 95.00000

4.6 Compare Absolute and Rate Choropleth Maps

4.6.1 Absolute

4.6.2 Rate

NOTE: There is a difference in the colours.