In this in-class exercise, I learn about analytical mapping.
packages = c('sf', 'tmap', 'tidyverse')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
mpszpop2020 <- read_rds("data/rds/mpszpop2020.rds")
mpszpop2020a <- mpszpop2020 %>%
drop_na()
range(mpszpop2020a$DEPENDENCY)
[1] 0 19
quantile(mpszpop2020a$DEPENDENCY)
0% 25% 50% 75% 100%
0.0000000 0.6518626 0.7024793 0.7645065 19.0000000
percent <- c(0, .01, .1, .5, .9, .99, 1)
var <- mpszpop2020a["DEPENDENCY"] %>%
st_set_geometry(NULL)
quantile(var[,1], percent)
0% 1% 10% 50% 90% 99%
0.0000000 0.1377778 0.5686120 0.7024793 0.8474114 1.2100000
100%
19.0000000
percent <- c(0, .01, .1, .5, .9, .99, 1)
var <- get.var("DEPENDENCY", mpszpop2020a)
bperc <- quantile(var, percent)
tm_shape(mpszpop2020a) +
tm_polygons() +
tm_shape(mpszpop2020a) +
tm_fill("DEPENDENCY",
breaks=bperc,
palette="Blues",
labels=c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
tm_borders() +
tm_layout(title = "Percentile Map", title.position = c("right", "bottom"))
percentmap <- function(vnam, df, legtitle=NA, mtitle="Percentile Map"){
percent <- c(0,.01, .1,.5,.9,.99,1)
var <- get.var(vnam,df)
bperc <- quantile (var, percent)
tm_shape (mpszpop2020) +
tm_polygons() +
tm_shape(df)+
tm_fill(vnam, title=legtitle, breaks=bperc, palette="Blues",
labels = c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
tm_borders() +
tm_layout(title = mtitle, title.position=c("right", "bottom"))
}
percentmap("AGED", mpszpop2020a)
percentmap("YOUNG", mpszpop2020a)
## Plot as a polygon first
# then fill function so that you wont get the awkward spaces after removing NA values
percentmap <- function(vnam, df, legtitle=NA, mtitle="Percentile Map"){
percent <- c(0,.01, .1,.5,.9,.99,1)
var <- get.var(vnam,df)
bperc <- quantile (var, percent)
tm_shape (mpszpop2020) +
tm_polygons() +
tm_shape(df)+
tm_fill(vnam, title=legtitle, breaks=bperc, palette="PRGn",
labels = c("< 1%", "1%-10%", "10%-50%", "50%-90%", "90%-99%", ">99%"))+
tm_borders() +
tm_layout(title = mtitle, title.position=c("right", "bottom"))
}
percentmap("AGED", mpszpop2020a)
# identify outliers using upper and lower percentile
boxbreaks <- function(v,mult=1.5) {
qv <- unname(quantile(v))
iqr <- qv[4] - qv[2]
upfence <- qv[4] + mult * iqr
lofence <- qv[2] - mult * iqr
# initialize break points vector
bb <- vector(mode="numeric",length=7)
# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- floor(qv[1])
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- ceiling(qv[5])
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]
return(bb)
}
var <- get.var("AGED", mpszpop2020a)
boxbreaks(var)
mpszpop2020a <- mpszpop2020 %>%
filter(AGED>=0)
var <- get.var("AGED", mpszpop2020a)
boxbreaks(var)
[1] -4330 0 515 2080 3745 8590 20240
boxmap <- function(vnam, df,
legtitle=NA,
mtitle="Box Map",
mult=1.5){
var <- get.var(vnam,df)
bb <- boxbreaks(var)
tm_shape(df) +
tm_fill(vnam,title=legtitle,
breaks=bb,
palette="Blues",
labels = c("lower outlier",
"< 25%",
"25% - 50%",
"50% - 75%",
"> 75%",
"upper outlier")) +
tm_borders() +
tm_layout(title = mtitle,
title.position = c("right",
"bottom"))
}
mpszpop2020a <- mpszpop2020 %>%
mutate(`AGED%` = (`AGED`/`TOTAL`) * 100) %>%
filter (`AGED%` >= 0)
[1] -2.17276 0.00000 11.28169 16.48199 20.25132 33.70576 95.00000
NOTE: There is a difference in the colours.