In this exercise, I learn how to perform spatial point patterns analysis using spatstat package.
To show your figure images in a higher resolution, fix the retina in the above code chunk above.
packages = c('maptools', 'sf', 'raster', 'spatstat', 'tmap', 'tidyverse', 'plotly', 'ggthemes')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
More on the packages used:
Maptools is for creating spatial* objects
sf is for importing geospatial data
raster is for creating a raster object
spatstat is for performing SPPA
tmap is for plotting thematic maps
tidyverse is for data handling
plotly is for plotting interactive plot
ggthemes allow us to use the more advanced theme for plots # 2. Data used:
MP14_SUBZONE_WEB_PL: a polygon feature data in ESRI shapefile format from data.gov.sg, providing information of URA 2014 Master Plan Planning Subzone boundary data
CHILDCARE: a point feature data in rds format that provides both location and attribute information of childcare centres from data.gov.sg
CHAS: a point feature data in rds format that provides location and attribute information of CHAS clinics
mpsz_sf <- st_read(dsn = "data/shapefile", layer = "MP14_SUBZONE_WEB_PL")
Reading layer `MP14_SUBZONE_WEB_PL' from data source
`C:\aisyahajit2018\IS415\IS415_blog\_posts\2021-09-13-in-class-exercise-5\data\shapefile'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
We can see from the results above that mpsz_sf projection is in SVY21.
CHAS <- read_rds("data/rds/CHAS.rds")
childcare <- read_rds("data/rds/childcare.rds")
Note: There are some data issue in the childcare data frame because Lat
and Lng
should be in numeric data type. The coordinate fields are in decimal degrees. Hence, wgs referencing system is assumed.
CHAS_sf <- st_as_sf(CHAS,
coords = c("X_COORDINATE",
"Y_COORDINATE"),
crs=3414)
Note: st_as_sf accepts the coordinates in character data type.
childcare_sf <- st_as_sf(CHAS,
coords = c("Lng",
"Lat"),
crs=4326) %>%
st_transform(crs=3414)
Note: In childcare we have Lng and Lat as the coordinates where they are both in demical degrees. The coordinate system for that is WGS84, crs code 4326. We then use st_transform to change the coordinates of a geometry from one spatial reference system to another.
Notes for arguments: - alpha is to set transparency - col is to set the colour of the dots - size is to set the size of the dots
tmap_mode('view')
tm_shape(childcare_sf) +
tm_dots(alpha=0.4,
col="blue",
size=0.05) +
tm_shape(CHAS_sf) +
tm_dots(alpha=0.4,
col="red",
size=0.05)
tmap_mode('plot')
Note:
Notes:
Note: After performing the above, the difference that we can see in childcare and childcare_sp is that the dataframe disappears. Similarly for CHAS and mpsz.
Here we are using as.ppp() of maptools package.
Note:
[1] FALSE
The above results shows that there are no duplicates.
[1] FALSE
Similarly, for CHAS, the above results shows that there are no duplicates.
pg <- mpsz[mpsz@data$PLN_AREA_N=="PUNGGOL",]
Note:
pg_sp <- as(pg, "SpatialPolygons")
pg_owin <- as(pg_sp, "owin")
childcare_pg <- childcare_ppp_jit[pg_owin]
CHAS_pg <- CHAS_ppp_jit[pg_owin]
plot(childcare_pg)
It fixed the study area and will only show the data points within PUNGGOL.
In spatstat we can perform a Monte Carlo test by applying the envelope() function
L_childcare <- envelope(childcare_pg,
Lest,
nsim=99,
rank=1,
global=TRUE)
Generating 99 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
Done.
# Code chunk for plotting interactive L-function
title <- "Pairwise Distance: L function"
Lcsr_df <- as.data.frame(L_childcare)
colour=c("#0D657D","#ee770d","#D3D3D3")
csr_plot <- ggplot(Lcsr_df, aes(r, obs-r))+
# plot observed value
geom_line(colour=c("#4d4d4d"))+
geom_line(aes(r,theo-r), colour="red", linetype = "dashed")+
# plot simulation envelopes
geom_ribbon(aes(ymin=lo-r,ymax=hi-r),alpha=0.1, colour=c("#91bfdb")) +
xlab("Distance r (m)") +
ylab("L(r)-r") +
geom_rug(data=Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,], sides="b", colour=colour[1]) +
geom_rug(data=Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,], sides="b", colour=colour[2]) +
geom_rug(data=Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,], sides="b", color=colour[3]) +
theme_tufte()+
ggtitle(title)
text1<-"Significant clustering"
text2<-"Significant segregation"
text3<-"Not significant clustering/segregation"
# the below conditional statement is required to ensure that the labels (text1/2/3) are assigned to the correct traces
if (nrow(Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,])==0){
if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text3, traces = 4) %>%
rangeslider()
}else if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
rangeslider()
}else {
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){
if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
rangeslider()
} else{
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else{
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text2, traces = 5) %>%
style(text = text3, traces = 6) %>%
rangeslider()
}
Note:
L_CHAS <- envelope(CHAS_pg,
Lest,
nsim=99,
rank=1,
global=TRUE)
Generating 99 simulations of CSR ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99.
Done.
# Code chunk for plotting interactive L-function
title <- "Pairwise Distance: L function"
Lcsr_df <- as.data.frame(L_CHAS)
colour=c("#0D657D","#ee770d","#D3D3D3")
csr_plot <- ggplot(Lcsr_df, aes(r, obs-r))+
# plot observed value
geom_line(colour=c("#4d4d4d"))+
geom_line(aes(r,theo-r), colour="red", linetype = "dashed")+
# plot simulation envelopes
geom_ribbon(aes(ymin=lo-r,ymax=hi-r),alpha=0.1, colour=c("#91bfdb")) +
xlab("Distance r (m)") +
ylab("L(r)-r") +
geom_rug(data=Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,], sides="b", colour=colour[1]) +
geom_rug(data=Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,], sides="b", colour=colour[2]) +
geom_rug(data=Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,], sides="b", color=colour[3]) +
theme_tufte()+
ggtitle(title)
text1<-"Significant clustering"
text2<-"Significant segregation"
text3<-"Not significant clustering/segregation"
# the below conditional statement is required to ensure that the labels (text1/2/3) are assigned to the correct traces
if (nrow(Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,])==0){
if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text3, traces = 4) %>%
rangeslider()
}else if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
rangeslider()
}else {
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text2, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){
if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
rangeslider()
} else{
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text3, traces = 5) %>%
rangeslider()
}
} else{
ggplotly(csr_plot, dynamicTicks=T) %>%
style(text = text1, traces = 4) %>%
style(text = text2, traces = 5) %>%
style(text = text3, traces = 6) %>%
rangeslider()
}
Childcare tends to be closer to each other as compared to CHAS clinic.