In Class Exercise 5

A short description of the post.

Sarah Chin linkedin.com/in/sarahchin99/
09-13-2021

Installing and Loading the R package

packages <- c('maptools', 'sf', 'raster', 'spatstat', 'tmap', 'tidyverse', 'plotly', 'ggthemes')
for(p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
library(p, character.only = T)
}

Importing Geospatial data

Importing shapefile using st_read() of sf package. The output object is in tibble sf object class.

mpsz_sf <- st_read(dsn = "data/shapefile",
                   layer = "MP14_SUBZONE_WEB_PL") # st_read is for geospatial data, not for excel 
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `D:\sarahcsp\IS415_blog\_posts\2021-09-13-in-class-exercise\data\shapefile' 
  using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
# for excel, we have readxl

Projection is in SVY21.

Importing Aspatial data

read_rds() of readr package is used instead of readRDS() of base R is used. This is because output of read_RDS is in tibble object.

CHAS <- read_rds("data/rds/CHAS.rds")
childcare <- read_rds("data/rds/childcare.rds")

Note that there are some data issue in childcare data frame because ‘Lat’ and ‘Lng’ should be in numeric data type.

Converting the aspatial data frame into sf objects

CHAS_sf <- st_as_sf(CHAS,
                    coords = c("X_COORDINATE",
                               "Y_COORDINATE"), # can refer to the excel itself for the coords
                    crs = 3414)

Note: st_as_sf accept coordinates in character data type.

childcare_sf <- st_as_sf(childcare,
                    coords = c("Lng",
                               "Lat"), 
                    crs = 4326) %>% # because it is in decimal 
  st_transform(crs = 3414)

If you want to change the ‘Lat’ and ‘Lng’ in the original dataset to numeric data type, it is also acceptable.

Plot map to review

tmap_mode("view")
tm_shape(childcare_sf) + 
  tm_dots(alpha = 0.4, # alpha sets the transparency
          col = "blue", 
          size = 0.05)

You can also view more than one data in a single map.

tmap_mode("view")
tm_shape(childcare_sf) + 
  tm_dots(alpha = 0.4, 
          col = "blue", 
          size = 0.05) +
tm_shape(CHAS_sf) + 
  tm_dots(alpha = 0.4, 
          col = "red", 
          size = 0.05)

Geospatial Data Wrangling

Converting from sf to Spatial dataframe classes

as_Spatial() of sf package.

childcare <- as_Spatial(childcare_sf)
CHAS <- as_Spatial(CHAS_sf)
mpsz <- as_Spatial(mpsz_sf)

Converting Spatial data frame into Spatial objects

as.SpatialPoint() of as.SpatialPolygon() of maptools package.

childcare_sp <- as(childcare, "SpatialPoints")
CHAS_sp <- as(CHAS, "SpatialPoints")
mpsz_sp <- as(mpsz, "SpatialPolygons")

Note that it is no longer a data frame, it is a formal class. Compare it to childcare and you realise that the data has been dropped from childcare_sp.

Converting from Spatial Objects into ppp objects

Dropping all the projection information. All that would be left is the coordinates.

childcare_ppp <- as(childcare_sp, "ppp") 
CHAS_ppp <- as(CHAS_sp, "ppp")

Removing duplicate points using jitter

childcare_ppp_jit <- rjitter(childcare_ppp,
                             retry = TRUE, 
                             nsim = 1, # number of simulations
                             drop = TRUE)
any(duplicated(childcare_ppp_jit)) # checks if there are any duplicated in the jit version of childcare
[1] FALSE
CHAS_ppp_jit <- rjitter(CHAS_ppp,
                             retry = TRUE, 
                             nsim = 1, 
                             drop = TRUE)
any(duplicated(CHAS_ppp_jit)) 
[1] FALSE

Now we want to plot it in a graph. However, we cannot use tmap like how we did before since it is in ppp format. We have to use the plot() function instead.

Extracting Punggol Planning Area

Specifying which of the data we want to use.

pg <- mpsz[mpsz@data$PLN_AREA_N=="PUNGGOL",]

Converting SpatialPolygonDataFrame into SpatialPologons object

pg_sp <- as(pg, "SpatialPolygons")

Converting SpatialPolygons into owin object

pg_owin <- as(pg_sp, "owin")

Extracting spatial points window owin

childcare_pg <- childcare_ppp_jit[pg_owin]
CHAS_pg <- CHAS_ppp_jit[pg_owin]
plot(childcare_pg)

L-Function

L_childcare <- envelope(childcare_pg,
                        Lest, 
                        nsim = 99, 
                        rank = 1, 
                        global = TRUE)
Generating 99 simulations of CSR  ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.

Done.

After the iterations are done, it should go through this chunk in order to get an interactive version of the map.

title <- "Pairwise Distance: L function"

Lcsr_df <- as.data.frame(L_childcare)

colour=c("#0D657D","#ee770d","#D3D3D3")
csr_plot <- ggplot(Lcsr_df, aes(r, obs-r))+
  # plot observed value
  geom_line(colour=c("#4d4d4d"))+
  geom_line(aes(r,theo-r), colour="red", linetype = "dashed")+
  # plot simulation envelopes
  geom_ribbon(aes(ymin=lo-r,ymax=hi-r),alpha=0.1, colour=c("#91bfdb")) +
  xlab("Distance r (m)") +
  ylab("L(r)-r") +
  geom_rug(data=Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,], sides="b", colour=colour[1])  +
  geom_rug(data=Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,], sides="b", colour=colour[2]) +
  geom_rug(data=Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,], sides="b", color=colour[3]) +
  theme_tufte()+
  ggtitle(title)

text1<-"Significant clustering"
text2<-"Significant segregation"
text3<-"Not significant clustering/segregation"

# the below conditional statement is required to ensure that the labels (text1/2/3) are assigned to the correct traces
if (nrow(Lcsr_df[Lcsr_df$obs > Lcsr_df$hi,])==0){ 
  if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){ 
    ggplotly(csr_plot, dynamicTicks=T) %>%
      style(text = text3, traces = 4) %>%
      rangeslider() 
  }else if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){ 
    ggplotly(csr_plot, dynamicTicks=T) %>%
      style(text = text2, traces = 4) %>%
      rangeslider() 
  }else {
    ggplotly(csr_plot, dynamicTicks=T) %>%
      style(text = text2, traces = 4) %>%
      style(text = text3, traces = 5) %>%
      rangeslider() 
  }
} else if (nrow(Lcsr_df[Lcsr_df$obs < Lcsr_df$lo,])==0){
  if (nrow(Lcsr_df[Lcsr_df$obs >= Lcsr_df$lo & Lcsr_df$obs <= Lcsr_df$hi,])==0){
    ggplotly(csr_plot, dynamicTicks=T) %>%
      style(text = text1, traces = 4) %>%
      rangeslider() 
  } else{
    ggplotly(csr_plot, dynamicTicks=T) %>%
      style(text = text1, traces = 4) %>%
      style(text = text3, traces = 5) %>%
      rangeslider()
  }
} else{
  ggplotly(csr_plot, dynamicTicks=T) %>%
    style(text = text1, traces = 4) %>%
    style(text = text2, traces = 5) %>%
    style(text = text3, traces = 6) %>%
    rangeslider()
  }