GeoSpatialMapping-ForwardSortationArea

Author

Ajay Rao

Description

This notebook takes us through an example of plotting census population data by Forward Sortation Area (FSA). A FSA is the first 3-digit of postal codes. Refer to FSA Description for further information.

We need 3 sets of info for generating these plots:

Forward Sortation Area Shape File - Stats Canada Shapefile (2016)
Forward Sortation Area Population Stats - Stats Canada Population Stats (2016)
LISTING OF FORWARD SORTATION AREA CODES from Canada Post - FSA List - Canada Post

You will notice that the shapefile and population data is from 2016. The most recent data available from Stats Canada at Forward Sortation Area level is from 2016.

Importing Necessary Libraries

if (!require(pacman)) install.packages("pacman")

library(pacman)

p_load(data.table, leaflet, rgdal, sp, htmltools, 
       pdftools, tidyverse, scales)

Importing the ShapeFile

## Download the zip file
if (!"tFile" %in% list.files())
{
  download.file("https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/files-fichiers/2016/lfsa000b16a_e.zip", "tFile")
  
  
}

## Extract the .shp file
if (!"lfsa000b16a_e.shp" %in% list.files())
{
  unzip("tFile")
}


## Load Shape File into R
shpDF <- readOGR(dsn="lfsa000b16a_e.shp")

OGR data source with driver: ESRI Shapefile 
Source: "E:\Learning\Geo Spatial Mapping\lfsa000b16a_e.shp", layer: "lfsa000b16a_e"
with 1620 features
It has 3 fields

Load Population Data File

## Import .csv file using "fread"
popDT <- fread("T1201EN.CSV", nrows = 1642)

## We drop the frist row --> this corresponds to Total Canada
## and the fourth column --> this column doesn't have any data
popDT <- popDT[-1,-4]

## Give easier to manage column names
colnames(popDT) <- c("FSA", "GEOName", "Province", "Population",
                     "Dwellings", "DwellingsOccupied")

Import FSA Names from Canada Post PDF

## Download the pdf file
if (!"fsa-list-april-2022.pdf" %in% list.files())
{
  download.file("https://www.canadapost-postescanada.ca/cpc/doc/en/support/kb/nps/fsa-list-april-2022.pdf", "fsa-list-april-2022.pdf")
  
}

## Read the PDF file
pdfPageList <- pdf_text("fsa-list-april-2022.pdf") %>% 
  str_split("\n")


## The first page doesn't contain any FSAs so we skip it
## The top 11 lines are headers
## The last two lines are footers
for(i in 2:15) { #sets the iteration to go through all 17 pages
  pdfPageList[[i]] <- pdfPageList[[i]][11:(length(pdfPageList[[i]]) - 2)]
}
rm(i)

## Convert the List to Table
myFunc <- function(x)
{
  ## This function converts list to data.table
  
  return(data.table(x))
}

pdfPageTable <- do.call("rbind",lapply(pdfPageList[2:15], myFunc))

rm(pdfPageList)

## Split text into individual columns
### If there are 4 consecutive white space, we consider that as a separator
pdfPageTable[,paste0("c", seq(1, 4)):=tstrsplit(x, "\\s{4, }")]
### Drop the original column
pdfPageTable <- pdfPageTable[,!"x",with=F]

## Extract FSA codes and FSA Names
myFunc2 <- function(x, DT)
{
 
  # For each column passed to this function
  # First 3 characters are considered as FSA code
  # The text starting from 4 character onwards is considered FSA Name
  
  # The FSA codes and Names are stored as a data.table and returned
  
  DT2 <- DT[,x,with=F]
  
  DT2 <- DT2[!is.na(get(x)) & !(get(x) == "SUCC BUREAU-CHEF") & !get(x)=="",]
  
  DT2$FSA <- substr(trimws(DT2[[x]]), 1, 3)
  DT2$FSAName <- trimws(substr(trimws(DT2[[x]]), 4, nchar(DT2[[x]])))
  DT2 <- DT2[,!x,with=F]
  
  return(DT2)
}

modTable <- do.call("rbind", 
                    lapply(colnames(pdfPageTable), 
                           myFunc2, DT = pdfPageTable))

rm(pdfPageTable)

Merge Population Data with Shape File

## Merge the Shape File with Population Data
shpPopDT <- merge(x = shpDF[shpDF$PRNAME == "Ontario",],
                  y = popDT,
                  by.x = "CFSAUID",
                  by.y = "FSA",
                  all.x = T)

## Delete the shpDF to reduce memory space
rm(shpDF, popDT)
gc()

          used (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 1481503 79.2    3425161 183.0  3425161 183.0
Vcells 3880319 29.7   25202836 192.3 31500559 240.4

## Merge the FSA Names
shpPopDT <- merge(
  x = shpPopDT,
  y = modTable,
  by.x = "CFSAUID",
                  by.y = "FSA",
                  all.x = T
)
rm(modTable)

## Convert the Merged File into a Shape File format
shpPopDT <- spTransform(shpPopDT, CRS("+init=epsg:4326"))


## Store a copy as a data.table to create labels
popDT <- data.table(shpPopDT@data)

popDT$Population <- comma(popDT$Population)
popDT$Dwellings <- comma(popDT$Dwellings)

Plot on a Map Using Leaflet

## labels
labs <- lapply(seq(nrow(popDT)), function(i) {
  paste0( '<p><b>', popDT[i, "FSAName"], '</b></p><p>', 
          "<i>Population: </i>", popDT[i, "Population"],'</p><p>', 
          "<i>Dwellings: </i>", popDT[i, "Dwellings"], '</p>' ) 
})

## Palette
pal <- colorNumeric(
  palette = "Reds",
  domain = shpPopDT$Population)

## Plotting the map
leaflet(shpPopDT) %>% 
  addTiles() %>%
  setView(-84.81, 49.74, zoom = 5) %>%
  addPolygons(
    data = shpPopDT,
  fillColor = ~pal(Population),
  weight = 2,
  opacity = 1,
  color = "white",
  dashArray = "3",
  fillOpacity = 0.7,
  highlightOptions = highlightOptions(
    weight = 5,
    color = "#666",
    dashArray = "",
    fillOpacity = 0.7,
    bringToFront = TRUE),
  label = lapply(labs, HTML),
  labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")
  ) %>% addLegend(pal = pal, values = ~Population, opacity = 0.7, title = "Population",
  position = "bottomleft")