Rasters and Shapefiles

I am using the WorldPop Population Count for DR Congo as a GeoTIFF (.tif) file (See here). This single-band raster file has about 5.17 million grids cells each at the 30-arc second resolution for the entire country.

I would like to clip out just the two northeastern provinces ([Ituri and North-Kivu] in R and save the clipped file as a .tif file. I am able to do this using QGIS (See here) however this is way too complicated. I am looking to using GADM inside the R workspace and overlay it on top of the population count raster and clip out the two provinces.

Here is what I have for my preliminary attempt at importing the raster (.tif) file and the GSDM (.rds) in R.

########################################################################### 
#                                                                         #
# Spatial tracking of the 2018-2020 Kivu Ebola outbreak in DRC            #
#                                                                         #
# This source code is issued under the GNU General Public License, v3.0.  #
#                                                                         #
# This script is free software; you can redistribute it and/or modify     #
# it under the terms of the GNU General Public License as published by    #
# the Free Software Foundation; either version 3.0 of the License, or     #
# (at your option) any later version.                                     #
#                                                                         #
# See the GNU General Public License for more details.                    #
#                                                                         #
# https://www.gnu.org/licenses/gpl-3.0.en.html                            #
###########################################################################

rm(list = ls())

#install.packages("raster", dependencies = T)
#install.packages("rgdal", dependencies = T)
#install.packages("ncdf4", dependencies = T)
#install.packages("rstudioapi", dependencies = T)

library(sp)
library(raster)
library(rgdal)
library(ncdf4)
library(rstudioapi)
# library(colorRamps)
# library(ggmap)
# library(ggplot2)

#----------------------------#
# Set your working directory #
#----------------------------#

setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # RStudio IDE preferred
getwd() # Path to your working directory

#----------------------------------------------------------------#
# Source 1: WorldPop UN-Adjusted Population Count GeoTIFF raster #
#----------------------------------------------------------------#

# Downloaded from https://www.worldpop.org/geodata/summary?id=35845

DRCWorldPop <- raster("cod_ppp_2020_1km_Aggregated_UNadj.tif")

DRCWorldPop # this original raster layer has 2,261 rows and 2,289 columns = 5,175,429 grid cells

dim(DRCWorldPop); length(DRCWorldPop); extent(DRCWorldPop)

names(DRCWorldPop) <- "Susceptible"

names(DRCWorldPop); res(DRCWorldPop); projection(DRCWorldPop)

summary(getValues(DRCWorldPop))

minValue(DRCWorldPop); maxValue(DRCWorldPop)

DRCWorldPop <- replace(DRCWorldPop, is.na(DRCWorldPop), 0)

summary(getValues(DRCWorldPop))

sum(getValues(DRCWorldPop))

nlayers(DRCWorldPop)

#---------------------#
# Source 2: From GADM #
#---------------------#

#?getData
drc <- getData("GADM", level=1, country="COD")
#drc$NAME_1 # List of all provinces

drc <- drc[drc$NAME_1 %in% c("Ituri", "Nord-Kivu"), ]

r <- raster(drc, resolution = res(DRCWorldPop)[1])
values(r) <- 0

r           # this raster layer has 689 rows and 487 columns
extent(r) 

plot(log(DRCWorldPop), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", col=topo.colors(100), legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8), axes=T)
lines(drc, col="black", lwd=1)

# plot(drc, col = "light yellow")
# lines(drc, col="black", lwd=2)

#--------------------------------------------#
# Merging and Cropping Source 1 and Source 2 #
#--------------------------------------------#

# ?merge

merged <- merge(DRCWorldPop, r, tolerance = 0.07) 
# Task: What is the significance of tolerance?

# ?crop 
cropped <- crop(merged, drc)
cropped # this raster layer has 689 rows and 487 columns

extent(cropped)

plot(log(cropped), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8), axes=T)
lines(drc, col="red", lwd=2)

# Task: How to change the plot Legend to make it more colorful?
# Task: The plot is in log-scale, how to construct a similar colorful plot in raw scale?

writeRaster(cropped, "cropped.tif", format = "GTiff", overwrite = TRUE)

croppedDRCWorldPop <- raster("cropped.tif") 

croppedDRCWorldPop # this cropped raster layer has 689 rows and 487 columns

#--------------------------------#
# Aggregating the cropped raster #
#--------------------------------#

aggregationFactor <- 10 # in km

DRC_aggr_count <- aggregate(croppedDRCWorldPop, fact = c(aggregationFactor, aggregationFactor), fun = sum, na.rm = TRUE)

DRC_aggr_count # this raster layer has 69 rows and 49 columns

names(DRC_aggr_count); res(DRC_aggr_count); projection(DRC_aggr_count)
dim(DRC_aggr_count); length(DRC_aggr_count); extent(DRC_aggr_count); isLonLat(DRC_aggr_count)

summary(getValues(DRC_aggr_count))

sum(getValues(DRC_aggr_count)) 

nrow(DRC_aggr_count)
ncol(DRC_aggr_count)

# ?xyFromCell
#xyFromCell(DRC_aggr_count, 1:ncell(DRC_aggr_count), spatial=FALSE)

# #xy for corners of a raster:
# xyFromCell(DRC_aggr_count, c(1, ncol(DRC_aggr_count), ncell(DRC_aggr_count)-ncol(DRC_aggr_count)+1, ncell(DRC_aggr_count)))
# 
# xmin(DRC_aggr_count)
# xmax(DRC_aggr_count)
# ymin(DRC_aggr_count)
# ymax(DRC_aggr_count)

# origin(DRC_aggr_count)

# https://www.nationalgeographic.org/encyclopedia/latitude/

# https://www.nationalgeographic.org/encyclopedia/longitude/

plot(log(DRC_aggr_count), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8))
lines(drc, col="red", lwd=2)
text(32,3,"aggregated", xpd = TRUE)

# Task: How to change the plot Legend to make it more colorful?
# Task: The plot is in log-scale, how to construct a similar colorful plot in raw scale?

# library(RColorBrewer)
# warna <- brewer.pal(n = 11, name = "RdYlGn")
# #warna <- rev(warna)
# plot(log(DRC_aggr_count), col=palette(warna), main = "2020 UN-Adjusted Population Count \n for DR Congo (log-scale)", legend.width=2, legend.shrink=1, legend.args=list(text='Persons', side=4, font=2, line=2.5, cex=0.8))
# lines(drc, col="red", lwd=2)
# #plot.window(xlim=extent(DRC_aggr_count)[1:2], ylim=extent(DRC_aggr_count)[3:4])

#spplot(log(DRC_aggr_count))

#-----------------------------------#
# Export cropped raster to a netCDF #
#-----------------------------------#

if (require(ncdf4)) {
  #rnc <- writeRaster(DRCWorldPop, filename ='Congo_full_0000.nc', format = "CDF",  varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
  rnc_aggr_tif <- writeRaster(DRC_aggr_count, filename ='cod_ppp_2020_10km_Aggregated_UNadj.tif', format = "GTiff",  varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
  rnc_aggr <- writeRaster(DRC_aggr_count, filename ='Congo_0000.nc', format = "CDF",  varname = "Susceptible", varunit = "Persons", longname = "Susceptible", overwrite = TRUE)
}

I would like the cell values for the neighbouring countries (Uganda, Rwanda, South Sudan, Burundi) set equal to zero. Also the resulting tif has 689 rows and 487 columns which is equivalent to 335,543 nonoverlapping grid cells each containing the population count. This is too big for my simulation purposes so I am aggregating the cropped raster by a factor of 10 to get a raster with 69 rows by 49 columns. Aggregation is done using the above code and finally saved as a NetCDF file and viewed using the Panoply tool. The base NetCDF file has one layer (Population Count data which we call "Susceptible".). To this NetCDF file we add additional epidemic compartments as follows.


#---------------------------------------------------------#
# Adding more epidemic compartments to an existing netCDF #
#---------------------------------------------------------#

episim <- nc_open("Congo_0000.nc", write = TRUE)

episim$dim$latitude$vals;  length(episim$dim$latitude$vals)     # lat is vertical axis  (or)rows in our case
episim$dim$longitude$vals; length(episim$dim$longitude$vals)		# lon is horizontal axis (or) columns in our case

nrows <- length(episim$dim$latitude$vals) 
ncols <- length(episim$dim$longitude$vals)

# Longitude is "East - West" means columns
# Latitude is "North - South" means rows

#ncatt_get(episim, 0, attname=NA, verbose=FALSE)

ncatt_put(episim, 0, "created_by", attval = c("Ashok Krishnamurthy"), verbose=FALSE)
ncatt_put(episim, 0, "contact", attval = c("Ashok Krishnamurthy <akrishnamurthy@mtroyal.ca>"), verbose=FALSE)

ncatt_put(episim, 0, "nRows", attval = nrows, verbose=FALSE)
ncatt_put(episim, 0, "nCols", attval = ncols, verbose=FALSE)

#Upper Left Corner pair is (first row, first column) = (27.22375, 3.674584)

ncatt_put(episim, 0, "ULCornerLongitude", attval = episim$dim$longitude$vals[1], verbose=FALSE)  
ncatt_put(episim, 0, "ULCornerLatitude", attval = episim$dim$latitude$vals[1], verbose=FALSE)

#Lower Left Corner pair is (last row, first column) = (27.22375, -2.075416)

ncatt_put(episim, 0, "LLCornerLongitude", attval = episim$dim$longitude$vals[1], verbose=FALSE)  #
ncatt_put(episim, 0, "LLCornerLatitude", attval = episim$dim$latitude$vals[nrows], verbose=FALSE) # 

#ncatt_put(episim, 0, "cellSize", attval = abs(episim$dim$longitude$vals[1] - episim$dim$longitude$vals[2]), verbose=FALSE)

ncatt_put(episim, 0, "hcellSize", attval = res(DRC_aggr_count)[1], verbose=FALSE)
ncatt_put(episim, 0, "vcellSize", attval = res(DRC_aggr_count)[2], verbose=FALSE)

# episim$var[[1]] # str(episim$var[[1]]$dim) # str(episim$var[[1]])

#----------------------------------------------------------------#
# Adding new Epidemic State Variables to an existing netCDF file #
#----------------------------------------------------------------#

x <- ncdim_def(name = "longitude", units = "degrees_east", vals = episim$dim$longitude$vals, unlim = FALSE, create_dimvar = TRUE, calendar = NA, longname = "longitude")
y <- ncdim_def(name = "latitude",  units = "degrees_north", vals = episim$dim$latitude$vals, unlim = FALSE, create_dimvar = TRUE, calendar = NA, longname = "latitude")

#?ncvar_def
Vaccinated <- ncvar_def(name = "Vaccinated", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Vaccinated")
Exposed   <- ncvar_def(name = "Exposed", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Exposed")
Infected  <- ncvar_def(name = "Infected", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Infected")
Recovered <- ncvar_def(name = "Recovered", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Recovered")
Dead      <- ncvar_def(name = "Dead", units = "Persons", dim = list(x,y), missval = NULL, prec = "float", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Dead")
Inhabitable <-  ncvar_def(name = "Inhabitable", units = "Binary", dim = list(x,y), missval = NULL, prec = "integer", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "Inhabitable")
#ProvinceIdentifier <-  ncvar_def(name = "ProvinceIdentifier", units = "Province", dim = list(x,y), missval = NULL, prec = "integer", shuffle=FALSE, compression=NA, chunksizes=NA, verbose=FALSE, longname = "ProvinceIdentifier")

# Epidemic State Variables (or Epidemic Compartments) are added in the following order
ncvar_add(episim, Vaccinated)
ncvar_add(episim, Exposed)
ncvar_add(episim, Infected)
ncvar_add(episim, Recovered)
ncvar_add(episim, Dead)
ncvar_add(episim, Inhabitable)
#ncvar_add(episim, ProvinceIdentifier)

currSusceptible <- ncvar_get(episim, episim$var[[2]])     # WorldPop
#ProvinceIdentifier <- ncvar_get(epiProvince, epiProvince$var[[2]]) # Sitansu

dim(currSusceptible)     # 49 rows and 69 columns
dim(t(currSusceptible))  # 69 rows and 49 columns

# sum(currSusceptible)
# sum(currSusceptible>0)
# sum(currSusceptible == 0)
# max(currSusceptible)

# currSusceptible[ncols, nrows]
# currSusceptible[nrows, ncols] # Subscript out of bounds error! AS EXPECTED

# dim(ProvinceIdentifier)
# dim(t(ProvinceIdentifier))
# table(ProvinceIdentifier)

# I could use a combination of transpose and flip from raster package

currVaccinated <- currExposed <- currInfected <- currRecovered  <- currDead <- currInhabitable <- matrix(0, length(episim$dim$longitude$vals),length(episim$dim$latitude$vals))

for(i in 1:ncols)
{ 								# ncols
  for(j in 1:nrows)
  {							  # nrows
    if (currSusceptible[i,j] > 0)
    {	
      currInhabitable[i,j] <- 1
    }
    if (currSusceptible[i,j] == 0)
    {	
      currInhabitable[i,j] <- 0
    }
  }
}

table(currInhabitable)

# Some cells in DRC have a population count equal to zero. Possibly forests, deserts or uninhabited areas

# currInhabitable
#  0    1 
# 903 2478

nc_close(episim)

################################################################################################

episim <- nc_open("Congo_0000.nc", write = TRUE)    # Fill values to an existing ncdf file

ncvar_put(episim, episim$var[[2]], currSusceptible)

ncvar_put(episim, episim$var[[3]], currVaccinated)
ncvar_put(episim, episim$var[[4]], currExposed)
ncvar_put(episim, episim$var[[5]], currInfected)
ncvar_put(episim, episim$var[[6]], currRecovered)
ncvar_put(episim, episim$var[[7]], currDead)
ncvar_put(episim, episim$var[[8]], currInhabitable)
# ncvar_put(episim, episim$var[[9]], ProvinceIdentifier) 

cat(paste("The file", episim$filename, "has", episim$nvars, "variables"), fill=TRUE)
for (v in 1:episim$nvars) cat(paste("Variable ", v, " is ", episim$var[[v]]$name,".",sep=""), fill=TRUE)
#episim # class(episim) # str(episim)

nc_close(episim)

Rplot0 Rplot1 Rplot2

You will need a combination of raster::mask() and raster::crop() calls; consider the example linked here:

resample before using using mask

DRCWorldPop <- resample(DRCWorldPop, r)

save using writeRaster

writeRaster(m, "mask.tif", format = "GTiff")

?resample
Resample transfers values between non matching Raster* objects (in terms of origin and resolution). Use projectRaster if the target has a different coordinate reference system (projection).

library(raster)
library(rgdal)

drc <- getData("GADM", level=1, country="COD")
#drc$NAME_1

drc <- drc[!(drc$NAME_1 %in% c("Bas-Uélé", "Équateur", "Haut-Katanga", "Haut-Lomami", "Haut-Uélé", "Kasaï", "Kasaï-Central", "Kasaï-Oriental", "Kinshasa", "Kongo-Central", "Kwango", "Kwilu", "Lomami", "Lualaba", "Maï-Ndombe", "Maniema", "Mongala", "Nord-Ubangi", "Sankuru", "Sud-Kivu", "Sud-Ubangi", "Tanganyika", "Tshopo", "Tshuapa")), ]

class(drc)

r <- raster(drc, res=0.008333333)

values(r) <- 0 #1:ncell(r)

plot(drc, col = "light yellow")

lines(drc, col="blue", lwd=2)

DRCWorldPop <- raster("cod_ppp_2020_1km_Aggregated_UNadj.tif")

DRCWorldPop # this raster layer has 689 rows and 487 columns

r # this raster layer has 689 rows and 487 columns

m <- resample(DRCWorldPop, r) # What does this even do?

plot(m)

writeRaster(m, "resampled.tif", format = "GTiff")

clippedDRCWorldPop <- raster("resampled.tif")

clippedDRCWorldPop # this raster layer has 689 rows and 487 columns but all values are zero

Thank you @meztez. I have edited the original post with the most current R code. The clipped raster in now having negative values for population count which is troubling. I was thinking I will use

DRCWorldPop <- replace(DRCWorldPop, is.na(DRCWorldPop), 0)

DRCWorldPop[DRCWorldPop < 0] = 0

Both doesn't seem to get the Negative values out. Any advice on how to fix this?

Merging with a greater tolerance?

library(raster)
library(rgdal)

drc <- getData("GADM", level=1, country="COD")
#drc$NAME_1

drc <- drc[drc$NAME_1 %in% c("Ituri", "Nord-Kivu"), ]

class(drc)

r <- raster(drc, resolution=0.008333333)

values(r) <- 0 #1:ncell(r)

plot(drc, col = "light yellow")

lines(drc, col="blue", lwd=2)

DRCWorldPop <- raster("cod_ppp_2020_1km_Aggregated_UNadj.tif")
merged <- merge(DRCWorldPop, r, tolerance = 0.07)
cropped <- crop(merged, drc)

extent(cropped)

plot(cropped)

lines(drc, col="red", lwd=2)

writeRaster(m, "cropped.tif", format = "GTiff", overwrite=TRUE)

clippedDRCWorldPop <- raster("cropped.tif")

clippedDRCWorldPop

@meztez Thank you. What is puzzling to me is the original tif file with the UN Adjusted population count had no negative values: 0, 83281.66 (min, max) so when I use resample() and mask() functions the clipped raster had values: -119.3592, 31953.65 (min, max) but your latest suggestion of merge() and crop() functions without resample() or mask() eliminates the negative values but the resulting raster has values: 0, 32905.59 (min, max). What really happened here? Were the negative values imply dropped? I gave a variety of tolerance values and I am getting an error for some values and works the same for others. Any advice on why or how tolerance value helps.

Read the doc on the package.

The raster package — R Spatial

Basically, the two merged grid do not align perfectly.

Tolerance refer to the permissible difference in origin (relative to the cell resolution).

Everything is in the doc, you will be much better in the long run if you read it.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.