st_join not working after st_buffer and st_union

Hello everyone,

I am working on a project where I map conflict data (csv format, geometry type = points) on polygons (kml files). Both layers are data.frames, share the same CRS, and the polygons are single feature.

The mapping (with st_join) works well before I apply st_buffer and st_union to the objects (see conflictMap_1225). However, after these two functions, st_join somehow does not connect the two layers (see conflictMap1225_buf). Please find the reprex attached.

Does someone know why st_join behaves like this and knows how to fix it? Or alternatively, could point me to another solution?

Many thanks, Dominique

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(sf)
#> Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(lwgeom)
#> Linking to liblwgeom 2.5.0dev r16016, GEOS 3.6.1, PROJ 4.9.3
#> Warning in fun(libname, pkgname): GEOS versions differ: lwgeom has 3.6.1 sf
#> has 3.7.2
#> Warning in fun(libname, pkgname): PROJ versions differ: lwgeom has 4.9.3 sf
#> has 5.2.0
library(tmap)
library(raster)
#> Loading required package: sp
#> 
#> Attaching package: 'raster'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
library(pryr)
#> 
#> Attaching package: 'pryr'
#> The following object is masked from 'package:raster':
#> 
#>     subs
library(ggplot2)
library(rnaturalearth)

u = "https://arkham.enterprises/r-script/KML_1225.kml"
download.file(url = u, destfile = "/tmp/KML_1225.kml")

# load KML files (project polygons)
project1225_raw <- st_read("/tmp/KML_1225.kml")
#> Reading layer `KACP_AREAS' from data source `/private/tmp/KML_1225.kml' using driver `KML'
#> Simple feature collection with 28 features and 2 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: 34.24501 ymin: -0.400429 xmax: 34.58954 ymax: 0.81301
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs

# Read CSV file (conflict data, points)
conflictData <- read.csv("https://arkham.enterprises/r-script/GED_20200103_copy2.csv")

# Convert the CSV data frame to an sf object
conflictData_sf4326 <- st_as_sf(conflictData, coords = c("longitude", "latitude"), crs = 4326)

# Project CRS of conflict data and KML
conflictData_sf <- st_transform(conflictData_sf4326, 3395) 
project1225 <- st_transform(project1225_raw, 3395)

# Buffer KML polygons
project1225_buf <- st_buffer(project1225, 3000)

# Dissolve buffered polygon into a single polygon 
project1225_union_buf <- st_union(project1225_buf, by_feature = FALSE)
project1225_union_buf_sf <- st_as_sf(project1225_union_buf)

# Map conflict data with project data
conflictMap1225_buf <- st_join(conflictData_sf, project1225_union_buf_sf)

# That's how the mapping should look like
conflictMap_1225 <- st_join(conflictData_sf, project1225)

Created on 2020-01-07 by the reprex package (v0.3.0)

It doesn't work for me before doing the buffer either.

What are you trying to achieve?

library(dplyr)
library(sf)
library(lwgeom)
library(tmap)
library(raster)
library(pryr)
library(ggplot2)
library(rnaturalearth)

u = "https://arkham.enterprises/r-script/KML_1225.kml"
download.file(url = u, destfile = "tmp/KML_1225.kml")

# load KML files (project polygons)
project1225_raw <- st_read("tmp/KML_1225.kml")
st_crs(project1225_raw)
ggplot(project1225_raw) +
	geom_sf()

# Read CSV file (conflict data, points)
conflictData <- read.csv("https://arkham.enterprises/r-script/GED_20200103_copy2.csv")
conflictData_sf4326 <- st_as_sf(conflictData, coords = c("longitude", "latitude"), crs = 4326)
st_crs(conflictData_sf4326)
ggplot(conflictData_sf4326) +
	geom_sf()

# Project CRS of conflict data and KML
conflictData_sf <- st_transform(conflictData_sf4326, 3395)
project1225 <- st_transform(project1225_raw, 3395)

st_crs(conflictData_sf)
ggplot(conflictData_sf) +
	geom_sf()

st_crs(project1225)
ggplot(project1225) +
	geom_sf()

# That's how the mapping should look like
conflictMap_1225 <- st_join(conflictData_sf, project1225)

st_crs(conflictMap_1225)
ggplot(conflictMap_1225) +
	geom_sf()

# Buffer KML polygons
project1225_buf <- st_buffer(project1225, 3000)

st_crs(project1225_buf)
ggplot(project1225_buf) +
	geom_sf()

# Dissolve buffered polygon into a single polygon
project1225_union_buf <- st_union(project1225_buf, by_feature = FALSE)
project1225_union_buf_sf <- st_as_sf(project1225_union_buf)

st_crs(project1225_union_buf)
ggplot(project1225_union_buf) +
	geom_sf()

st_crs(project1225_union_buf_sf)
ggplot(project1225_union_buf_sf) +
	geom_sf()

# Map conflict data with project data
glimpse(conflictData_sf)
glimpse(project1225_union_buf_sf)
conflictMap1225_buf <- st_join(conflictData_sf, project1225_union_buf_sf)

st_crs(conflictMap1225_buf)
ggplot(conflictMap1225_buf) +
	geom_sf()

What is your ultimate aim?

I see a lot of libraries loaded but not actually used - tmap, raster, pryr, lwgeom - and I do not see the point of buffer and union (to make KML into a single spatial object consider the humble dplyr::summarise())

If plain visualization is your aim consider something along this lines:

library(sf)
library(dplyr)
library(leaflet)

u = "https://arkham.enterprises/r-script/KML_1225.kml"
download.file(url = u, destfile = "/tmp/KML_1225.kml")

# load KML files (project polygons)
project1225_raw <- st_read("/tmp/KML_1225.kml")

conflictData <- read.csv("https://arkham.enterprises/r-script/GED_20200103_copy2.csv") %>% 
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# dissolve polygons
project_dissolved <- project1225_raw %>% 
  summarize()

# a visualization - nothing is joinded, just overlaid
leaflet(conflictData) %>% 
  addProviderTiles("CartoDB.Positron") %>%  
  addCircleMarkers(radius = 2,
                   color = "cornflowerblue") %>% 
  leaflet::addPolygons(data = project_dissolved,
                       fill = NA,
                       weight = 3,
                       color = "orangered",
                       opacity = .8)


(interactive map, zoomed on your area of interest)

1 Like

I am preparing this mapping for statistical analysis, for which I need a data.frame in the format of conflictMap_1225. Visualisation is actually secondary. The example I provided here deals with only one project (KML file) but once I get the mapping to work, I will loop through 60 projects.

@jlacko : You are right. I load a bunch of libraries, which I actually don't use. I changed that accordingly. However, I need the buffer as part of the theoretical model I test in my analysis. st_union seemed to be plausible to apply in this case - using dplyr::summarise() does not change the result.

The structure of conflictMap_1225 (created by joining points to undissolved polygons) is expected to differ from conflictMap1225_buf (created by joining the same points to dissolved polygons).

The st_union will drop all data dimensions from the polygons, leaving only geometry.

Which is sort of what happens - once you account for the fact that in sf data frame the geometry column is by convention listed last.

str(conflictMap_1225)
Classes ‘sf’ and 'data.frame':	11 obs. of  16 variables:
 $ id          : int  12330001 12250001 12250002 12250003 67972 23385 24255 82612 82645 158618 ...
 $ year        : int  2012 2012 2014 2014 2013 2004 2007 2008 2008 2011 ...
 $ geom_wkt    : Factor w/ 11 levels "POINT (-0.036661 34.449658)",..: 7 2 1 4 3 6 5 10 9 8 ...
 $ Name        : Factor w/ 28 levels "BUMULA","CENTRAL UYOMA",..: NA 3 24 1 NA NA NA NA NA NA ...
 $ description : Factor w/ 28 levels "<html xmlns:fo=\"http://www.w3.org/1999/XSL/Format\" xmlns:msxsl=\"urn:schemas-microsoft-com:xslt\">\n\n<head>\"| __truncated__,..: NA 3 24 1 NA NA NA NA NA NA ...
 $ timestamp   : POSIXct, format: NA NA NA ...
 $ begin       : POSIXct, format: NA NA NA ...
 $ end         : POSIXct, format: NA NA NA ...
 $ altitudeMode: Factor w/ 0 levels: NA NA NA NA NA NA NA NA NA NA ...
 $ tessellate  : int  NA -1 -1 -1 NA NA NA NA NA NA ...
 $ extrude     : int  NA 0 0 0 NA NA NA NA NA NA ...
 $ visibility  : int  NA -1 -1 -1 NA NA NA NA NA NA ...
 $ drawOrder   : int  NA NA NA NA NA NA NA NA NA NA ...
 $ icon        : Factor w/ 0 levels: NA NA NA NA NA NA NA NA NA NA ...
 $ snippet     : Factor w/ 1 level "": NA 1 1 1 NA NA NA NA NA NA ...
 $ geometry    :sfc_POINT of length 11; first list element:  'XY' num  -7832128 544138
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
  ..- attr(*, "names")= chr  "id" "year" "geom_wkt" "Name" ...

str(conflictMap1225_buf)
Classes ‘sf’ and 'data.frame':	11 obs. of  4 variables:
 $ id      : int  12330001 12250001 12250002 12250003 67972 23385 24255 82612 82645 158618 ...
 $ year    : int  2012 2012 2014 2014 2013 2004 2007 2008 2008 2011 ...
 $ geom_wkt: Factor w/ 11 levels "POINT (-0.036661 34.449658)",..: 7 2 1 4 3 6 5 10 9 8 ...
 $ geometry:sfc_POINT of length 11; first list element:  'XY' num  -7832128 544138
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA
  ..- attr(*, "names")= chr  "id" "year" "geom_wkt"

On a related note: you may wish to reinstall your {lwgeom} library, it seems to be out of sync with your {sf} installation - see the warning about GEOS versions. This may, and may not, have implications to the st_union (it uses GEOS).

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.