Convert geometry column with lists of longitude/latitude coordinates

Hi,

I have a dataframe that is intersected from two fortified shapefiles. The first row of the new dataframe is like this below (can't input more due to word limits). I have two questions:

  1. How can I convert the geometry column to separate columns with longitude and latitude so that I can map the points?
  2. Is there a way to combine the geometry column by another variable (id)? There are two values in id (40 and 41). What I want to do is to map the polygon by the id variable.

Thank you for your attention and help!

structure(list(id = "41", geometry = structure(list(structure(list(
    structure(c(-71.4040535189999, -71.4040835179999, -71.4041215189999, 
    -71.404510519, -71.404560518, -71.404602519, -71.404602519, 
    -71.404716518, -71.4038335179999, -71.4030385189999, -71.4020585179999, 
    -71.401825518, -71.4016515179999, -71.400536517, -71.400500517, 
    -71.400452517, -71.4003685179999, -71.400315517, -71.400269517, 
    -71.4001465169999, -71.401008518, -71.4013135169999, -71.4015355179999, 
    -71.401554517, -71.4018105179999, -71.402083518, -71.4026645179999, 
    -71.4027105179999, -71.403580518, -71.4040535189999, 41.8245935400001, 
    41.8248955400001, 41.82528354, 41.82526154, 41.82571854, 
    41.82611554, 41.826157541, 41.8269165400001, 41.82699454, 
    41.82706154, 41.8271415410001, 41.8271605410001, 41.827177541, 
    41.827290541, 41.826901541, 41.8263745400001, 41.8255435410001, 
    41.8251205400001, 41.82476854, 41.8239185400001, 41.82381854, 
    41.8237835400001, 41.82375854, 41.82375754, 41.82375454, 
    41.82375454, 41.82375454, 41.82460854, 41.8246045400001, 
    41.8245935400001), .Dim = c(30L, 2L))), class = c("XY", "POLYGON", 
"sfg"))), class = c("sfc_POLYGON", "sfc"), precision = 0, bbox = structure(c(xmin = -71.404716518, 
ymin = 41.82375454, xmax = -71.4001465169999, ymax = 41.827290541
), class = "bbox"), crs = structure(list(input = "EPSG:4326", 
    wkt = "GEOGCS[\"WGS 84\",\n    DATUM[\"WGS_1984\",\n        SPHEROID[\"WGS 84\",6378137,298.257223563,\n            AUTHORITY[\"EPSG\",\"7030\"]],\n        AUTHORITY[\"EPSG\",\"6326\"]],\n    PRIMEM[\"Greenwich\",0,\n        AUTHORITY[\"EPSG\",\"8901\"]],\n    UNIT[\"degree\",0.0174532925199433,\n        AUTHORITY[\"EPSG\",\"9122\"]],\n    AUTHORITY[\"EPSG\",\"4326\"]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
-1L), sf_column = "geometry", agr = structure(c(OBJECTID = NA_integer_, 
NAME = NA_integer_, COUNTY = NA_integer_, OSP = NA_integer_, 
TWNCODE = NA_integer_, LAND = NA_integer_, Shape__Are = NA_integer_, 
Shape__Len = NA_integer_, OBJECTID.1 = NA_integer_, ZCTA5CE10 = NA_integer_, 
Shape__Are.1 = NA_integer_, Shape__Len.1 = NA_integer_, zip = NA_integer_
), class = "factor", .Label = c("constant", "aggregate", "identity"
)), class = c("tbl_df", "tbl", "data.frame"))

Your geometry column contains polygons and not points so it's not just simply longitude and latitude. See below on how to plot, you don't need to know the lat/lon. If you have some data on a data.frame or tibble, you can use the join functions to merge on data to this sf object and then plot.

library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

dat <- structure(list(id = "41", geometry = structure(list(structure(list(
  structure(c(-71.4040535189999, -71.4040835179999, -71.4041215189999, 
              -71.404510519, -71.404560518, -71.404602519, -71.404602519, 
              -71.404716518, -71.4038335179999, -71.4030385189999, -71.4020585179999, 
              -71.401825518, -71.4016515179999, -71.400536517, -71.400500517, 
              -71.400452517, -71.4003685179999, -71.400315517, -71.400269517, 
              -71.4001465169999, -71.401008518, -71.4013135169999, -71.4015355179999, 
              -71.401554517, -71.4018105179999, -71.402083518, -71.4026645179999, 
              -71.4027105179999, -71.403580518, -71.4040535189999, 41.8245935400001, 
              41.8248955400001, 41.82528354, 41.82526154, 41.82571854, 
              41.82611554, 41.826157541, 41.8269165400001, 41.82699454, 
              41.82706154, 41.8271415410001, 41.8271605410001, 41.827177541, 
              41.827290541, 41.826901541, 41.8263745400001, 41.8255435410001, 
              41.8251205400001, 41.82476854, 41.8239185400001, 41.82381854, 
              41.8237835400001, 41.82375854, 41.82375754, 41.82375454, 
              41.82375454, 41.82375454, 41.82460854, 41.8246045400001, 
              41.8245935400001), .Dim = c(30L, 2L))), class = c("XY", "POLYGON", 
                                                                "sfg"))), class = c("sfc_POLYGON", "sfc"), precision = 0, bbox = structure(c(xmin = -71.404716518, 
                                                                                                                                             ymin = 41.82375454, xmax = -71.4001465169999, ymax = 41.827290541
                                                                ), class = "bbox"), crs = structure(list(input = "EPSG:4326", 
                                                                                                         wkt = "GEOGCS[\"WGS 84\",\n    DATUM[\"WGS_1984\",\n        SPHEROID[\"WGS 84\",6378137,298.257223563,\n            AUTHORITY[\"EPSG\",\"7030\"]],\n        AUTHORITY[\"EPSG\",\"6326\"]],\n    PRIMEM[\"Greenwich\",0,\n        AUTHORITY[\"EPSG\",\"8901\"]],\n    UNIT[\"degree\",0.0174532925199433,\n        AUTHORITY[\"EPSG\",\"9122\"]],\n    AUTHORITY[\"EPSG\",\"4326\"]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              -1L), sf_column = "geometry", agr = structure(c(OBJECTID = NA_integer_, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              NAME = NA_integer_, COUNTY = NA_integer_, OSP = NA_integer_, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              TWNCODE = NA_integer_, LAND = NA_integer_, Shape__Are = NA_integer_, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Shape__Len = NA_integer_, OBJECTID.1 = NA_integer_, ZCTA5CE10 = NA_integer_, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Shape__Are.1 = NA_integer_, Shape__Len.1 = NA_integer_, zip = NA_integer_
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              ), class = "factor", .Label = c("constant", "aggregate", "identity"
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              )), class = c("tbl_df", "tbl", "data.frame"))

library(tidyverse)

dat %>%
  ggplot(aes(geometry=geometry)) +
  geom_sf()

Created on 2020-05-22 by the reprex package (v0.3.0)

1 Like

It also looks like the sample data you posted isn't quite an sf object. If you convert it to an sf object first, you can then use built in plot methods.

data
df <- structure(list(id = "41", geometry = structure(list(structure(list(
    structure(c(-71.4040535189999, -71.4040835179999, -71.4041215189999, 
    -71.404510519, -71.404560518, -71.404602519, -71.404602519, 
    -71.404716518, -71.4038335179999, -71.4030385189999, -71.4020585179999, 
    -71.401825518, -71.4016515179999, -71.400536517, -71.400500517, 
    -71.400452517, -71.4003685179999, -71.400315517, -71.400269517, 
    -71.4001465169999, -71.401008518, -71.4013135169999, -71.4015355179999, 
    -71.401554517, -71.4018105179999, -71.402083518, -71.4026645179999, 
    -71.4027105179999, -71.403580518, -71.4040535189999, 41.8245935400001, 
    41.8248955400001, 41.82528354, 41.82526154, 41.82571854, 
    41.82611554, 41.826157541, 41.8269165400001, 41.82699454, 
    41.82706154, 41.8271415410001, 41.8271605410001, 41.827177541, 
    41.827290541, 41.826901541, 41.8263745400001, 41.8255435410001, 
    41.8251205400001, 41.82476854, 41.8239185400001, 41.82381854, 
    41.8237835400001, 41.82375854, 41.82375754, 41.82375454, 
    41.82375454, 41.82375454, 41.82460854, 41.8246045400001, 
    41.8245935400001), .Dim = c(30L, 2L))), class = c("XY", "POLYGON", 
"sfg"))), class = c("sfc_POLYGON", "sfc"), precision = 0, bbox = structure(c(xmin = -71.404716518, 
ymin = 41.82375454, xmax = -71.4001465169999, ymax = 41.827290541
), class = "bbox"), crs = structure(list(input = "EPSG:4326", 
    wkt = "GEOGCS[\"WGS 84\",\n    DATUM[\"WGS_1984\",\n        SPHEROID[\"WGS 84\",6378137,298.257223563,\n            AUTHORITY[\"EPSG\",\"7030\"]],\n        AUTHORITY[\"EPSG\",\"6326\"]],\n    PRIMEM[\"Greenwich\",0,\n        AUTHORITY[\"EPSG\",\"8901\"]],\n    UNIT[\"degree\",0.0174532925199433,\n        AUTHORITY[\"EPSG\",\"9122\"]],\n    AUTHORITY[\"EPSG\",\"4326\"]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
-1L), sf_column = "geometry", agr = structure(c(OBJECTID = NA_integer_, 
NAME = NA_integer_, COUNTY = NA_integer_, OSP = NA_integer_, 
TWNCODE = NA_integer_, LAND = NA_integer_, Shape__Are = NA_integer_, 
Shape__Len = NA_integer_, OBJECTID.1 = NA_integer_, ZCTA5CE10 = NA_integer_, 
Shape__Are.1 = NA_integer_, Shape__Len.1 = NA_integer_, zip = NA_integer_
), class = "factor", .Label = c("constant", "aggregate", "identity"
)), class = c("tbl_df", "tbl", "data.frame"))
library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
geo_df <- st_as_sf(df)
plot(geo_df)

Created on 2020-05-22 by the reprex package (v0.3.0)

Thank you @StateSteph for your response! I appreciate your help!
Sorry for not being clear in the post. I've uploaded my data here: [Dropbox - File Deleted - Simplify your life]
This is an intersected dataframe of providence zip code shape file and providence municipality shape file. I'm focusing on providence.

What I'm really trying to do is to fill the polygon based on one variable (coef38quit), and draw a border line based on another variable, in my case, I want to draw border line based on id, not geometry. id is a new variable grouped based on zip code areas.

So what I tried is to summarize by id (code below), I used it before for intact shapefiles and it worked. But this time I got an error saying "stat_sf requires the following missing aesthetics: geometry".
That's why I'm trying to convert the geometry list column to points or combine the geometry column by id, so that I can create the border line.
I feel like I'm missing something basic. Could you please point to me what I have missed?

library(dplyr)
library(ggplot2)
library(sf)
# group by id
prtown %>% 
  group_by(id) %>% 
  summarise()

# fill polygon by coef38quit, and overlay another layer for the border, based on id
ggplot(prtown) +
  geom_sf(aes(geometry = prtown$geometry,fill=coef38quit), color = "grey40", size = 0.5)+
  geom_sf(fill = "transparent", color = "red", size = 1, 
          data = . %>% group_by(id) %>% summarise())
# Error: stat_sf requires the following missing aesthetics: geometry

Thank you @mfherman for your response! Please see my response above.

I took a look at the file and the issue is that your input data is not actually an sf object, but rather just a tibble/data frame with a geometry column. Try converting prtown to an sf object using st_as_sf() and your code should work.

library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(ggplot2)
library(dplyr)

load("C:/users/tc6836/Downloads/prtown.Rda")

prtown %>% 
  st_as_sf() %>% 
  ggplot() +
  geom_sf(aes(fill = factor(coef38quit)), color = "grey40", size = 0.5) +
  geom_sf(data = . %>% count(id), fill = NA, size = 1, color = "red")

Created on 2020-05-22 by the reprex package (v0.3.0)

Thank you @mfherman very much for your help! That's why! Also I should use count instead of summarise. That has resolved my problem and thank you so much for your guidance!

Glad it helped! count() is just a short cut for the common group_by() %>% summarize() pattern, so it will work either way.

I'd also say that depending on how the rest of your code/project is structured, you might think about creating a separate sf object for each geography. This way you can be a little more explicit about what you are plotting. It would look something like this:

pr_zip <- prtown %>% 
  st_as_sf()

pr_mun <- pr_zip %>% 
  count(id)

pr_zip %>% 
  ggplot() +
  geom_sf(aes(fill = factor(coef38quit)), color = "grey40", size = 0.5) +
  geom_sf(data = pr_mun, fill = NA, size = 1, color = "red")

Thank you @mfherman for your helpful advice! I will definitely do it!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.