Large number of data points shiny/leaflet

emperorfish · December 29, 2017, 9:52am

Hi Im new to shiny and was wondering what the options were to display a large number of points on a leaflet map within a shiny app? At the moment Im using the addCircles function for 300000 records and this hangs. Thanks in advance.

jrlewi · December 29, 2017, 1:31pm

Hello @emperorfish,
It might help to provide a small example of exactly what you are trying to do (see https://www.tidyverse.org/help/ regarding reprex's).

Cheers!

emperorfish · December 29, 2017, 2:39pm

Hi John
Thanks for your help... this is an example below...

library(shiny)
library(leaflet)


ui=shinyUI(fluidPage(
  

  titlePanel("Test tracks"),
  
  sidebarLayout(
    sidebarPanel(
      checkboxGroupInput(inputId="shps.select", label="Select layer",
                         choices=c("tracks" = "tracks", "MPA" = "mpa")),
      dateRangeInput("dates", 
                     "Date range",
                     start = Sys.Date() - 7,
                     end =   Sys.Date(),
                     min    = Sys.Date() - 7,
                     max    = Sys.Date(),
                     format = "dd/mm/yyyy",
                     separator = " - ")
    ),
    
  
    mainPanel(
      leafletOutput("map", width = "100%", height = 400)
    )
  )
))

coords <- list(
  c(-122.36075812146,  47.6759920119894),
  c(-122.360781646764, 47.6668890126755),
  c(-122.360782108665,  47.6614990696722),
  c(-122.366199035722, 47.6614990696722),
  c(-122.366199035722,  47.6592874248973),
  c(-122.364582509469, 47.6576254522105),
  c(-122.363887331445,  47.6569107302038),
  c(-122.360865528129, 47.6538418253251),
  c(-122.360866157644,  47.6535254473167),
  c(-122.360866581103, 47.6533126275176),
  c(-122.362526540691,  47.6541872926348),
  c(-122.364442114483, 47.6551892850798),
  c(-122.366077719797,  47.6560733960606),
  c(-122.368818463838, 47.6579742346694),
  c(-122.370115159943,  47.6588730808334),
  c(-122.372295967029, 47.6604350102328),
  c(-122.37381369088,  47.660582362063),
  c(-122.375522972109, 47.6606413027949),
  c(-122.376079703095,  47.6608793094619),
  c(-122.376206315662, 47.6609242364243),
  c(-122.377610811371,  47.6606160735197),
  c(-122.379857378879, 47.6610306942278),
  c(-122.382454873022,  47.6627496239169),
  c(-122.385357955057, 47.6638573778241),
  c(-122.386007328104,  47.6640865692306),
  c(-122.387186331506, 47.6654326177161),
  c(-122.387802656231,  47.6661492860294),
  c(-122.388108244121, 47.6664548739202),
  c(-122.389177800763,  47.6663784774359),
  c(-122.390582858689, 47.6665072251861),
  c(-122.390793942299,  47.6659699214511),
  c(-122.391507906234, 47.6659200946229),
  c(-122.392883050767,  47.6664166747017),
  c(-122.392847210144, 47.6678696739431),
  c(-122.392904778401,  47.6709016021624),
  c(-122.39296705153, 47.6732047491624),
  c(-122.393000803496,  47.6759322346303),
  c(-122.37666945305, 47.6759896300663),
  c(-122.376486363943,  47.6759891899754),
  c(-122.366078869215, 47.6759641734893),
  c(-122.36075812146,  47.6759920119894)
)

coords <- as.data.frame(do.call("rbind", coords))
colnames(coords) <- c("longitude", "latitude")


start <- as.POSIXct(Sys.time())
end <- start - as.difftime(7, units = "days")
length <- nrow(coords)
exampledates <- seq(from = start, to = end, length.out = length)
coords$timestamp <- exampledates


mpaData <- coords[1,-3] # example data 
mpaPoints <- sf::st_as_sf(mpaData, coords = c("longitude", "latitude"))
mpaPolygon <- sf::st_buffer(x = mpaPoints, 0.02)

server = shinyServer(function(input, output) {
  
  output$map <- renderLeaflet({ leaflet() %>% addTiles() %>% setView(-122.37, 47.6659, zoom = 14)})
  
  observe({
    
    # Create map
    map <- leafletProxy("map")
    map %>% clearShapes()
    
    # Get select inputs
    shps.select <- input$shps.select # the function is triggered when the select option changes
    
    if (length(shps.select) > 0) {
      
      if ('tracks' %in% shps.select) {
        
        dates <- input$dates # triggers this function when you update dates
        dates <- as.POSIXct(dates)
        
        # Query data by input dates
        subset <- subset(coords, timestamp >= dates[1] & timestamp <= dates[2])
        
        leafletProxy("map")  %>% addCircles(group = "idTrack", data = subset, radius = 20, lng = subset$longitude, lat = subset$latitude, weight = 1, color = "black", fillColor = "black", fillOpacity = 1)
      } 
      else {
        leafletProxy("map")  %>% clearGroup("idTrack")
      }
      if ('mpa' %in% shps.select) {
        # Add marine protected areas 
        leafletProxy("map")  %>% addPolygons(group = "idMPA", lng = sf::st_coordinates(mpaPolygon)[,1], lat = sf::st_coordinates(mpaPolygon)[,2])
      }
      else {
        leafletProxy("map")  %>% clearGroup(group = "idMPA")
      }
    }  
  })
  
  
})
shinyApp(ui = ui, server = server)

jrlewi · December 29, 2017, 3:17pm

the format is still a little funky and makes it hard to tell what is code and what is not. Can you make sure all of your code is enclosed in ``.

this is code

this is not.

You can just paste your code, highlight it and this hit the </> button. The goal is to make it easy for folks to copy and paste your example so they can reproduce it.

emperorfish · December 29, 2017, 3:53pm

Ok John
Please see above.

jrlewi · December 29, 2017, 4:01pm

does reducing the radius parameter within addCircles reduce the hanging?

emperorfish · December 29, 2017, 4:22pm

No I tried that. I have the radius at 0.03... i just think its too many data points to load. Maybe just sampling the data would do although i would lose the track detail?

jrlewi · December 29, 2017, 4:29pm

okay - yea...my hunch is that at some point there are too many points and there will be some hanging...

If they are tracks...does it make sense to connect them with lines not points? Then subsetting might not cause a loss of detail?

emperorfish · December 29, 2017, 4:53pm

Yes thanks John, I tried this and its actually not too bad... I just used the sample function to sample 5000 points it loads almost immediately...

edgararuiz · December 29, 2017, 5:12pm

Hi, may I recommend grouping the data points by long/lat prior to sending them to the plot? Since it looks like you have no transparency setup for the dots, then the plot should not keep on plotting one dot on the same spot over and over

A step beyond, would be to cluster the dots, using something like kmeans() and set the size of the circle based on how many data points are inside a given cluster.

Hope this helps!

emperorfish · December 29, 2017, 5:34pm

Hi Edgar
Thanks for your contribution, what happens if the points arent overlapping? So you are suggesting clustering, could I for instance use addclustermakers to get a similar result?

Thanks.

lbusett · December 29, 2017, 6:10pm

You may consider also to convert the points to a multiline geometry withst_cast and then simplifying it using st_simplify.

emperorfish · December 29, 2017, 7:02pm

Please could you share some code as to how to do this? Im not overly familiar with sf package. I have tried this approach a while back.. st_as_sf(dat, coords = c("longitude", "latitude"),
crs = 4326, agr = "constant")
but this also took a while to load and froze the screen.

Thanks a lot.

pauloldham · December 30, 2017, 11:46am

Thanks @emporerfish for posting this question. I often work with species occurrence datasets with a few hundred thousand coordinate points or above. I think you are pointing to a more general problem that others experience when we get to around 30,000 data points with Leaflet.

Clustering as suggested by @edgararuiz is one option while @lbusett suggestion sounded promising for your data. However, I think there is a more general issue with how to address large scale data sets with Leaflet/Shiny in R without clustering… or whether a different approach is more appropriate… where suggestions from the community would be very welcome. At present I download and prepare the data in R and then import to Tableau as a very easy mapping solution. It works but I would prefer a different solution without becoming trapped in Tableau.

I am not sure this is of help to you but as a relative newbie with Leaflet and Shiny I prepared some walkthroughs earlier this year. Far from perfect but maybe of some general help to those of us puzzling our way forward with Leaflet/Shiny maps and the scale issue.

emperorfish · December 30, 2017, 1:12pm

Thanks @pauloldham for this very useful posting, your work and app look very interesting.

lbusett · January 4, 2018, 2:45pm

Hi,

sorry for the late reply, but I was off-grid for a few days. Hope this is still useful.

Below, you can find an example app showing how you could use sf::st_sample_line to automatically subsample your "points" according to a "density" parameter. Note that here I am assuming that your objective is to be able to plot "tracks", which I supposed are something akin to a gps path. Therefore, I sligtly modified your example, in order to use a "test" gpx file downloaded from the web (http://software.frankingermann.de/), which is then converted to a sf LINESTRING geometry (If I interpreted wrongly your intent, this will therefore be probably useless to you... but could be useful to someone else... )

Running the app should allow you to see how the sf::st_line_sample function can be used to simplify your track by adjusting the density parameter, thus improving the rendering speed. After the "simplification", you can decide whether to plot directly the "line", or to go back to a "point representation" (which is however slower to render). Obviously, in a real application the "simplification" step should be done "beforehand".

An advantage with repect to a "random sampling" approach is that st_line_sample should allow a more reasonable subsampling of the point data, since it takes into account the distance between points. A small caveat is that you'd need to go back and forth between the 4326 lat/lon projection needed by leaflet, and a metric projection needed for st_line_sample to work.

HTH !

library(shiny)
library(leaflet)
library(sf)
librry(tibble)
#> Linking to GEOS 3.6.1, GDAL 2.2.0, proj.4 4.9.3
library(httr)

# ____________________________________________________________________________
# Download a test gpx track and convert it to a `sf` LINEsTRING
tmp <- tempfile(fileext = ".zip")
get_zip <- httr::GET('http://software.frankingermann.de/images/gpxtracks/kuhkopfsteig-fv.zip', 
                     httr::write_disk(tmp))
in_track <- unzip(tmp) %>% sf::st_read(layer = "track_points", quiet = TRUE)

track_data <- tibble::tibble(track_id  = 1,
                             timestamp = in_track$time,
                             x = sf::st_coordinates(in_track)[,1],
                             y = sf::st_coordinates(in_track)[,2]) %>% 
  sf::st_as_sf(coords = c("x","y")) %>% 
  sf::st_set_crs(4326) %>% 
  # NOTE: Here we have to convert to a metric projection because `st_line_sample`
  # only works with projected data
  sf::st_transform(3857) %>%
  # This needed to transform the "points" to a LINESTRING
  dplyr::group_by(track_id) %>% 
  dplyr::summarise(do_union = FALSE) %>% 
  sf::st_cast("LINESTRING")
track_data
#> Simple feature collection with 1 feature and 1 field
#> geometry type:  LINESTRING
#> dimension:      XY
#> bbox:           xmin: 717227.3 ymin: 6568866 xmax: 719747.2 ymax: 6571670
#> epsg (SRID):    3857
#> proj4string:    +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
#>   track_id                       geometry
#> 1        1 LINESTRING (717522.70749015...


# ____________________________________________________________________________
# Now set-up a shiny app showing the effect of simplifying the linestring
# using the `density` argument.

ui = shinyUI(fluidPage(
  titlePanel("Test tracks"),
  sidebarLayout(
    sidebarPanel(
      checkboxGroupInput(inputId = "shps.select", label="Select layer",
                         choices = c("line" = "line", "points" = "points")),
      sliderInput("density", "Points Density:", 50, min = 1, max = 250)
    ),
    mainPanel(leafletOutput("map", width = "100%", height = 400))
  )
))

server = shinyServer(function(input, output) {
  
  output$map <- renderLeaflet({ leaflet() %>% 
      addTiles() %>% 
      setView(6.45471, 50.71, zoom = 14)})
  
  observe({
    
    # Create map
    map <- leafletProxy("map")
    map %>% clearShapes()
    
    # Get select inputs
    shps.select <- input$shps.select # the function is triggered when the select option changes
    
    if (length(shps.select) > 0) {
      if ('lines' %in% shps.select) {
        density <- input$density # triggers this function when you update density
        
        # "sample" the linestring according to selected density
        linetrack <- track_data %>%
          sf::st_line_sample(density = 1/density, type = "regular") %>%
          sf::st_transform(4326) %>% 
          st_cast("LINESTRING")
        
        leafletProxy("map")  %>% addPolylines(group = "track_id",
                                              data = linetrack)
      }
      
      if ('points' %in% shps.select) {
        
        density <- input$density # triggers this function when you update dates
        
        # "sample" the linestring according to selected "density" and go back to
        #  points representation        
        track <- track_data %>%
          sf::st_line_sample(density = 1/density) %>%
          sf::st_cast("POINT") %>%
          sf::st_transform(4326)
        
        leafletProxy("map")  %>% addCircles(group = "track_id",
                                            data = track)
      }
    }  
  })
})

shinyApp(ui = ui, server = server)

emperorfish · January 5, 2018, 6:53am

Thankyou so much this works very well and much better than just sampling. I appreciate your time to show this.