In this part of your code:
# Snippet from the question: here `.` is the %>% placeholder, so get_dists()
# receives the whole data frame `d` rather than the rows of the current group.
d %>%
group_by(run) %>%
mutate(dists = get_dists(.))
I think the `.` is interpreted as the `%>%` placeholder, so it is replaced by the whole data.frame `d`, not just the subset of rows belonging to the current group. That is why the dimension is 6 and not 3. `mutate` works well with vectors.
With your current function, which takes a data.frame, you can use `tidyr::nest` to create a list-column holding the data of each group and then apply your function to that list-column with `map`:
library(dplyr, warn.conflicts = F)
library(tidyr)
# Custom function for getting cumulative distances between consecutive
# longitude/latitude points.
# REQUIRES GEOSPHERE
#
# dat_in: data frame with one point per row.
# lon, lat: names of the longitude and latitude columns of `dat_in`.
# Returns a numeric vector with one value per row: 0 for the first point,
# then the running sum of the step distances, each being the
# geosphere::distm() result divided by 1000 (kilometres when distm()
# reports metres).
get_dists <- function(dat_in, lon = 'lon', lat = 'lat') {
  dat <- dat_in[, c(lon, lat)]
  names(dat) <- c('lon', 'lat')
  n_pts <- nrow(dat)
  # With fewer than two points there is no step to measure. This guard also
  # prevents `2:nrow(dat)` from counting backwards (2:1, 2:0) and indexing
  # rows that do not exist.
  if (n_pts < 2L) {
    return(rep(0, n_pts))
  }
  # distm() yields a 1 x 1 matrix for a single pair of points; flatten it so
  # vapply() can enforce "exactly one number per step".
  steps <- vapply(
    seq_len(n_pts - 1L),
    function(i) as.numeric(geosphere::distm(dat[i, ], dat[i + 1L, ])) / 1000,
    numeric(1)
  )
  c(0, cumsum(steps))
}
# create fake data: two runs of three GPS points each
# (tibble() is used because data_frame() is deprecated)
d <- tibble(
  run = c(1, 1, 1, 2, 2, 2),
  lat = c(57.15508, 57.15521, 57.15520, 52.41278, 52.41283, 52.41317),
  lon = c(-2.07886, -2.07886, -2.07887, -4.07803, -4.07806, -4.07858)
)
# Pack every column except `run` into the list-column `data`, run get_dists()
# once per nested data frame, then expand back to one row per point.
# NOTE(review): newer tidyr releases may warn about `nest(-run)` and a bare
# `unnest()` -- confirm against the installed version.
d %>%
  nest(-run) %>%
  group_by(run) %>%
  mutate(dists = purrr::map(data, function(grp) get_dists(grp))) %>%
  unnest()
#> # A tibble: 6 x 4
#> # Groups: run [2]
#> run dists lat lon
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.000000000 57.15508 -2.07886
#> 2 1 0.014471534 57.15521 -2.07886
#> 3 1 0.015737917 57.15520 -2.07887
#> 4 2 0.000000000 52.41278 -4.07803
#> 5 2 0.005927023 52.41283 -4.07806
#> 6 2 0.057688123 52.41317 -4.07858
However, you can also skip the nesting step by recreating, inside `mutate`, the data.frame your function works with:
# Inside a grouped mutate(), `lat` and `lon` are the current group's vectors,
# so rebuilding a small tibble from them hands get_dists() one run at a time.
d %>%
  group_by(run) %>%
  mutate(dists = get_dists(tibble(lat, lon)))
#> # A tibble: 6 x 4
#> # Groups: run [2]
#> run lat lon dists
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 57.15508 -2.07886 0.000000000
#> 2 1 57.15521 -2.07886 0.014471534
#> 3 1 57.15520 -2.07887 0.015737917
#> 4 2 52.41278 -4.07803 0.000000000
#> 5 2 52.41283 -4.07806 0.005927023
#> 6 2 52.41317 -4.07858 0.057688123
Or change your function to take two vectors, so that it works better with a grouped `mutate`:
# Vector-based variant of the distance helper, convenient inside a grouped
# mutate(). REQUIRES GEOSPHERE.
#
# lon, lat: longitude and latitude vectors, one element per point.
# Returns a numeric vector with one value per point: 0 for the first point,
# then the running sum of the step distances, each being the
# geosphere::distm() result divided by 1000.
get_dists2 <- function(lon, lat) {
  # A base data.frame is enough to pair the coordinates and already carries
  # the column names `lon` and `lat`, so no renaming step is needed.
  dat <- data.frame(lon = lon, lat = lat)
  n_pts <- nrow(dat)
  # With fewer than two points there is no step to measure. This guard also
  # prevents `2:nrow(dat)` from counting backwards (2:1, 2:0) and indexing
  # rows that do not exist.
  if (n_pts < 2L) {
    return(rep(0, n_pts))
  }
  # distm() yields a 1 x 1 matrix for a single pair of points; flatten it so
  # vapply() can enforce "exactly one number per step".
  steps <- vapply(
    seq_len(n_pts - 1L),
    function(i) as.numeric(geosphere::distm(dat[i, ], dat[i + 1L, ])) / 1000,
    numeric(1)
  )
  c(0, cumsum(steps))
}
# Grouped mutate(): get_dists2() receives each run's lon/lat vectors in turn.
d %>%
  group_by(run) %>%
  mutate(dists = get_dists2(lon = lon, lat = lat))
#> # A tibble: 6 x 4
#> # Groups: run [2]
#> run lat lon dists
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 57.15508 -2.07886 0.000000000
#> 2 1 57.15521 -2.07886 0.014471534
#> 3 1 57.15520 -2.07887 0.015737917
#> 4 2 52.41278 -4.07803 0.000000000
#> 5 2 52.41283 -4.07806 0.005927023
#> 6 2 52.41317 -4.07858 0.057688123