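I think the following does what you want: it joins the `text` values of consecutive rows that share the same `id` and `id2` into a single row.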
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example input: eight rows keyed by id/id2, each carrying a short text.
input_dataset <- data.frame(
  id = c("a", "a", "a", "a", "b", "b", "b", "b"),
  id2 = c("1", "2", "2", "2", "1", "1", "2", "1"),
  text = c("gi", "hi", "ed", "ew", "ic", "ia", "ws", "er"),
  stringsAsFactors = FALSE
)
# Desired result: five rows, with the texts of adjacent rows that share the
# same id/id2 pair joined by single spaces.
expected_dataset <- data.frame(
  id = c("a", "a", "b", "b", "b"),
  id2 = c("1", "2", "1", "2", "1"),
  text = c("gi", "hi ed ew", "ic ia", "ws", "er"),
  stringsAsFactors = FALSE
)
# Collapse each run of consecutive rows that share the same (id, id2) pair
# into a single row whose `text` is the run's texts joined by single spaces.
output_dataset <- input_dataset %>%
  mutate(
    # Join the two keys with a separator so that different pairs cannot yield
    # the same key (plain paste0() maps both ("a1", "2") and ("a", "12") to
    # "a12"). Assumes the unit-separator character never occurs in id or id2.
    run_key = paste(id, id2, sep = "\x1f"),
    # A new run starts wherever the key differs from the previous row's key.
    # The "" default makes the first row open run 1 (a key always contains
    # the separator, so it is never ""), and zero-row input simply produces
    # zero run ids instead of an error.
    run_id = cumsum(run_key != lag(run_key, default = ""))
  ) %>%
  group_by(run_id) %>%
  mutate(text = paste(text, collapse = " ")) %>%
  ungroup() %>%
  # Within a run, id, id2 and the combined text are now identical, so
  # distinct() leaves one row per run.
  distinct() %>%
  select(-run_key, -run_id) %>%
  as.data.frame()
# Check the computed result against the expected one; all.equal() returns
# TRUE on a match and a description of the differences otherwise.
all.equal(expected_dataset, output_dataset)
#> [1] TRUE
Created on 2019-03-25 by the reprex package (v0.2.1)