Thank you in advance for your help.
Could you please help me solve this question?
I have the following dataset.
What I need is to identify, for each visitor_id, the row of the 1st touch point with channel "Y" together with all of that visitor's visits AFTER this touch point (and nothing from before it).
Example :
-for visitor B output will be lines 14 (1st touch point with Y),5,6
-for visitor D output will be lines 8 (1st touch point with Y) and 10. Lines 11 and 3 won't be in the output as they were before the 1st touch point with channel Y (line 8)
Hi @balthordu. You can find the position of the first "Y" touch point for each visitor with `match()`, and then remove that row and all the rows before it. I hope the code helps.
library(tidyverse)
# Toy visit log: 15 rows, each column drawn independently with replacement.
# No seed is set, so the rows differ from run to run.
sample_data <- data.frame(
  date = sample(c("date1", "date2", "date3"), size = 15, replace = TRUE),
  visitor_id = sample(c("A", "B", "C", "D"), size = 15, replace = TRUE),
  channel = sample(c("X", "Y", "Z"), size = 15, replace = TRUE)
)
sample_data
#> date visitor_id channel
#> 1 date1 A Z
#> 2 date1 B X
#> 3 date3 D Z
#> 4 date3 D Z
#> 5 date3 B Z
#> 6 date1 C X
#> 7 date1 A Y
#> 8 date2 D Z
#> 9 date3 A Y
#> 10 date1 C Z
#> 11 date3 A Z
#> 12 date3 C Y
#> 13 date3 C Y
#> 14 date3 C X
#> 15 date3 A X
# For every visitor with at least one "Y" visit, drop all visits up to and
# including the first "Y" (visits are sorted by date beforehand).
sample_data %>%
  arrange(date) %>%
  group_by(visitor_id) %>%
  nest() %>%
  # Row index of the first "Y" within each visitor's visits (NA when absent).
  mutate(
    first_y = map_dbl(data, function(visits) which(visits$channel == "Y")[1])
  ) %>%
  # Visitors that never touched "Y" are discarded entirely.
  filter(!is.na(first_y)) %>%
  mutate(
    data = map2(
      data, first_y,
      function(visits, idx) slice(visits, -seq_len(idx))
    )
  ) %>%
  # The helper index is no longer needed once the rows have been trimmed.
  select(-first_y) %>%
  unnest(data)
#> # A tibble: 5 x 3
#> # Groups: visitor_id [4]
#> visitor_id date channel
#> <fct> <fct> <fct>
#> 1 A date3 Y
#> 2 A date3 Z
#> 3 A date3 X
#> 4 C date3 Y
#> 5 C date3 X
@balthordu. You can change the code to keep the row of the first "Y" touch point and all the rows after it.
library(tidyverse)
# Toy visit log: 15 rows, each column drawn independently with replacement.
# No seed is set, so the rows differ from run to run.
sample_data <- data.frame(
  date = sample(c("date1", "date2", "date3"), size = 15, replace = TRUE),
  visitor_id = sample(c("A", "B", "C", "D"), size = 15, replace = TRUE),
  channel = sample(c("X", "Y", "Z"), size = 15, replace = TRUE)
)
sample_data
#> date visitor_id channel
#> 1 date3 D X
#> 2 date1 B Z
#> 3 date2 B X
#> 4 date3 B Z
#> 5 date3 C X
#> 6 date3 C Y
#> 7 date2 D Z
#> 8 date1 C X
#> 9 date1 A Z
#> 10 date2 D Z
#> 11 date1 D Y
#> 12 date1 B Z
#> 13 date1 C Y
#> 14 date1 A X
#> 15 date3 D Z
# For every visitor with at least one "Y" visit, keep the first "Y" visit and
# every visit that follows it (visits are sorted by date beforehand).
sample_data %>%
  arrange(date) %>%
  group_by(visitor_id) %>%
  nest() %>%
  # Row index of the first "Y" within each visitor's visits (NA when absent).
  mutate(
    first_y = map_dbl(data, function(visits) which(visits$channel == "Y")[1])
  ) %>%
  # Visitors that never touched "Y" are discarded entirely.
  filter(!is.na(first_y)) %>%
  mutate(
    data = map2(
      data, first_y,
      function(visits, idx) slice(visits, seq(idx, nrow(visits)))
    )
  ) %>%
  # The helper index is no longer needed once the rows have been trimmed.
  select(-first_y) %>%
  unnest(data)
#> # A tibble: 8 x 3
#> # Groups: visitor_id [4]
#> visitor_id date channel
#> <fct> <fct> <fct>
#> 1 C date1 Y
#> 2 C date3 X
#> 3 C date3 Y
#> 4 D date1 Y
#> 5 D date2 Z
#> 6 D date2 Z
#> 7 D date3 X
#> 8 D date3 Z