Calculating the difference in each group

Hello, I have a dataframe containing date, price, and a binary group (1/0 for discount or not discount) as show in the picture below

Screenshot 2021-09-27 205623

structure(list(date = structure(c(16443, 16444, 16447, 16448, 
16449, 16457, 16458, 16461, 16462, 16463, 16464, 16465, 16468, 
16469, 16470, 16471, 16472, 16475, 16476, 16484, 16437, 16440, 
16441, 16442, 16450, 16451, 16454, 16455, 16456, 16477, 16478, 
16479, 16482, 16483, 16492, 16493, 16496, 16497, 16498, 16520
), class = "Date"), price = c(8.066, 7.918, 7.856, 7.82, 7.828, 
7.37, 7.283, 7.299, 7.25, 7.228, 7.219, 7.168, 7.118, 7.031, 
7.098, 7.11, 7.156, 7.189, 7.195, 7.195, 7.859, 7.964, 7.942, 
8.117, 7.791, 7.785, 7.789, 7.446, 7.401, 7.412, 7.529, 7.495, 
7.456, 7.393, 7.104, 7.062, 7.081, 7.115, 7.289, 7.378), discount = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -40L), groups = structure(list(
    discount = c(0, 1), .rows = structure(list(1:20, 21:40), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

A discount group represents individual period which has various lengths of days (it can be thought as discount week).

I would like to calculate the difference between first and last price in each group of discount.
Any advise would be appreciated.

How about this ?

library(tidyverse)

df <- structure(list(date = structure(c(16443, 16444, 16447, 16448, 
                                  16449, 16457, 16458, 16461, 16462, 16463, 16464, 16465, 16468, 
                                  16469, 16470, 16471, 16472, 16475, 16476, 16484, 16437, 16440, 
                                  16441, 16442, 16450, 16451, 16454, 16455, 16456, 16477, 16478, 
                                  16479, 16482, 16483, 16492, 16493, 16496, 16497, 16498, 16520), 
                                class = "Date"), 
               price = c(8.066, 7.918, 7.856, 7.82, 7.828, 
                        7.37, 7.283, 7.299, 7.25, 7.228, 7.219, 7.168, 7.118, 7.031, 
                        7.098, 7.11, 7.156, 7.189, 7.195, 7.195, 7.859, 7.964, 7.942, 
                        8.117, 7.791, 7.785, 7.789, 7.446, 7.401, 7.412, 7.529, 7.495, 
                        7.456, 7.393, 7.104, 7.062, 7.081, 7.115, 7.289, 7.378), 
               discount = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), 
          class = c("grouped_df", "tbl_df", "tbl", "data.frame"), 
          row.names = c(NA, -40L), 
          groups = structure(list(discount = c(0, 1), .rows = structure(list(1:20, 21:40), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))),
                             row.names = c(NA, -2L),
                             class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))


df %>% 
  ungroup %>%
  arrange(date) %>% 
  mutate(
    discount_lag = lag(discount, default = 0),
    id = cumsum(discount != discount_lag)
  ) %>% 
  select(-discount_lag) %>% 
  group_by(id) %>% 
  mutate(
    edge = case_when(
      date == min(date) ~ "from",
      date == max(date) ~ "to",
      T ~ NA_character_
      )
  ) %>% 
  ungroup %>% 
  filter(!is.na(edge)) %>% 
  pivot_wider(
    names_from = edge, 
    values_from = c(date, price),
  ) %>% 
  select(-id) %>% 
  mutate(
    difference = price_to - price_from
  )

# # A tibble: 7 x 6
# discount date_from  date_to    price_from price_to difference
# <dbl> <date>     <date>          <dbl>    <dbl>      <dbl>
# 1        1 2015-01-02 2015-01-07       7.86     8.12     0.258 
# 2        0 2015-01-08 2015-01-14       8.07     7.83    -0.238 
# 3        1 2015-01-15 2015-01-21       7.79     7.40    -0.390 
# 4        0 2015-01-22 2015-02-10       7.37     7.20    -0.175 
# 5        1 2015-02-11 2015-02-17       7.41     7.39    -0.0190
# 6        0 2015-02-18 NA               7.20    NA       NA     
# 7        1 2015-02-26 2015-03-26       7.10     7.38     0.274 
1 Like

Hi seaside_flog, yes this works perfectly with clear code and familiar tidyverse library! Thanks a lot

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.