Count average spending by customer within specific time zone and assign them into different tier group

I have the dataset look like below. and i am stuck at counting the dates within last 3 months and then name people by tier based on their avg. expense.

library(tidyverse)
library(lubridate)


name <- c('Mary','Sue','Peter','Mary','Mary','John','Sue',
          'Peter','Peter','John','John','John','Mary','Mary',
          'John','Mary','Peter','Sue')
date <- c('01/04/2018','03/02/2017','01/01/2019','24/04/2017',
          '02/03/2019','31/05/2019','08/09/2019','17/12/2019',
          '02/08/2017','10/11/2017','30/12/2017','18/02/2018',
          '18/02/2018','18/10/2019','30/04/2019','18/09/2019',
          '17/11/2019','08/08/2019'
          )
expense <- c('300','450','550','980',
          '787','300','2343','233',
          '932','44','332','432',
          '786','345','567','290','345','876')

data <- data.frame(name,
                   date=lubridate::dmy(date),expense)

library(dplyr)

data$expense <- as.numeric(as.character(data$expense))

data %>% 
  group_by(name) %>%
  summarise(last_3_month_expense = mean(expense[date > max(date) - 90], na.rm = TRUE), 
            mean_expense = mean(expense, na.rm = TRUE)
            ,playdays = rowSums(expense[date > max(date) - 90])
            )

Is this part of what you are looking for. I did not understand what you re trying to do with the rowSums so I left it out of this code.

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union


name <- c('Mary','Sue','Peter','Mary','Mary','John','Sue',
          'Peter','Peter','John','John','John','Mary','Mary',
          'John','Mary','Peter','Sue')
date <- c('01/04/2018','03/02/2017','01/01/2019','24/04/2017',
          '02/03/2019','31/05/2019','08/09/2019','17/12/2019',
          '02/08/2017','10/11/2017','30/12/2017','18/02/2018',
          '18/02/2018','18/10/2019','30/04/2019','18/09/2019',
          '17/11/2019','08/08/2019'
)
expense <- c('300','450','550','980',
             '787','300','2343','233',
             '932','44','332','432',
             '786','345','567','290','345','876')

data <- data.frame(name,
                   date=lubridate::dmy(date),expense)

library(dplyr)

data$expense <- as.numeric(as.character(data$expense))

data2 <- data %>% 
  group_by(name) %>%
  mutate(Within90Days = date > (max(date) - 90)) %>% 
  summarise(mean_last_3_month = sum(expense * Within90Days, na.rm = TRUE)/sum(Within90Days), 
            mean_expense = mean(expense, na.rm = TRUE))
            #,playdays = rowSums(expense[date > max(date) - 90])

data2
#> # A tibble: 4 x 3
#>   name  mean_last_3_month mean_expense
#>   <fct>             <dbl>        <dbl>
#> 1 John               434.         335 
#> 2 Mary               318.         581.
#> 3 Peter              289          515 
#> 4 Sue               1610.        1223

Created on 2020-05-16 by the reprex package (v0.3.0)

Thanks, how to add one more column to show how many days within 90 days for each person.

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union


name <- c('Mary','Sue','Peter','Mary','Mary','John','Sue',
          'Peter','Peter','John','John','John','Mary','Mary',
          'John','Mary','Peter','Sue')
date <- c('01/04/2018','03/02/2017','01/01/2019','24/04/2017',
          '02/03/2019','31/05/2019','08/09/2019','17/12/2019',
          '02/08/2017','10/11/2017','30/12/2017','18/02/2018',
          '18/02/2018','18/10/2019','30/04/2019','18/09/2019',
          '17/11/2019','08/08/2019'
)
expense <- c('300','450','550','980',
             '787','300','2343','233',
             '932','44','332','432',
             '786','345','567','290','345','876')

data <- data.frame(name,
                   date=lubridate::dmy(date),expense)

library(dplyr)

data$expense <- as.numeric(as.character(data$expense))

data2 <- data %>% 
  group_by(name) %>%
  mutate(Within90Days = date > (max(date) - 90)) %>% 
  summarise(mean_last_3_month = sum(expense * Within90Days, na.rm = TRUE)/sum(Within90Days), 
            mean_expense = mean(expense, na.rm = TRUE),
            N_ddays = sum(Within90Days))
            #,playdays = rowSums(expense[date > max(date) - 90])

data2
#> # A tibble: 4 x 4
#>   name  mean_last_3_month mean_expense N_ddays
#>   <fct>             <dbl>        <dbl>   <int>
#> 1 John               434.         335        2
#> 2 Mary               318.         581.       2
#> 3 Peter              289          515        2
#> 4 Sue               1610.        1223        2

Created on 2020-05-16 by the reprex package (v0.3.0)

Thank you. that's what i need.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.