Hi @jak123,
Sorry, I didn't read your original question carefully enough. This is how you can arrange your customers by "total Sales" and then pick out, say, the top 4, and group the rest as "5".
suppressPackageStartupMessages(library(tidyverse))
# Build a larger version of the posted data so the example is easier to
# follow: 16 rows spread over two dates, with one missing Sales value.
your_df <- data.frame(
  Item.group = rep(
    c("Breast", "Breast", "Whole", "Whole",
      "Whole", "Breast", "Minced", "Thigh"),
    times = 2
  ),
  Customer = c(
    "D17", "D17", "D17", "D21", "D7", "D17", "D17", "D17",
    "D21", "D23", "D11", "D28", "D28", "D30", "D31", "D34"
  ),
  # First eight rows belong to the first date, the last eight to the second.
  Date = rep(c("2018-01-01", "2018-01-02"), each = 8),
  Sales = c(
    255, 138, 368, NA, 335, 75, 188, 118,
    121, 56, 98, 145, 133, 108, 41, 67
  ),
  Campaign.flag = c(
    0, 0, 0, 0, 1, 0, 0, 0,
    1, 1, 0, 0, 1, 1, 0, 0
  )
)
# Assume we want the top 4 customers and the rest lumped together.
top_n_customers <- 4

# One row per customer with its total sales and number of records, ordered
# from highest to lowest total. `new_cust` is the customer's rank, except that
# every rank beyond the top `top_n_customers` shares the single code
# `top_n_customers + 1`.
new_df <- your_df %>%
  group_by(Customer) %>%
  summarise(
    total_sales = sum(Sales, na.rm = TRUE), # NA sales are dropped from the sum
    freq = n()
  ) %>%
  arrange(desc(total_sales)) %>%
  # row_number() also works on a zero-row result, whereas seq(1, nrow(.))
  # would produce c(1, 0) and make mutate() fail.
  mutate(new_cust = pmin(row_number(), top_n_customers + 1))
new_df
#> # A tibble: 9 x 4
#> Customer total_sales freq new_cust
#> <chr> <dbl> <int> <dbl>
#> 1 D17 1142 6 1
#> 2 D7 335 1 2
#> 3 D28 278 2 3
#> 4 D21 121 2 4
#> 5 D30 108 1 5
#> 6 D11 98 1 5
#> 7 D34 67 1 5
#> 8 D23 56 1 5
#> 9 D31 41 1 5
Created on 2021-09-13 by the reprex package (v2.0.1)
Further summarizing is then possible, if required.