Want to make 'moving' sum of a column in a data frame with group of records and number of records per set.

I want to do an operation on a data frame like this

The sum of three values is not specific for 3 only. The sum is taken from the reverse order.

Thanking you for your support

For the first (left) set of records I have generated cumulative sum of 'values' column for each ID

xyz = data.frame(
  "ID"=c("A","A","A","A","A","B","B","B","B","B","B","B","C","C","C","C"),"Value"=as.integer(rnorm(16,mean = 5,sd = 5)))

xyz1=xyz

A=cumsum(xyz1$Value[xyz1$ID=="A"]  )
B = cumsum(xyz1$Value[xyz1$ID=="B"])  
C = cumsum(xyz1$Value[xyz1$ID=="C"])

abc=vector(length=nrow(xyz))

abc[1:5]=A
abc[6:12]=B
abc[13:16]=C

xyz1=cbind(xyz,abc)

# How to repeat this for large no. of ID's like in 1000's

But, for 2nd set of records (on the right) I have to make cumulative sum of three records at a time. Like in table column (Column_to_make) the last record of A has sum of last 3 values (7+7+8)==22
the above record is sum of (7+8+4)==19. and so on...…….

How can I automate the method of cumsum as described above for large number of ID's like in 1000's.

Thanking You for your time.

Did you mean to include an abs in generating the Value column? I assumed that since the picture in question has all natural numbers, but you can change it easily.

# setting seed
set.seed(seed = 25845)

# set of records
xyz <- data.frame("ID" = c("A","A","A","A","A","B","B","B","B","B","B","B","C","C","C","C"),
                  "Value" = abs(x = as.integer(x = rnorm(n = 16,
                                                         mean = 5,
                                                         sd = 5))))

# first problem
unique_IDs <- unique(x = xyz$ID)
temp <- vector(mode = "list",
               length = length(x = unique_IDs))
for (counter in seq_along(along.with = unique_IDs))
{
  temp[[counter]] <- cumsum(x = subset(x = xyz,
                                       subset = (ID == unique_IDs[counter]),
                                       select = Value))
}
(xyz1 <- cbind(xyz, Column_to_make = unlist(x = temp)))
#>    ID Value Column_to_make
#> 1   A     7              7
#> 2   A    10             17
#> 3   A     7             24
#> 4   A     4             28
#> 5   A     0             28
#> 6   B     9              9
#> 7   B     7             16
#> 8   B     9             25
#> 9   B     3             28
#> 10  B    10             38
#> 11  B     8             46
#> 12  B    13             59
#> 13  C     1              1
#> 14  C    12             13
#> 15  C     0             13
#> 16  C     8             21

# second problem
(xyz2 <- within(data = xyz,
                expr = {
                  Column_to_make <- c(Value[1:2],
                                      zoo::rollsum(x = Value,
                                                   k = 3))
                }))
#>    ID Value Column_to_make
#> 1   A     7              7
#> 2   A    10             10
#> 3   A     7             24
#> 4   A     4             21
#> 5   A     0             11
#> 6   B     9             13
#> 7   B     7             16
#> 8   B     9             25
#> 9   B     3             19
#> 10  B    10             22
#> 11  B     8             21
#> 12  B    13             31
#> 13  C     1             22
#> 14  C    12             26
#> 15  C     0             13
#> 16  C     8             20
1 Like

This is another way to do it without loops

set.seed(seed = 25845)

xyz <- data.frame("ID" = c("A","A","A","A","A","B","B","B","B","B","B","B","C","C","C","C"),
                  "Value" = abs(x = as.integer(x = rnorm(n = 16,
                                                         mean = 5,
                                                         sd = 5))))
library(dplyr)
library(zoo)

xyz %>% 
    group_by(ID) %>% 
    mutate(column_to_make = cumsum(Value))
#> # A tibble: 16 x 3
#> # Groups:   ID [3]
#>    ID    Value column_to_make
#>    <fct> <int>          <int>
#>  1 A         7              7
#>  2 A        10             17
#>  3 A         7             24
#>  4 A         4             28
#>  5 A         0             28
#>  6 B         9              9
#>  7 B         7             16
#>  8 B         9             25
#>  9 B         3             28
#> 10 B        10             38
#> 11 B         8             46
#> 12 B        13             59
#> 13 C         1              1
#> 14 C        12             13
#> 15 C         0             13
#> 16 C         8             21

xyz %>% 
    group_by(ID) %>% 
    mutate(column_to_make = rollapplyr(Value, 3, sum, partial = TRUE))
#> # A tibble: 16 x 3
#> # Groups:   ID [3]
#>    ID    Value column_to_make
#>    <fct> <int>          <int>
#>  1 A         7              7
#>  2 A        10             17
#>  3 A         7             24
#>  4 A         4             21
#>  5 A         0             11
#>  6 B         9              9
#>  7 B         7             16
#>  8 B         9             25
#>  9 B         3             19
#> 10 B        10             22
#> 11 B         8             21
#> 12 B        13             31
#> 13 C         1              1
#> 14 C        12             13
#> 15 C         0             13
#> 16 C         8             20

Created on 2019-03-12 by the reprex package (v0.2.1)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.