Help with loops!

Hi there!

I'm learning how to write loops. I am trying to calculate the rolling average of several columns and add them back into the original dataframe for several different sites.
The rollmean() line of code is exactly what I want it to be.

The problem I'm running into is that the current code works for one column but I can't get the rolling average for more than one column.

The final output would be the original df with two columns added titled 'roll.os' and 'roll.so4'.

Any help is appreciated. Thanks so much!

text <- "SiteNumber os  SO4 Date
3   1   1   1/1/2000
3   1   1   1/2/2000
3   1   1   1/3/2000
3   2   2   1/4/2000
3   2   2   1/5/2000
3   2   2   1/6/2000
3   3   3   1/7/2000
3   3   3   1/1/2000
7   3   3   1/2/2000
7   4   4   1/3/2000
7   4   4   1/4/2000
7   4   4   1/5/2000
7   5   5   1/6/2000
7   5   5   1/7/2000
7   5   5   1/8/2000
7   6   6   1/9/2000
"
df <- read.delim(text=text, header=TRUE)

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
df$Date <- mdy(df$Date)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:lubridate':
#> 
#>     intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
df <- df %>% 
  mutate(SiteNum = case_when(
    SiteNumber == "7" ~ 1L,
    SiteNumber == "3" ~ 2L,)
  )

library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
all_so4 = data.frame()

for(i in 1:2) {
  site <- subset(df, df$SiteNum==i)
  site$roll.so4 <- rollmean(site$SO4, 3, na.rm=TRUE , fill=NA, align="right")
  all <- data.frame(site)
  all_so4 <- rbind(all_so4, all)
}

# this is the final desired output
"SiteNumber  os  SO4 Date    roll.os roll.so4
3   1   1   1/1/2000    NA  NA
3   1   1   1/2/2000    NA  NA
3   1   1   1/3/2000    1   1
3   2   2   1/4/2000    1.33    1.33
3   2   2   1/5/2000    1.67    1.67
3   2   2   1/6/2000    2.00    2.00
3   3   3   1/7/2000    2.33    2.33
3   3   3   1/1/2000    2.67    2.67
7   3   3   1/2/2000    NA  NA
7   4   4   1/3/2000    NA  NA
7   4   4   1/4/2000    3.67    3.67
7   4   4   1/5/2000    4.00    4.00
7   5   5   1/6/2000    4.33    4.33
7   5   5   1/7/2000    4.67    4.67
7   5   5   1/8/2000    5.00    5.00
7   6   6   1/9/2000    5.33    5.33
Created on 2020-06-23 by the reprex package (v0.3.0)

Admittedly, i didn't really understand your expected output format. Do you really want a string to be output, instead of a tabular form. My solution below will give you a tibble, which you can convert if required.

And, one more point is that I didn't get the column names as you want easily. Sure, I could have used rename later, but I didn't as seemed unnecessary.

You can try something like this:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric

data_text <- "SiteNumber os  SO4 Date
3   1   1   1/1/2000
3   1   1   1/2/2000
3   1   1   1/3/2000
3   2   2   1/4/2000
3   2   2   1/5/2000
3   2   2   1/6/2000
3   3   3   1/7/2000
3   3   3   1/1/2000
7   3   3   1/2/2000
7   4   4   1/3/2000
7   4   4   1/4/2000
7   4   4   1/5/2000
7   5   5   1/6/2000
7   5   5   1/7/2000
7   5   5   1/8/2000
7   6   6   1/9/2000"

data_df <- read.table(text = data_text,
                      header = TRUE)

data_df %>%
    mutate(Date = mdy(Date)) %>%
    group_by(SiteNumber) %>%
    mutate_at(.vars = vars(os, SO4),
              .funs = list(roll = ~ rollmean(x = .x,
                                             k = 3,
                                             fill = NA,
                                             align="right"))) %>%
    ungroup()
#> # A tibble: 16 x 6
#>    SiteNumber    os   SO4 Date       os_roll SO4_roll
#>         <int> <int> <int> <date>       <dbl>    <dbl>
#>  1          3     1     1 2000-01-01   NA       NA   
#>  2          3     1     1 2000-01-02   NA       NA   
#>  3          3     1     1 2000-01-03    1        1   
#>  4          3     2     2 2000-01-04    1.33     1.33
#>  5          3     2     2 2000-01-05    1.67     1.67
#>  6          3     2     2 2000-01-06    2        2   
#>  7          3     3     3 2000-01-07    2.33     2.33
#>  8          3     3     3 2000-01-01    2.67     2.67
#>  9          7     3     3 2000-01-02   NA       NA   
#> 10          7     4     4 2000-01-03   NA       NA   
#> 11          7     4     4 2000-01-04    3.67     3.67
#> 12          7     4     4 2000-01-05    4        4   
#> 13          7     5     5 2000-01-06    4.33     4.33
#> 14          7     5     5 2000-01-07    4.67     4.67
#> 15          7     5     5 2000-01-08    5        5   
#> 16          7     6     6 2000-01-09    5.33     5.33

Hope this helps.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.