 # Rolling lag based on percentage change?

I am sure there must be a simple solution to this...

I want to conditionally mutate a new column by groups, to create a rolling total based on the `pct_change` column.

``````library(tidyverse)

#simulate some data
df <- tibble(
quarter = c("2009.1", "2009.2", "2009.3", "2009.4", "2010.1", "2010.2", "2010.3", "2010.4"),
group = c("a", "a", "a", "a", "b", "b", "b", "b"),
amount = c(100,NA, NA, NA, 200, NA, NA, NA),
pct_change = c(1.05, 1.06, 1.07, NA, 1.02, 0.99, 1.05, NA)
)

df
#> # A tibble: 8 x 4
#>   quarter group amount pct_change
#>   <chr>   <chr>  <dbl>      <dbl>
#> 1 2009.1  a        100       1.05
#> 2 2009.2  a         NA       1.06
#> 3 2009.3  a         NA       1.07
#> 4 2009.4  a         NA      NA
#> 5 2010.1  b        200       1.02
#> 6 2010.2  b         NA       0.99
#> 7 2010.3  b         NA       1.05
#> 8 2010.4  b         NA      NA

#this doesn't work
df %>%
group_by(group) %>%
mutate(new_amount = case_when(!is.na(amount) ~ amount,
is.na(amount) ~ lag(amount, n=1L)*lag(pct_change, n=1L)))
#> # A tibble: 8 x 5
#> # Groups:   group 
#>   quarter group amount pct_change new_amount
#>   <chr>   <chr>  <dbl>      <dbl>      <dbl>
#> 1 2009.1  a        100       1.05        100
#> 2 2009.2  a         NA       1.06        105
#> 3 2009.3  a         NA       1.07         NA
#> 4 2009.4  a         NA      NA            NA
#> 5 2010.1  b        200       1.02        200
#> 6 2010.2  b         NA       0.99        204
#> 7 2010.3  b         NA       1.05         NA
#> 8 2010.4  b         NA      NA            NA

#neither does this
df %>%
group_by(group) %>%
mutate(new_amount = case_when(!is.na(amount) ~ amount,
is.na(amount) ~ lag(new_amount, n=1L)*lag(pct_change, n=1L)))

#Output should look like this
df <- tibble(
quarter = c("2009.1", "2009.2", "2009.3", "2009.4", "2010.1", "2010.2", "2010.3", "2010.4"),
group = c("a", "a", "a", "a", "b", "b", "b", "b"),
amount = c(100,NA, NA, NA, 200, NA, NA, NA),
pct_change = c(1.05, 1.06, 1.07, NA, 1.02, 0.99, 1.05, NA),
new_amount = c(100, 105, 111.3, 119.091, 200, 204, 201.96, 212.058)
)

df
#> # A tibble: 8 x 5
#>   quarter group amount pct_change new_amount
#>   <chr>   <chr>  <dbl>      <dbl>      <dbl>
#> 1 2009.1  a        100       1.05       100
#> 2 2009.2  a         NA       1.06       105
#> 3 2009.3  a         NA       1.07       111.
#> 4 2009.4  a         NA      NA          119.
#> 5 2010.1  b        200       1.02       200
#> 6 2010.2  b         NA       0.99       204
#> 7 2010.3  b         NA       1.05       202.
#> 8 2010.4  b         NA      NA          212.
``````

Created on 2019-10-23 by the reprex package (v0.3.0)

Many thanks.

Perhaps not the most elegant code since I used a dash of base R, but that seems to do what you want:

``````
library(tidyverse)

#simulate some data
df <- tibble(
quarter = c("2009.1", "2009.2", "2009.3", "2009.4", "2010.1", "2010.2", "2010.3", "2010.4"),
group = c("a", "a", "a", "a", "b", "b", "b", "b"),
amount = c(100,NA, NA, NA, 200, NA, NA, NA),
pct_change = c(1.05, 1.06, 1.07, NA, 1.02, 0.99, 1.05, NA)
)

df %>%
mutate(new_amount = amount) -> df

while(any(is.na(df\$new_amount))) {
df %>%
mutate(new_amount = if_else(is.na(new_amount), lag(new_amount)*lag(pct_change), new_amount)) -> df
}

df
#> # A tibble: 8 x 5
#>   quarter group amount pct_change new_amount
#>   <chr>   <chr>  <dbl>      <dbl>      <dbl>
#> 1 2009.1  a        100       1.05       100
#> 2 2009.2  a         NA       1.06       105
#> 3 2009.3  a         NA       1.07       111.
#> 4 2009.4  a         NA      NA          119.
#> 5 2010.1  b        200       1.02       200
#> 6 2010.2  b         NA       0.99       204
#> 7 2010.3  b         NA       1.05       202.
#> 8 2010.4  b         NA      NA          212.
``````

Created on 2019-10-23 by the reprex package (v0.3.0)

2 Likes

Fantastic! Thanks. Very grateful

Just as a follow-up, I tried this on my real data (~1500 rows), and was spectacularly slow (still running after 4 mins). Grateful if anyone knows any more efficient approaches.

Hummm, the while loop must be stuck. On 1600 rows, the following takes less than 2 sec on my old computer:

``````library(tidyverse)

#simulate some data
df <- tibble(
quarter = c("2009.1", "2009.2", "2009.3", "2009.4", "2010.1", "2010.2", "2010.3", "2010.4"),
group = c("a", "a", "a", "a", "b", "b", "b", "b"),
amount = c(100,NA, NA, NA, 200, NA, NA, NA),
pct_change = c(1.05, 1.06, 1.07, NA, 1.02, 0.99, 1.05, NA)
)

df <- replicate(200, df, simplify = FALSE) %>% bind_rows()

df %>%
mutate(new_amount = amount) -> df

p <- progress_estimated(nrow(df))
for(i in 1:nrow(df)) {
df %>%
mutate(new_amount = if_else(is.na(new_amount), lag(new_amount)*lag(pct_change), new_amount)) -> df
p\$tick()\$print()
}

df
#> # A tibble: 1,600 x 5
#>    quarter group amount pct_change new_amount
#>    <chr>   <chr>  <dbl>      <dbl>      <dbl>
#>  1 2009.1  a        100       1.05       100
#>  2 2009.2  a         NA       1.06       105
#>  3 2009.3  a         NA       1.07       111.
#>  4 2009.4  a         NA      NA          119.
#>  5 2010.1  b        200       1.02       200
#>  6 2010.2  b         NA       0.99       204
#>  7 2010.3  b         NA       1.05       202.
#>  8 2010.4  b         NA      NA          212.
#>  9 2009.1  a        100       1.05       100
#> 10 2009.2  a         NA       1.06       105
#> # … with 1,590 more rows
``````

Created on 2019-10-23 by the reprex package (v0.3.0)

Try that alternative; I also added a progression bar for you to monitor how things are moving forward!

++

3 Likes

Amazing! That works perfectly! Thanks so much for your perseverance with this. very, very grateful.
Peter

1 Like

It seems like you'd really like the "cumulative product" of the `pct_change` column, which you can then multiply by your original amount to get the `new_amount`. How's this?

``````library(tidyverse)

#simulate some data
df <- tibble(
quarter = c("2009.1", "2009.2", "2009.3", "2009.4", "2010.1", "2010.2", "2010.3", "2010.4"),
group = c("a", "a", "a", "a", "b", "b", "b", "b"),
amount = c(100,NA, NA, NA, 200, NA, NA, NA),
pct_change = c(1.05, 1.06, 1.07, NA, 1.02, 0.99, 1.05, NA)
)

df
#> # A tibble: 8 x 4
#>   quarter group amount pct_change
#>   <chr>   <chr>  <dbl>      <dbl>
#> 1 2009.1  a        100       1.05
#> 2 2009.2  a         NA       1.06
#> 3 2009.3  a         NA       1.07
#> 4 2009.4  a         NA      NA
#> 5 2010.1  b        200       1.02
#> 6 2010.2  b         NA       0.99
#> 7 2010.3  b         NA       1.05
#> 8 2010.4  b         NA      NA

df %>%
# Create a filled out `amount` column that we can multiply by
fill(amount, .direction = "down") %>%
# Lag `pct_change` and backfill with 1 to get something we can take the cumprod() of
group_by(group) %>%
mutate(lag_pct_change = lag(pct_change)) %>%
replace_na(list(lag_pct_change = 1)) %>%
# `growth` is the pct_change values multiplied together, which we can then
# use to scale up our original `amount`
mutate(
growth = cumprod(lag_pct_change),
new_amount = amount * growth
)
#> # A tibble: 8 x 7
#> # Groups:   group 
#>   quarter group amount pct_change lag_pct_change growth new_amount
#>   <chr>   <chr>  <dbl>      <dbl>          <dbl>  <dbl>      <dbl>
#> 1 2009.1  a        100       1.05           1      1          100
#> 2 2009.2  a        100       1.06           1.05   1.05       105
#> 3 2009.3  a        100       1.07           1.06   1.11       111.
#> 4 2009.4  a        100      NA              1.07   1.19       119.
#> 5 2010.1  b        200       1.02           1      1          200
#> 6 2010.2  b        200       0.99           1.02   1.02       204
#> 7 2010.3  b        200       1.05           0.99   1.01       202.
#> 8 2010.4  b        200      NA              1.05   1.06       212.
``````
5 Likes

That is awesome - `cumprod` is completely new to me. Thanks!