Please use reprex
. You can get enough data in readily usable form simply with
library(magrittr)
head(my_data,25) %>% dput
and cutting and pasting the output into the window. As shown in the reprex
below. (Yours will be a bit wordier; I used a shortcut.)
suppressPackageStartupMessages(library(dplyr))
dat <- structure(list(
gvkey =
c(1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1013, 1013, 1013, 1013, 1013, 1013, 1021),
fyear =
c(2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2005, 2006, 2007, 2008, 2009, 2010, 2005),
cashflow =
c(NA, 63.54, 73255, 106229, 108.4, 86527, 127295, 147.36, 164.1, 170.1, 73.2, 101.3, 108.4, 104.7, NA, 162.3, 148.3, 138.9, 62, 86.9, NA)),
class =
c("spec_tbl_df", "tbl_df", "tbl", "data.frame"),
row.names =
c(NA, -21L), spec = structure(list( cols = list(gvkey = structure(list(),
class =
c("collector_double", "collector")), fyear = structure(list(),
class =
c("collector_double", "collector")), cashflow = structure(list(),
class =
c("collector_number", "collector"))), default = structure(list(),
class =
c("collector_guess", "collector")), skip = 1),
class = "col_spec"))
dat %>% group_by(gvkey) %>% mutate(yoy = cashflow - lag(cashflow,1)) %>% print(n = Inf)
#> # A tibble: 21 x 4
#> # Groups: gvkey [3]
#> gvkey fyear cashflow yoy
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1004 2004 NA NA
#> 2 1004 2005 63.5 NA
#> 3 1004 2006 73255 73191.
#> 4 1004 2007 106229 32974
#> 5 1004 2008 108. -106121.
#> 6 1004 2009 86527 86419.
#> 7 1004 2010 127295 40768
#> 8 1004 2011 147. -127148.
#> 9 1004 2012 164. 16.7
#> 10 1004 2013 170. 6
#> 11 1004 2014 73.2 -96.9
#> 12 1004 2015 101. 28.1
#> 13 1004 2016 108. 7.1
#> 14 1004 2017 105. -3.7
#> 15 1013 2005 NA NA
#> 16 1013 2006 162. NA
#> 17 1013 2007 148. -14
#> 18 1013 2008 139. -9.4
#> 19 1013 2009 62 -76.9
#> 20 1013 2010 86.9 24.9
#> 21 1021 2005 NA NA
Created on 2020-03-28 by the reprex package (v0.3.0)