Calculate the value for each stock by year

Hello everybody,

I would like to ask for help. I have such data like that.
I would like to calculate the sum of "volume" for each stock ticker by yearly and put the result into a new variable "yearvolume".
I made a function with a loop like that but it didn't work, and I can't fix it by myself.

year_cp=unique(df$year)

for (j in 1:length(year_cp)){
df_year<-df%>%filter(year==year_cp[j])
df$yearvolume<- sum(df_year$volume,data=df_year)
}

I would appreciate any help.
Best,

df <- data.frame(stringsAsFactors = FALSE,  
                 X = c(1L, 293L, 584L, 871L, 1161L, 1455L, 1749L, 2032L, 2320L,
                       2621L, 2910L, 3206L, 3491L, 3768L, 4060L),
                 date = c(20160711L, 20160710L, 20170709L, 20170708L, 20170705L,
                          20180704L, 20180703L, 20180702L, 20180701L, 20160628L,
                          20160627L, 20170626L, 20170625L, 20160624L, 20160621L),
                 closedprice = c(18.7, 18.9, 19.25, 19.15, 19.05, 18.65, 18.65, 18.6, 18.75,
                                 18.35, 18, 18.35, 18.35, 18.4, 18.4),
                 volume = c(4733210L, 6069130L, 4623340L, 4396390L, 5769840L, 2249190L,
                            2908920L, 1991260L, 3974530L, 2918320L, 2751900L, 1842990L,
                            1946690L, 2462630L, 2977350L),
                 year = c(2016L, 2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L,
                          2018L, 2016L, 2016L, 2017L, 2017L, 2016L, 2016L),
                 yearvolume = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                 ticker = as.factor(c("AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
                                      "AAA", "AAA", "AAM", "AAM", "AAM", "AAM", "AAC",
                                      "AAC"))
)

There is no need to write a loop. You can use group_by() and summarize() from the dplyr package.

library(dplyr)
df<-data.frame(
  X = c(1L, 293L, 584L, 871L, 1161L, 1455L, 1749L, 2032L, 2320L,
        2621L, 2910L, 3206L, 3491L, 3768L, 4060L),
  date = c(20160711L, 20160710L, 20170709L, 20170708L, 20170705L,
           20180704L, 20180703L, 20180702L, 20180701L, 20160628L,
           20160627L, 20170626L, 20170625L, 20160624L, 20160621L),
  closedprice = c(18.7, 18.9, 19.25, 19.15, 19.05, 18.65, 18.65, 18.6, 18.75,
                  18.35, 18, 18.35, 18.35, 18.4, 18.4),
  volume = c(4733210L, 6069130L, 4623340L, 4396390L, 5769840L, 2249190L,
             2908920L, 1991260L, 3974530L, 2918320L, 2751900L, 1842990L,
             1946690L, 2462630L, 2977350L),
  year = c(2016L, 2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2018L,
           2018L, 2016L, 2016L, 2017L, 2017L, 2016L, 2016L),
  ticker = as.factor(c("AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
                       "AAA", "AAA", "AAM", "AAM", "AAM", "AAM", "AAC",
                       "AAC"))
)
VolumeSum <- df %>% group_by(ticker, year) %>% 
  summarize(yearvolume = sum(volume))
df <- inner_join(df, VolumeSum, by = c("year", "ticker"))
head(df)
#>      X     date closedprice  volume year ticker yearvolume
#> 1    1 20160711       18.70 4733210 2016    AAA   10802340
#> 2  293 20160710       18.90 6069130 2016    AAA   10802340
#> 3  584 20170709       19.25 4623340 2017    AAA   14789570
#> 4  871 20170708       19.15 4396390 2017    AAA   14789570
#> 5 1161 20170705       19.05 5769840 2017    AAA   14789570
#> 6 1455 20180704       18.65 2249190 2018    AAA   11123900

Created on 2019-08-01 by the reprex package (v0.2.1)

2 Likes

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.