Hi, I need to find shorter code than the following to get to the final data frame. I would appreciate any help.

# Count, for each month of 2018, on how many rows the opening gate was above,
# below, or equal to the adjusted lock gate.
#
# Reads:   prigo5Years, using its columns date (text in day/month/year form),
#          opening.gate and Adjusted.lock.gate.
# Creates: loadfile, date, month_start, month_end, count_rows,
#          open_bigger_close, open_smaller_close, open_equal_close,
#          trading_days and data_frame (one row per month, January first).
loadfile <- as.character(prigo5Years$date)
date <- as.Date(loadfile, format = "%d/%m/%Y")

# First and last date counted for each month, January to December. These are
# exactly the bounds of the original month-by-month subsets.
month_start <- as.Date(c(
  "2018-01-01", "2018-02-01", "2018-03-04", "2018-04-01",
  "2018-05-01", "2018-06-03", "2018-07-01", "2018-08-01",
  "2018-09-02", "2018-10-02", "2018-11-01", "2018-12-02"
))
month_end <- as.Date(c(
  "2018-01-31", "2018-02-28", "2018-03-29", "2018-04-30",
  "2018-05-31", "2018-06-28", "2018-07-31", "2018-08-30",
  "2018-09-27", "2018-10-31", "2018-11-29", "2018-12-31"
))

# For every month, count the rows where compare(opening.gate,
# Adjusted.lock.gate) is TRUE. `compare` is a vectorised binary function
# such as `>`, `<` or `==`. Returns an integer vector of length 12.
count_rows <- function(compare) {
  vapply(
    seq_along(month_start),
    function(i) {
      in_month <- date >= month_start[i] & date <= month_end[i]
      rows <- prigo5Years[in_month, ]
      sum(compare(rows$opening.gate, rows$Adjusted.lock.gate))
    },
    integer(1)
  )
}

open_bigger_close <- count_rows(`>`)
open_smaller_close <- count_rows(`<`)
# The comparison must be elementwise. The previous `&&` is a scalar operator:
# it looked only at the first row of each month (and is an error for longer
# vectors since R 4.3), so the "equal" count was never a real row count.
open_equal_close <- count_rows(`==`)

trading_days <- open_bigger_close + open_smaller_close + open_equal_close

data_frame <- data.frame(
  open_bigger_close,
  open_smaller_close,
  open_equal_close,
  trading_days
)
data_frame
##    open_bigger_close open_smaller_close open_equal_close trading_days
## 1                 20                  3                0           23
## 2                 20                  0                0           20
## 3                 18                  2                0           20
## 4                 19                  0                0           19
## 5                 20                  2                0           22
## 6                 20                  0                0           20
## 7                 22                  0                0           22
## 8                 21                  1                0           22
## 9                 11                  2                0           13
## 10                16                  5                0           21
## 11                17                  4                0           21
## 12                14                  8                0           22

Without data to work with, it is easy to make a mistake in writing code. Here is my first attempt:

library(lubridate)
library(dplyr)
# Monthly summary for 2018: parse the dates, keep only the 2018 rows, then
# count per month how often the opening gate was above, below, or equal to
# the adjusted lock gate, plus the number of rows in that month.
DF <- prigo5year %>%
  mutate(
    date = dmy(date),
    Month = month(date)
  ) %>%
  filter(year(date) == 2018) %>%
  group_by(Month) %>%
  summarise(
    open_bigger_close = sum(opening.gate > Adjusted.lock.gate),
    open_smaller_close = sum(opening.gate < Adjusted.lock.gate),
    open_equal_close = sum(opening.gate == Adjusted.lock.gate),
    trading_days = n()
  )

Hi, thank you very much. What do these %>% marks mean?
Should I add the date?
I have to check every month of one year to see which month has the maximum and which has the minimum.

This part of the code

# Pipeline step: replace the date column with the result of lubridate's dmy().
mutate(date = dmy(date))

converts the date column from character strings to Date values (which R stores internally as numbers).

The %>% operator is called a pipe. It takes the result produced by the function on its left side and passes that as the first argument of the function on its right side. For example, this code

# Piped form: the data frame on the left becomes the first argument of mutate().
prigo5year %>% mutate(date = dmy(date))

is equal to this code

# Direct call: the data frame is written explicitly as the first argument.
mutate(prigo5year, date = dmy(date))

The code as written means "pass prigo5year to the mutate() function, pass that result to filter(), pass that result to mutate()..." and so on.
The advantage of using the pipe is that you can make a chain of calculations without the need to store intermediate results. The code could be rewritten without the chain like this:

# The same calculation as the piped version, written with explicit
# intermediate results. The parsed dates are stored in tmp instead of being
# written back into prigo5year, so the input data frame is left untouched
# (as in the piped version) and re-running this block does not apply dmy()
# to dates that were already parsed.
tmp <- mutate(prigo5year, date = dmy(date))
tmp <- filter(tmp, year(date) == 2018)
tmp <- mutate(tmp, Month = month(date))
tmp <- group_by(tmp, Month)
# One row per month: counts of rows where the opening gate is above, below,
# or equal to the adjusted lock gate, and the number of rows in the month.
DF <- summarize(tmp,
          open_bigger_close = sum(opening.gate > Adjusted.lock.gate),
          open_smaller_close = sum(opening.gate < Adjusted.lock.gate),
          open_equal_close = sum(opening.gate == Adjusted.lock.gate),
          trading_days = n())

That works, but I think the code is harder to read than the piped version, which emphasizes the logical progression of the calculation.

By the way, I fixed a typo in my code just now. A parenthesis was missing.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.