Convert data from daily to monthly

Hello everybody,

I would like to ask for help. I would like to convert my data from daily to monthly. I tried to search for some solution in the internet but I'm still confused.
Thank you in advance. I really appreciate any help.
Best regards,

df<-data.frame(
           X = c(1L,2L,3L,4L,5L,6L,7L,8L,9L,10L,
                 11L,12L,13L,14L,15L),
        yyyy = c(2008L,2008L,2008L,2008L,2008L,2008L,
                 2008L,2008L,2008L,2008L,2008L,2008L,2008L,2008L,2008L),
          ri = c(-0.00728338007687596,-0.0183096782312803,
                 Inf,-0.035907451295242,0.0057190543237344,
                 -0.0226363034901215,-0.0378277647034841,0.0119325766193033,
                 -0.0403204823664911,-0.0542900725081914,0.0599738800819928,
                 0.00409967383412201,0.0127249011367669,-0.0102068317411085,
                Inf),
          rf = c(7.6,7.6,7.6,7.6,7.6,7.6,7.6,7.6,7.6,
                 7.6,7.6,7.6,7.6,7.6,7.6),
          rm = c(4.3429638556917e-05,7.70772313859037e-05,
                 0.000420954680905269,0.00186350134962684,0.000649379730395496,
                 0.000216346701282072,0.00115417682460048,
                 0.00043003254300326,0.000749669788307526,0.00118831000037139,
                 0.00102934216753432,0.00086425236168951,0.00115829659484429,
                 8.39519794678045e-05,0.000755764251111981),
        rmrf = c(-0.0759565703614431,-0.0759229227686141,
                 -0.0755790453190947,-0.0741364986503732,-0.0753506202696045,
                 -0.0757836532987179,-0.0748458231753995,-0.0755699674569967,
                 -0.0752503302116925,-0.0748116899996286,-0.0749706578324657,
                 -0.0751357476383105,-0.0748417034051557,-0.0759160480205322,
                 -0.075244235748888),
         SMB = c(-0.0138336234596368,-0.0376765830064032,
                  Inf ,-0.065582243903649,0.0116216591632856,
                 -0.0428305859026046,-0.078889960355383,0.0200490966891709,
                 -0.0810627441826267,-0.10914382364897,0.119653889549086,
                 0.0102117683739566,0.0233941872117376,-0.0199079742351951,
                 -0.0464478512502466),
         HML = c(-0.0178720437149426,-0.0414247348710134,
                 -0.0324046574203728,-0.0486990006301299,0.0141183944492492,
                 -0.0349429455202579,-0.071573645147338,0.0104204981114911,
                 -0.0794264686336368,-0.113973437526823,0.12592541617383,
                 0.00784965980643828,0.0285630957381206,-0.0253310703042621,
                 -0.0374480409836909),
         RMW = c(-0.00931978562560168,-0.0310040010584668,
                 -0.0243205682831627,-0.0595297285622089,0.000209071999381634,
                 -0.0494502793426794,-0.0846239374821405,0.0257605656858241,
                 -0.0757355238775755,-0.0987564042542379,0.0997879292530537,
                 0.00222220629944809,0.0194281285669282,-0.0135016416967778,
                 -0.0435607606145273),
         CMA = c(-0.0157924116505553,-0.0385093948927399,
                Inf,-0.0725936580944562,0.00629692093767557,
                 -0.0408029859076323,-0.0721803055715067,0.0204869920333833,
                 -0.0784663478297932,-0.10669270754608,0.118944263303556,
                 0.00818675587978938,0.0196567741178579,-0.0193026399768541,
                 -0.0449606326959603),
        date = as.factor(c("2008-01-02",
                           "2008-01-03","2008-01-04","2008-01-07","2008-01-08",
                           "2008-01-09","2008-01-10","2008-01-11","2008-01-14",
                           "2008-02-1","2008-02-6","2008-02-7","2008-02-8",
                           "2008-02-21","2008-02-22"))

what do you want to do with your none-date data, take averages ? take the max ? take the min ?

if you are happy doing your own search for solution and just need to be pointed to the right place, I would suggest to study the lubridate package, for manipulating dates,
and the dplyr package for data-frame manipulation, in particular group_by() and summarise() functions

2 Likes

This is an example of monthly aggregations using the tsibble package from the tidyverse

library(dplyr)
library(tsibble)
library(lubridate)

df<-data.frame(
    X = c(1L,2L,3L,4L,5L,6L,7L,8L,9L,10L,
          11L,12L,13L,14L,15L),
    yyyy = c(2008L,2008L,2008L,2008L,2008L,2008L,
             2008L,2008L,2008L,2008L,2008L,2008L,2008L,2008L,2008L),
    ri = c(-0.00728338007687596,-0.0183096782312803,
           Inf,-0.035907451295242,0.0057190543237344,
           -0.0226363034901215,-0.0378277647034841,0.0119325766193033,
           -0.0403204823664911,-0.0542900725081914,0.0599738800819928,
           0.00409967383412201,0.0127249011367669,-0.0102068317411085,
           Inf),
    rf = c(7.6,7.6,7.6,7.6,7.6,7.6,7.6,7.6,7.6,
           7.6,7.6,7.6,7.6,7.6,7.6),
    rm = c(4.3429638556917e-05,7.70772313859037e-05,
           0.000420954680905269,0.00186350134962684,0.000649379730395496,
           0.000216346701282072,0.00115417682460048,
           0.00043003254300326,0.000749669788307526,0.00118831000037139,
           0.00102934216753432,0.00086425236168951,0.00115829659484429,
           8.39519794678045e-05,0.000755764251111981),
    rmrf = c(-0.0759565703614431,-0.0759229227686141,
             -0.0755790453190947,-0.0741364986503732,-0.0753506202696045,
             -0.0757836532987179,-0.0748458231753995,-0.0755699674569967,
             -0.0752503302116925,-0.0748116899996286,-0.0749706578324657,
             -0.0751357476383105,-0.0748417034051557,-0.0759160480205322,
             -0.075244235748888),
    SMB = c(-0.0138336234596368,-0.0376765830064032,
            Inf ,-0.065582243903649,0.0116216591632856,
            -0.0428305859026046,-0.078889960355383,0.0200490966891709,
            -0.0810627441826267,-0.10914382364897,0.119653889549086,
            0.0102117683739566,0.0233941872117376,-0.0199079742351951,
            -0.0464478512502466),
    HML = c(-0.0178720437149426,-0.0414247348710134,
            -0.0324046574203728,-0.0486990006301299,0.0141183944492492,
            -0.0349429455202579,-0.071573645147338,0.0104204981114911,
            -0.0794264686336368,-0.113973437526823,0.12592541617383,
            0.00784965980643828,0.0285630957381206,-0.0253310703042621,
            -0.0374480409836909),
    RMW = c(-0.00931978562560168,-0.0310040010584668,
            -0.0243205682831627,-0.0595297285622089,0.000209071999381634,
            -0.0494502793426794,-0.0846239374821405,0.0257605656858241,
            -0.0757355238775755,-0.0987564042542379,0.0997879292530537,
            0.00222220629944809,0.0194281285669282,-0.0135016416967778,
            -0.0435607606145273),
    CMA = c(-0.0157924116505553,-0.0385093948927399,
            Inf,-0.0725936580944562,0.00629692093767557,
            -0.0408029859076323,-0.0721803055715067,0.0204869920333833,
            -0.0784663478297932,-0.10669270754608,0.118944263303556,
            0.00818675587978938,0.0196567741178579,-0.0193026399768541,
            -0.0449606326959603),
    date = as.factor(c("2008-01-02",
                       "2008-01-03","2008-01-04","2008-01-07","2008-01-08",
                       "2008-01-09","2008-01-10","2008-01-11","2008-01-14",
                       "2008-02-1","2008-02-6","2008-02-7","2008-02-8",
                       "2008-02-21","2008-02-22")))

df %>%
    mutate(date = ymd(date)) %>% 
    as_tsibble(index = date) %>% 
    index_by(year_month = ~ yearmonth(.)) %>% # monthly aggregates
    summarise(
        accum_rf = sum(rf, na.rm = TRUE),
        mean_rm = mean(rm, na.rm = TRUE)
    )
#> # A tsibble: 2 x 3 [1M]
#>   year_month acum_rf  mean_rm
#>        <mth>   <dbl>    <dbl>
#> 1   2008 jan    68.4 0.000623
#> 2   2008 feb    45.6 0.000847

Created on 2020-02-03 by the reprex package (v0.3.0)

2 Likes

My formulas for all other variables is
image
Thank you

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.