Vectorizing an if statement with the sapply function

Patmad · June 25, 2019, 10:13pm

The function below, which counts the time that falls within certain ranges, uses only one condition for all of the statements. I want to vectorize it with the sapply function, but it doesn't work correctly. How can I fix it? Do I have to do it another way? It is hard to use the ifelse function because I overwrite the dataset several times inside the if statement.

# Sample rows: start time, end time, category code and the expected number
# of hours for each row.
data <- data.frame(
  date1 = as.POSIXct(c(
    "2016-01-15 13:04:00", "2016-02-25 15:04:00", "2016-02-25 15:04:00",
    "2016-02-25 15:04:00", "2012-10-10 17:50:00", "2016-04-14 13:15:00",
    "2016-07-13 08:32:00", "2016-07-13 08:35:00", "2016-07-13 08:36:00",
    "2016-08-09 11:57:00"
  )),
  date2 = as.POSIXct(c(
    "2018-12-20 16:49:00", "2018-12-20 16:50:00", "2018-12-20 16:50:00",
    "2018-12-20 16:54:00", "2019-01-11 16:52:00", "2019-01-22 11:09:00",
    "2019-01-22 11:46:00", "2019-01-22 11:26:00", "2019-01-22 11:18:00",
    "2019-01-22 11:19:00"
  )),
  Mid.Category = c("A12", rep("BN1", 3), rep("A06", 6)),
  true_time = c(
    14983.77, 12223.78, 12223.78, 12223.85, 31975.05,
    14179.92, 12925.25, 12924.87, 12924.72, 12543.38
  )
)

# data.frame() may have turned the category codes into a factor (the default
# before R 4.0), so force them back to plain character strings.
data[["Mid.Category"]] <- as.character(data[["Mid.Category"]])
str(data)



#################### FUNCTION ###############################


# function_SLA(date1, date2): for one (date1, date2) pair, counts the minutes
# between the two timestamps that pass a category-specific filter and returns
# that count divided by 60 (i.e. hours). Vectorize() makes the wrapped function
# run once per pair of elements of date1 and date2.
# NOTE(review): the inner sapply() walks the whole global data$Mid.Category
# column on every call, so each pair yields one value per row of `data` rather
# than a single value for its own row -- presumably the reason the result is
# not what was expected. Passing the category in as a third argument would
# avoid this; confirm against the intended output.
function_SLA <- Vectorize(function(date1,date2){
  
# Attached on every call; hour() used below comes from lubridate.
library(lubridate)
  
  
# Dates, as "YYYY-MM-DD" strings, that are excluded from the count in the
# first branch below ("dni wolne" is Polish for "days off").
dni_wolne <- c("2018-01-01","2018-01-06","2018-04-01","2018-04-02",
              "2018-05-01","2018-05-03","2018-05-20","2018-05-30",
              "2018-08-15","2018-11-01","2018-11-11","2018-11-12",
              "2018-12-25","2018-12-26","2019-01-01","2019-01-06",
              "2019-04-21","2019-04-22","2019-05-01","2019-05-03",
              "2019-06-09","2019-06-20","2019-08-15","2019-11-01",
              "2019-11-11","2019-12-25","2019-12-26")


# One result per element of data$Mid.Category (read from the global `data`,
# not from the function's arguments).
sapply(data$Mid.Category, function(x)


# First group of categories: keep only minutes that fall on Monday-Saturday,
# between 08:00 and 21:59, and not on a date listed in dni_wolne. The weekday
# names are written in Polish, so this relies on weekdays() returning Polish
# names, which depends on the session locale.
if(x %in% c("B06","B12","BN0","BN1")){
  
  
dates <- seq(from=date1,to=date2,"mins")
dates <- dates[which(weekdays(dates) %in% c("poniedziałek","wtorek",
"środa","czwartek","piątek","sobota")

                  & hour(dates) >= 8 & hour(dates) <22
                  & !(as.character(dates,format="%Y-%m-%d") %in% dni_wolne))]

# Number of kept minutes converted to hours.
dates <- length(dates)/60

return(dates)


# Second group of categories: only the 08:00-21:59 hour filter applies.
} else if(x %in% c("A06","A12","AN0")){
  
dates <- seq(from=date1, to=date2, "mins")

dates <- dates[which(hour(dates) >= 8 & hour(dates) < 22)]

dates <- length(dates)/60

return(dates)

}
# There is no final else: a category in neither list yields NULL here.



)


}


)


# Store the result in a new column ("wynik" is Polish for "result").
data$wynik <- function_SLA(data$date1,data$date2)

Yarnabrina · June 26, 2019, 5:49am

Welcome to the community!

Does this help?

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

# Example rows: the two timestamps are entered as text first and parsed into
# date-times afterwards; true_time holds the expected number of hours.
dataset <- data.frame(
  date1 = c(
    "2016-01-15 13:04:00", "2016-02-25 15:04:00", "2016-02-25 15:04:00",
    "2016-02-25 15:04:00", "2012-10-10 17:50:00", "2016-04-14 13:15:00",
    "2016-07-13 08:32:00", "2016-07-13 08:35:00", "2016-07-13 08:36:00",
    "2016-08-09 11:57:00"
  ),
  date2 = c(
    "2018-12-20 16:49:00", "2018-12-20 16:50:00", "2018-12-20 16:50:00",
    "2018-12-20 16:54:00", "2019-01-11 16:52:00", "2019-01-22 11:09:00",
    "2019-01-22 11:46:00", "2019-01-22 11:26:00", "2019-01-22 11:18:00",
    "2019-01-22 11:19:00"
  ),
  Mid.Category = c("A12", rep("BN1", 3), rep("A06", 6)),
  true_time = c(
    14983.77, 12223.78, 12223.78, 12223.85, 31975.05,
    14179.92, 12925.25, 12924.87, 12924.72, 12543.38
  ),
  stringsAsFactors = FALSE
)
dataset$date1 <- ymd_hms(dataset$date1)
dataset$date2 <- ymd_hms(dataset$date2)

# Calendar dates that function_SLA excludes for "B" categories, stored as a
# Date vector so they can be matched against date(<date-time>) with %in%.
offdays <- ymd(c(
  "2018-01-01", "2018-01-06", "2018-04-01", "2018-04-02", "2018-05-01",
  "2018-05-03", "2018-05-20", "2018-05-30", "2018-08-15", "2018-11-01",
  "2018-11-11", "2018-11-12", "2018-12-25", "2018-12-26", "2019-01-01",
  "2019-01-06", "2019-04-21", "2019-04-22", "2019-05-01", "2019-05-03",
  "2019-06-09", "2019-06-20", "2019-08-15", "2019-11-01", "2019-11-11",
  "2019-12-25", "2019-12-26"
))

#' Count the hours between two timestamps that fall inside the counted window
#'
#' Builds the minute-by-minute sequence from `date_1` to `date_2` and counts
#' the minutes whose hour is between 8 (inclusive) and 22 (exclusive). For
#' categories starting with "B", minutes on a Sunday or on a date listed in the
#' global `offdays` vector are left out as well.
#'
#' @param date_1 Start of the interval (a single date-time).
#' @param date_2 End of the interval (a single date-time, not before `date_1`).
#' @param Mid_Category Character vector of category codes; every code must
#'   start with "A" or "B".
#' @return A numeric vector with one element per element of `Mid_Category`
#'   (named by the codes when `Mid_Category` has no names): the number of
#'   counted minutes divided by 60. An empty `Mid_Category` gives `numeric(0)`.
#'   A missing code, or one starting with neither "A" nor "B", is an error.
function_SLA <- function(date_1, date_2, Mid_Category)
{
  # Nothing to score: return a typed empty vector without building the
  # minute sequence.
  if (length(Mid_Category) == 0L)
  {
    return(numeric(0))
  }

  # The minute sequence and the hour filter do not depend on the category,
  # so they are computed once instead of once per category.
  total_minutes <- seq(from = date_1,
                       to = date_2,
                       by = "min")
  minute_hours <- hour(x = total_minutes)
  in_counted_hours <- (minute_hours >= 8) & (minute_hours < 22)

  # vapply() guarantees a numeric result with one value per category.
  vapply(X = Mid_Category,
         FUN = function(t)
         {
           if (is.na(t))
           {
             stop("Mid_Category contains a missing value", call. = FALSE)
           }
           if (startsWith(x = t,
                          prefix = "B"))
           {
             # wday(label = TRUE) returns locale-dependent labels; "Sun"
             # assumes English weekday names.
             valid_minutes_indices <- (in_counted_hours &
                                         (wday(x = total_minutes,
                                               label = TRUE) != "Sun") &
                                         (!(date(x = total_minutes) %in% offdays))
             )
           } else if (startsWith(x = t,
                                 prefix = "A"))
           {
             valid_minutes_indices <- in_counted_hours
           } else
           {
             # Fail with a clear message rather than an undefined-variable error.
             stop("Unsupported Mid_Category: ", t, call. = FALSE)
           }
           (sum(valid_minutes_indices) / 60)
         },
         FUN.VALUE = numeric(1)
  )
}

# Apply function_SLA row by row: Vectorize() wraps it so that it is called once
# per (date1, date2, Mid.Category) triple, and within() evaluates the
# expression with the columns of `dataset` in scope and returns a copy that
# has the new `score` column. The result is not assigned, so it is only
# printed.
within(data = dataset,
       expr = {
         score <- Vectorize(FUN = function_SLA)(date1, date2, Mid.Category)
       })
#>                  date1               date2 Mid.Category true_time    score
#> 1  2016-01-15 13:04:00 2018-12-20 16:49:00          A12  14983.77 14983.77
#> 2  2016-02-25 15:04:00 2018-12-20 16:50:00          BN1  12223.78 12223.78
#> 3  2016-02-25 15:04:00 2018-12-20 16:50:00          BN1  12223.78 12223.78
#> 4  2016-02-25 15:04:00 2018-12-20 16:54:00          BN1  12223.85 12223.85
#> 5  2012-10-10 17:50:00 2019-01-11 16:52:00          A06  31975.05 31975.05
#> 6  2016-04-14 13:15:00 2019-01-22 11:09:00          A06  14179.92 14179.92
#> 7  2016-07-13 08:32:00 2019-01-22 11:46:00          A06  12925.25 12925.25
#> 8  2016-07-13 08:35:00 2019-01-22 11:26:00          A06  12924.87 12924.87
#> 9  2016-07-13 08:36:00 2019-01-22 11:18:00          A06  12924.72 12924.72
#> 10 2016-08-09 11:57:00 2019-01-22 11:19:00          A06  12543.38 12543.38

Created on 2019-06-26 by the reprex package (v0.3.0)

Notes

I used the English weekdays of course, so you'll have to translate "Sun" to Polish.
You can also use purrr::pmap_dbl or base::mapply, but on my system, they are not performing better than Vectorize.
I've used lubridate throughout. You can always use the base R counterparts instead, but you'll have to call format many times.

Patmad · June 26, 2019, 3:24pm

Thank you so much !!!

system · July 3, 2019, 3:24pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.