The slowest functionality your code employs is printing out the progress of your process. You can easily gain the biggest performance increase by doing that less.
There are also occasions where your code seems to repeat a calculation over a row it has done an equivalent calculation for (unless I am mistaken), therefore I added additional skip checks.
The new code performs as the old code in terms of results, though if there are edge cases that I've not considered that your example data doesnt cover this may be misleading.
For convenience and to facilitate benchmarking, I use a function to generate more of your example data.
make_new_data <- function(reps) {
temp <- data.frame(
alarm = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
setpoint = c(10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 9, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10, 10, 8, 10, 10, 10)
)
# create a new column to capture if there is any changes to setpoint after any alarm
temp$check <- ""
temp
do.call(rbind,rep(list(temp),reps))
}
newdata<-make_new_data(200)
do_with_skips <- function(temp) {
temp$beenhere <- ""
# review everyrow in dataframe
for (i in 1:nrow(temp)) {
cat(round(i / nrow(temp) * 100, 2), "% \r") # prints the percentage complete in realtime.
if (temp$alarm[i] == 1 && temp$setpoint[i] >= 10) {
# for when alarm has occurred and the setpoint is 10 or above review the next 5 rows
for (j in 0:5) {
if (temp$setpoint[i] != temp$setpoint[i + j]) {
if (temp$beenhere[i + j] == "1") {
break
}
temp$beenhere[i + j] <- paste0(temp$beenhere[i + j], "1")
# for when there has been a change in the setpoint
for (q in 0:10) {
if (temp$setpoint[i] != temp$setpoint[i + q]) {
if (temp$check[i + q] == "1") {
break
}
temp$check[i + q] <- paste0(temp$check[i + q], "1")
if (temp$setpoint[i + q] != (temp$setpoint[i + q + 1])) {
break
}
}
}
}
}
}
}
temp$beenhere <- NULL
temp
}
do_with_skips_less_print <- function(temp) {
temp$beenhere <- ""
# review everyrow in dataframe
for (i in 1:nrow(temp)) {
if(i %% 100==0)
cat(round(i / nrow(temp) * 100, 2), "% \r") # prints the percentage complete in realtime.
if (temp$alarm[i] == 1 && temp$setpoint[i] >= 10) {
# for when alarm has occurred and the setpoint is 10 or above review the next 5 rows
for (j in 0:5) {
if (temp$setpoint[i] != temp$setpoint[i + j]) {
if (temp$beenhere[i + j] == "1") {
break
}
temp$beenhere[i + j] <- paste0(temp$beenhere[i + j], "1")
# for when there has been a change in the setpoint
for (q in 0:10) {
if (temp$setpoint[i] != temp$setpoint[i + q]) {
if (temp$check[i + q] == "1") {
break
}
temp$check[i + q] <- paste0(temp$check[i + q], "1")
if (temp$setpoint[i + q] != (temp$setpoint[i + q + 1])) {
break
}
}
}
}
}
}
}
temp$beenhere <- NULL
temp
}
do_without_skips <- function(temp) {
for (i in 1:nrow(temp)) {
cat(round(i / nrow(temp) * 100, 2), "% \r") # prints the percentage complete in realtime.
if (temp$alarm[i] == 1 && temp$setpoint[i] >= 10) {
# for when alarm has occurred and the setpoint is 10 or above review the next 5 rows
for (j in 0:5) {
if (temp$setpoint[i] != temp$setpoint[i + j]) {
# for when there has been a change in the setpoint
for (q in 0:10) {
if (temp$setpoint[i] != temp$setpoint[i + q]) {
temp$check[i + q] <- "1"
if (temp$setpoint[i + q] != (temp$setpoint[i + q + 1])) {
break
}
}
}
}
}
}
}
temp
}
library(bench)
mark(do_with_skips_less_print(newdata),
do_with_skips(newdata),
do_without_skips(newdata),
iterations = 30)
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory
<bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list>
1 do_with_skips_less_print(newdata) 58.3ms 68.2ms 14.3 47.6MB 11.9 30 25 2.09s <df> <Rprofmem>
2 do_with_skips(newdata) 272.6ms 299.6ms 3.30 47.6MB 2.86 30 26 9.09s <df> <Rprofmem>
3 do_without_skips(newdata) 324.7ms 356.7ms 2.70 90.1MB 3.78 30 42 11.12s <df> <Rprofmem>