library(purrr)
# Example data: one row per lowercase letter, each paired with a random
# standard-normal draw.
df <- data.frame(
  code = letters,
  value = rnorm(n = 26),
  stringsAsFactors = FALSE
)
# Print every row of `df`, one at a time, using an explicit index loop.
#
# df: a data frame. Each row is extracted as a one-row data frame and printed.
# Returns NULL invisibly (the value of the `for` loop).
forfunc <- function(df) {
  # Loop over the row count rather than a specific column, so the function
  # does not silently do nothing when `df` has no `code` column.
  for (i in seq_len(nrow(df))) {
    # drop = FALSE keeps the row a data frame even when `df` has one column.
    row <- df[i, , drop = FALSE]
    print(row) # or do something much more complex with the row
  }
}
# Print every row of `df`, one at a time, by walking over the row indices
# with purrr::walk().
#
# df: a data frame. Each row is extracted as a one-row data frame and printed.
# Returns the vector of row indices invisibly (walk() returns its input).
walkfor <- function(df) {
  # seq_len() gives an empty sequence for a zero-row data frame, whereas
  # 1:nrow(df) would give c(1, 0) and print two bogus rows.
  row_ids <- seq_len(nrow(df))
  # drop = FALSE keeps each row a data frame even when `df` has one column.
  walk(row_ids, function(i) print(df[i, , drop = FALSE]))
}
# Print the first two columns of `df` row by row, joined by a single space,
# using purrr::pwalk() to iterate over the columns in parallel.
#
# df: a data frame with at least two columns.
# Returns `df` invisibly (pwalk() returns its input).
pwalkfunc <- function(df) {
  # pwalk() passes one element per column; ..1 and ..2 pick the first two.
  print_pair <- function(...) {
    print(paste0(..1, " ", ..2))
  }
  pwalk(df, print_pair)
}
# Print the first two columns of `df`, one "<col1> <col2>" line per row, in a
# single vectorised call (no explicit iteration).
#
# df: a data frame with at least two columns.
# Returns NULL invisibly.
directfunc <- function(df) {
  # writeLines() emits exactly one line per element. The previous
  # cat(paste0(..., "\n")) put cat's default " " separator between elements,
  # so every line after the first began with a stray space, and a zero-row
  # input still printed " \n".
  writeLines(paste(df[, 1], df[, 2]))
}
library(microbenchmark)
# Time every approach in a single microbenchmark() call so that all
# expressions are reported together in one table (10 runs each, in
# microseconds) instead of five separate one-row tables.
microbenchmark(
  forfunc(df),
  walkfor(df),
  pwalkfunc(df),
  print(df),
  directfunc(df),
  times = 10L,
  unit = "us"
)
# EDITED: added timings for a slide-based variant (`slidefunc`, which is not
# defined in the code above).
# expr min lq mean median uq max neval
# forfunc(df) 9770 10224 11303 11061 11814 13777 10
# walkfor(df) 10170 10864 11808 11416 11917 20878 10
# pwalkfunc(df) 857 1306 1472 1561 1722 1821 10
# print(df) 749 904 1248 1333 1501 1592 10
# directfunc(df) 521 621 900 657 735 2693 10
# slidefunc(df) 71 72 223 73 76 1249 10
# In conclusion: since R is vectorised, it's usually best to go direct if you
# can. You don't need a for loop or any programmer-defined iterator when the
# base language (and many package extensions) already iterates for you by
# default.