Learning How To Vectorize Code in R

I have this dataset in R that looks something like this:

   id = sample.int(10000, 100000, replace = TRUE)
res = c(1,0)
results = sample(res, 100000, replace = TRUE)
date_exam_taken = sample(seq(as.Date('1999/01/01'), as.Date('2020/01/01'), by="day"), 100000, replace = TRUE)


my_data = data.frame(id, results, date_exam_taken)
my_data <- my_data[order(my_data$id, my_data$date_exam_taken),]

my_data$general_id = 1:nrow(my_data)
my_data$exam_number = ave(my_data$general_id, my_data$id, FUN = seq_along)
my_data$general_id = NULL

Using the R programming language, I wrote this loop that calculates conditional probabilities for the next exam results of a student conditional on the previous exam:

    library(data.table)

    setDT(my_data)
    
    my_list = vector("list", length(unique(my_data$id)))
        
# Create an empty vector with pre-specified dimensions
my_vector = vector("list", 100)

for (i in 1:length(unique(my_data$id)))
{
    tryCatch({
        start_i = my_data[my_data$id == i,]
        pairs_i =  data.frame(first = head(start_i$results, -1), second = tail(start_i$results, -1))
        frame_i =  as.data.table(table(pairs_i))
        frame_i[, id := i]
        print(frame_i)
        my_vector[[i]] = frame_i
    }, error = function(e){})
}

final = rbindlist(my_vector)

I am now trying to "vectorize" this code for improved efficiency. Here is my attempt:

# I don't think I need to create a "list or vector" to store the results in advance?
my_vector = sapply(unique(my_data$id), function(i) {
    
    {tryCatch({
        setDT(my_data)
        start_i = my_data[my_data$id == i,]
        
        pairs_i =  data.frame(first = head(start_i$results, -1), second = tail(start_i$results, -1))
        frame_i =  as.data.frame(table(pairs_i))
      frame_i$i = i
        print(frame_i)
        return(frame_i)
    }, error = function(e){print(paste("An error occurred:", e))})
    }
    
})

# produced an error, so I tried a different code
 final = rbindlist(my_vector, fill = TRUE)
# not sure if this fully worked either?
final = do.call(rbind.data.frame, my_vector)

Have I correctly "vectorized" this code?

Thanks!

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.