I have loaded dataset with 3.3M records using datatable. There are few datetime/date columns in my data.
1. Time taken for loading data.
user system elapsed
3.99 0.17 3.44
2.Time taken for converting date columns.
user system elapsed
29.78 0.04 30.13
#>reading data
getwd()
library(data.table)
library(esquisse)
library(readxl)
library(purrr)
library(fasttime)
library(lubridate)
ColnamesTransactionData_Train <- (read_xlsx("data\\train_data\\Updated_Column_Description.xlsx", range = "Transaction_data!A1:B9"
, col_names = FALSE,
col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf,
guess_max = min(14)))
ColnamesTransactionData_Train <- ColnamesTransactionData_Train$X__2
#####======Transactional Data Analysis=====#####
xxx = c("character","character","Date","character","character","Date","Date","character","numeric")
system.time(TransactionData_Train <- fread("data\\train_data\\TransactionData_Train.csv", col.names = ColnamesTransactionData_Train, colClasses = xxx))
str(TransactionData_Train)
#still the column are not converted to date type.. Any help ?
system.time(TransactionData_Train[, DateOfTransaction := as.Date(substring(TransactionData_Train$`The date of transaction`, 0, nchar(TransactionData_Train$`The date of transaction`)-9L),"%d%b%Y")])
````Preformatted text`