How to add columns with random values to a data.table?

How can I add columns with random values to a data.table?
How can I make the new columns L1 and L2 hold different values from each other?

library(data.table)

> ColList =c("L1","L2")

> ss<-as.data.table(replicate(6,rexp(3)))

> ss
         V1         V2         V3        V4        V5        V6
1: 1.923995 1.20302097 0.37898679 0.2217218 1.9059492 1.6427446
2: 2.341905 0.37402232 0.08830153 1.1923393 0.2844974 0.1004820
3: 2.761185 0.02096285 2.06375748 0.6477536 0.7691468 0.5830274

> ColList
[1] "L1" "L2"

> ss[,(ColList) := {.(replicate(1,rexp(3)))}]

> ss
         V1         V2         V3        V4        V5        V6          L1          L2
1: 1.923995 1.20302097 0.37898679 0.2217218 1.9059492 1.6427446 0.911114781 0.911114781
2: 2.341905 0.37402232 0.08830153 1.1923393 0.2844974 0.1004820 0.002369246 0.002369246
3: 2.761185 0.02096285 2.06375748 0.6477536 0.7691468 0.5830274 0.905753789 0.905753789

> ss[,(ColList) := {rep(rexp(1),3)}]

> ss
         V1         V2         V3        V4        V5        V6       L1       L2
1: 1.923995 1.20302097 0.37898679 0.2217218 1.9059492 1.6427446 0.111691 0.111691
2: 2.341905 0.37402232 0.08830153 1.1923393 0.2844974 0.1004820 0.111691 0.111691
3: 2.761185 0.02096285 2.06375748 0.6477536 0.7691468 0.5830274 0.111691 0.111691

> ss[,(ColList) := {rep(rexp(3),1)}]

> ss
         V1         V2         V3        V4        V5        V6        L1        L2
1: 1.923995 1.20302097 0.37898679 0.2217218 1.9059492 1.6427446 2.0016799 2.0016799
2: 2.341905 0.37402232 0.08830153 1.1923393 0.2844974 0.1004820 0.8342851 0.8342851
3: 2.761185 0.02096285 2.06375748 0.6477536 0.7691468 0.5830274 1.4054135 1.4054135

> ss[,(ColList) := {rep(rexp(.N),1)}]

> ss
         V1         V2         V3        V4        V5        V6        L1        L2
1: 1.923995 1.20302097 0.37898679 0.2217218 1.9059492 1.6427446 3.6161513 3.6161513
2: 2.341905 0.37402232 0.08830153 1.1923393 0.2844974 0.1004820 0.4961897 0.4961897
3: 2.761185 0.02096285 2.06375748 0.6477536 0.7691468 0.5830274 1.0814954 1.0814954

Hi @HerClau,
Tricky problem. I had to use a user-defined function to get a solution: it's not pretty, but it works. It might give you an idea for something better.

library(data.table)

# Names of the columns to add, and a small 3-row table to add them to.
ColList <- c("L1","L2")
ss <- as.data.table(replicate(6,rexp(3)))
ss

# Attempt 1: rexp(1) draws a single number and rep() repeats it three times.
# The right-hand side is one vector, and that same vector is written to every
# column named in ColList, so all cells of L1 and L2 are equal.
ss[,(ColList) := {rep(rexp(1),3)}]
ss

# Attempt 2: three different draws, but still only one vector, so L1 and L2
# end up as copies of each other.
ss[,(ColList) := {rep(rexp(3),1)}]
ss

# Attempt 3: same as attempt 2, using .N instead of the hard-coded row count;
# L1 and L2 are still identical.
ss[,(ColList) := {rep(rexp(.N),1)}]
ss

# How to fix?
#' Draw an independent exponential sample for each new column
#'
#' @param n Number of rows, i.e. values per column; a single non-negative
#'   number.
#' @param cols Vector with one element per column to generate; only its length
#'   is used. Defaults to the global `ColList`, which is looked up only when
#'   `cols` is not supplied.
#' @return A data.frame with `n` rows and `length(cols)` columns (default
#'   names `V1`, `V2`, ...), each column holding its own `rexp()` draws.
FUN <- function(n, cols = ColList) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  n_cols <- length(cols)
  # A single rexp() call supplies every cell; the matrix is filled column by
  # column, so each column receives a different slice of the draws.
  draws <- matrix(rexp(n * n_cols), nrow = n, ncol = n_cols)
  as.data.frame(draws)
}

# Preview the helper's output: a 3-row data.frame with one column per entry
# of ColList.
FUN(3)

# Assign the data.frame returned by FUN to the ColList columns, so L1 and L2
# no longer share values. FUN is called again here, so these are fresh draws,
# not the values previewed above.
ss[,(ColList) := {FUN(3)}]
ss

HTH

replicate will, by default, try to combine the different results it creates into an array. We can have it return a list instead, with each element being a different result, using the simplify = FALSE argument. Assignment in a data.table is best done using lists, so we can use that.

# simplify = FALSE makes replicate() return a list holding one rexp(.N) vector
# per name in ColList; each list element is assigned to its own column.
ss[, (ColList) := replicate(length(ColList), rexp(.N), simplify = FALSE)]
ss
#           V1        V2        V3        V4         V5        V6        L1         L2
# 1: 0.6610169 0.4820143 1.6450835 0.9941823 0.23075773 0.8260628 0.1791834 0.04202013
# 2: 0.3877575 0.1101192 0.1109054 0.4232560 0.07832235 0.2079005 0.7009319 2.00463266
# 3: 1.2564525 0.9422300 0.1572804 1.4132214 0.64212319 0.5494943 0.5361786 0.04683751
1 Like

Is it possible to improve these timings?
Are they what one would expect?
Is there something I could enable in R to improve performance?
sessionInfo()
version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)

Matrix products: default

Random number generation:
RNG: Mersenne-Twister
Normal: Inversion
Sample: Rounding

Processor: AMD Ryzen 7 1800X Eight-Core Processor 3.90 GHz
Installed RAM 16.0 GB

library(data.table)
library(microbenchmark)
# Benchmark several ways of filling the ColList columns with random draws.
nrow <- 1e7
ColList <- c("L1", "L2")
set.seed(42)

ss <- as.data.table(replicate(2, rexp(nrow)))
print(ss, topn = 3)
ColList
# Build `ta` up front. microbenchmark() runs its expressions in random order by
# default, so DT1 must not rely on the in-benchmark assignment having run first.
ta <- replicate(length(ColList), rexp(nrow))
ttDT <- microbenchmark(
  # Generation cost on its own, as a baseline for the other timings.
  ta <- replicate(length(ColList), rexp(nrow)),
  # Assignment only: the random matrix already exists, so rexp() is not timed.
  DT1 = ss[, (ColList) := {as.list(as.data.table(ta))}],

  DT2 = ss[, (ColList) := {
    as.list(as.data.table(replicate(length(ColList), rexp(nrow))))
  }],

  DT3 = ss[, (ColList) := {
    as.data.table(replicate(length(ColList), rexp(nrow)))
  }],

  DT4 = ss[, (ColList) := {
    as.data.table(replicate(length(ColList), rexp(.N)))
  }],

  ## https://community.rstudio.com/u/nwerth
  DT5 = ss[, (ColList) := replicate(length(ColList), rexp(.N),
                                    simplify = FALSE)],

  # mapply(function(x, y) rep(rexp(x), y, length.out = 3), 3, c(1, 1))
  # `length.out` is spelled out rather than abbreviated to `len`, so the call
  # does not depend on partial argument matching.
  DT6 = ss[, (ColList) := {
    as.data.table(mapply(
      function(x, y) rep(rexp(x), y, length.out = .N),
      .N, c(1, 1)
    ))
  }],

  # Same as DT6, but the number of generated columns follows ColList.
  DT7 = ss[, (ColList) := {
    as.data.table(mapply(
      function(x, y) rep(rexp(x), y, length.out = .N),
      .N, rep(1, length(ColList))
    ))
  }]
)
ttDT
ttDT
Unit: milliseconds
                                         expr      min         lq      mean    median        uq       max neval
 ta <- replicate(length(ColList), rexp(nrow)) 753.2457  782.72410  877.8430  930.9511  940.1743  956.8393   100
                                          DT1  82.6479   83.94285  172.8322  134.5889  283.2170  308.3972   100
                                          DT2 844.7064  993.69225  989.1314 1007.4347 1014.7164 1092.7617   100
                                          DT3 844.5222  900.12560  975.4255 1006.7963 1011.3696 1085.1980   100
                                          DT4 844.3975  959.01670  981.6247 1008.3374 1015.3442 1079.5024   100
                                          DT5 662.6736  687.03540  764.6300  714.5847  858.8434  883.7546   100
                                          DT6 892.9834 1029.61100 1043.5304 1070.9019 1098.1291 1132.0790   100
                                          DT7 893.1555  942.93830 1036.4575 1059.7355 1089.2190 1144.0754   100
library(ggplot2)
# Visualise the benchmark timings stored in ttDT.
autoplot(ttDT)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

This is as far as I have got.

library(data.table)
> ColList <-c("L1","L2")
> set.seed(42)

> ss<-as.data.table(replicate(6,rexp(3)))
> ss
          V1        V2        V3        V4         V5        V6
1: 0.1983368 0.0381919 0.3139846 0.7148625 0.09614656 0.3098157
2: 0.6608953 0.4731766 0.4101296 1.3447156 0.05714136 0.4751912
3: 0.2834910 1.4636271 1.1915978 2.4086844 1.25396105 0.6221103

> ColList
[1] "L1" "L2"

> ta<-replicate(2,rexp(3))
> ss[,(ColList) := {as.list(as.data.table(ta))}]
> ss
          V1        V2        V3        V4         V5        V6        L1        L2
1: 0.1983368 0.0381919 0.3139846 0.7148625 0.09614656 0.3098157 1.2455803 0.6658322
2: 0.6608953 0.4731766 0.4101296 1.3447156 0.05714136 0.4751912 0.3703395 4.9959685
3: 0.2834910 1.4636271 1.1915978 2.4086844 1.25396105 0.6221103 4.8628050 0.2235573

> ss[,(ColList) := {as.list(as.data.table(replicate(2,rexp(3))))}]
> ss
          V1        V2        V3        V4         V5        V6        L1        L2
1: 0.1983368 0.0381919 0.3139846 0.7148625 0.09614656 0.3098157 1.2113841 0.2799575
2: 0.6608953 0.4731766 0.4101296 1.3447156 0.05714136 0.4751912 0.7190924 0.2311141
3: 0.2834910 1.4636271 1.1915978 2.4086844 1.25396105 0.6221103 1.3084920 1.2870888

> ss[,(ColList) := {as.list(as.data.table(replicate(2,rexp(3))))}]
> ss
          V1        V2        V3        V4         V5        V6        L1        L2
1: 0.1983368 0.0381919 0.3139846 0.7148625 0.09614656 0.3098157 0.5693856 0.3545537
2: 0.6608953 0.4731766 0.4101296 1.3447156 0.05714136 0.4751912 3.0185564 1.7564090
3: 0.2834910 1.4636271 1.1915978 2.4086844 1.25396105 0.6221103 0.4975908 0.7374990

> ss[,(ColList) := {as.data.table(replicate(2,rexp(3)))}]
> ss
          V1        V2        V3        V4         V5        V6         L1        L2
1: 0.1983368 0.0381919 0.3139846 0.7148625 0.09614656 0.3098157 0.02882587 1.9020858
2: 0.6608953 0.4731766 0.4101296 1.3447156 0.05714136 0.4751912 0.35121455 0.7782936
3: 0.2834910 1.4636271 1.1915978 2.4086844 1.25396105 0.6221103 0.39265985 0.6563170

But I think it could be improved by using one of the "apply" family of functions.
I am thinking of data.tables with many more rows and columns!
How could I use the rep() function in place of replicate()?

1 Like

Maybe changing the number of CPU threads used by data.table can speed things up?
?getDTthreads

1 Like