passing strings as variable name and as argument in a data table within a loop

Dear R experts,

I am trying to pass strings as variable names to create new columns in a data table. Specifically, I want to do the following:

# Example data: x1 and x2 are numeric, x3 and x4 are used as grouping columns.
x=data.frame(x1=c(1,2,3,4,5),x2=c(4,5,6,7,8),x3=c('a','a','b','b','c'),x4=c('a','a','a','b','b'))
# Hard-coded version of the desired result: within each (x3, x4) group, keep x2
# where x1 == 1 (NA elsewhere), count the distinct non-NA values and store that
# count in a new column named a.
setDT(x)[,a:=uniqueN(ifelse(x1==1,x2,NA),na.rm=T),by=.(x3,x4)] 

However, I need to create multiple new columns (e.g., 'a' and 'b') and want to do it in a loop.

# Lookup table: each row pairs the name of a column to create (variablename)
# with the name of the existing column of x to test (targetcollumn).
lo=data.frame(variablename=c('a','b'),targetcollumn=c('x1','x2'))

Conceptually, I want to do

# Conceptual loop only: one new column per row of lo.
# As written, lo$targetcollumn[i] yields the column's *name* (e.g. 'x1'), so
# the comparison with 1 tests that name rather than the values stored in the
# column of x it refers to.
for (i in 1:nrow(lo)) {
setDT(x)[,lo$variablename[i]:=uniqueN(ifelse(lo$targetcollumn[i]==1,x2,NA),na.rm=T),by=.(x3,x4)]}

The problem is that I do not know how to transform
setDT(x)[,a:=uniqueN(ifelse(x1==1,x2,NA),na.rm=T),by=.(x3,x4)]
to be a formula in the loop.
How can I pass lo$variablename[i] and lo$targetcollumn[i] in the loop correctly? I tried eval and as.formula, neither worked. Can someone help me out? Thanks.

Veda

# Self-contained example: setDT() and uniqueN() come from data.table.
library(data.table)

# Example data: x1 and x2 are numeric, x3 and x4 are the grouping columns.
x <- data.frame(
  x1 = c(1, 2, 3, 4, 5),
  x2 = c(4, 5, 6, 7, 8),
  x3 = c("a", "a", "b", "b", "c"),
  x4 = c("a", "a", "a", "b", "b")
)
# Convert to a data.table once, by reference, instead of on every iteration.
setDT(x)

# Hard-coded version of what the loop below generalises:
# x[, a := uniqueN(ifelse(x1 == 1, x2, NA), na.rm = TRUE), by = .(x3, x4)]

# Lookup table: name of the column to create / name of the column to test.
# stringsAsFactors = FALSE keeps both as character on R < 4.0 as well.
lo <- data.frame(
  variablename = c("a", "b"),
  targetcollumn = c("x1", "x2"),
  stringsAsFactors = FALSE
)

# seq_len() yields an empty sequence (and so no iterations) when lo has no rows.
for (i in seq_len(nrow(lo))) {
  new_col <- lo$variablename[i]
  target_col <- lo$targetcollumn[i]
  # (new_col) on the left of := uses the string's value as the column name;
  # get(target_col) looks the target column up by name, so no code has to be
  # built as a string, parsed and evaluated.
  x[, (new_col) := uniqueN(ifelse(get(target_col) == 1, x2, NA), na.rm = TRUE),
    by = .(x3, x4)]
}

These versions both seem to work:

library(data.table)

# v1: the lookup table is a plain data.frame
x <- data.table(
  x1 = c(1, 2, 3, 4, 5),
  x2 = c(4, 5, 6, 7, 8),
  x3 = c("a", "a", "b", "b", "c"),
  x4 = c("a", "a", "a", "b", "b")
)

# Column names must be character strings rather than factors, hence
# stringsAsFactors = FALSE (data.frame() made factors by default before R 4.0).
lo <- data.frame(
  variablename = c("a", "b"),
  targetcollumn = c("x1", "x2"),
  stringsAsFactors = FALSE
)

# seq_len() yields an empty sequence (and so no iterations) when lo has no rows.
for (i in seq_len(nrow(lo))) {
  new_col <- lo$variablename[i]
  target_col <- lo$targetcollumn[i]
  # (new_col) on the left of := uses the string's value as the column name;
  # get(target_col) fetches the column of x whose name is stored in target_col.
  # Per (x3, x4) group: count the distinct non-NA x2 values on rows where the
  # target column equals 1.
  x[, (new_col) := uniqueN(ifelse(get(target_col) == 1, x2, NA), na.rm = TRUE),
    by = .(x3, x4)]
}


# v2: the lookup table is itself a data.table
x <- data.table(
  x1 = c(1, 2, 3, 4, 5),
  x2 = c(4, 5, 6, 7, 8),
  x3 = c("a", "a", "b", "b", "c"),
  x4 = c("a", "a", "a", "b", "b")
)

# data.table() does not convert strings to factors, so both columns stay
# character without any extra argument.
lo <- data.table(
  variablename = c("a", "b"),
  targetcollumn = c("x1", "x2")
)

# seq_len() yields an empty sequence (and so no iterations) when lo has no rows.
for (i in seq_len(nrow(lo))) {
  # Pull the two names out of row i before touching x, so the call on x below
  # only refers to simple local variables.
  new_col <- lo[i, variablename]
  target_col <- lo[i, targetcollumn]
  # (new_col) on the left of := uses the string's value as the column name;
  # get(target_col) fetches the column of x whose name is stored in target_col.
  x[, (new_col) := uniqueN(ifelse(get(target_col) == 1, x2, NA), na.rm = TRUE),
    by = .(x3, x4)]
}

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.