rnso
rnso

Reputation: 24623

How to send data.table columns to a function

I have the following data and code:

# Example data used throughout: 50 rows with one numeric column (vnum1) and
# two factor columns (vfac1 with levels "1"-"4", vch1 with levels "A"-"E").
# The object carries the classes "data.table" and "data.frame".
# NOTE(review): this looks like dput() output pasted into the question.
mydt = structure(list(vnum1 = c(0.517551446921093, -0.997822163825322, 
3.40784990301597, -0.20990292802279, 0.171252718589118, -0.624084617915488, 
0.0979152932727754, -0.673949942523713, 0.689937370719125, -0.356403906786312, 
-0.565253563082689, -0.725285109477077, -0.343253827285705, -0.515803106223986, 
2.21193745540815, 0.179392018244011, 0.695885203438304, -0.869946981188651, 
0.170084087339536, 0.864392658315656, 0.801471783050381, 0.753880989575548, 
-0.572671791856263, -0.238511443188091, -1.1837711276515, 1.13728246296508, 
0.702244681081861, -0.851470541269798, 0.0471820411719659, 0.547952252697306, 
0.527539936397851, 0.247070882010565, -0.562100684713534, -1.05183021003772, 
0.934263969812236, -0.603673312084538, -2.00612207642211, 0.2312103046843, 
-0.214991379754579, 0.282701708464789, 0.289934023279607, 0.567328033965404, 
-0.359157137438815, 0.648221129776207, 0.857904763904759, 0.289415512264559, 
1.06555885899638, 0.333119386976963, -1.46070627726311, 0.0552050036156248
), vfac1 = structure(c(2L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 1L, 4L, 
4L, 3L, 1L, 3L, 1L, 4L, 4L, 4L, 1L, 2L, 2L, 4L, 2L, 4L, 1L, 3L, 
4L, 1L, 2L, 2L, 2L, 1L, 3L, 4L, 1L, 2L, 1L, 3L, 1L, 4L, 2L, 3L, 
2L, 1L, 2L, 2L, 2L, 3L, 4L, 2L), .Label = c("1", "2", "3", "4"
), class = "factor"), vch1 = structure(c(3L, 4L, 5L, 4L, 1L, 
5L, 5L, 3L, 3L, 4L, 1L, 4L, 3L, 5L, 1L, 3L, 4L, 5L, 1L, 3L, 5L, 
2L, 5L, 5L, 1L, 2L, 5L, 5L, 1L, 3L, 4L, 1L, 2L, 2L, 5L, 1L, 4L, 
2L, 1L, 5L, 4L, 4L, 3L, 2L, 5L, 4L, 3L, 2L, 3L, 2L), .Label = c("A", 
"B", "C", "D", "E"), class = "factor")), .Names = c("vnum1", 
"vfac1", "vch1"), class = c("data.table", "data.frame"), row.names = c(NA, 
50L))


# Mean and standard deviation of vnum1 for every (vfac1, vch1) combination.
mydt[, .(mean = mean(vnum1), sd = sd(vnum1)), by = .(vfac1, vch1)]
    vfac1 vch1        mean         sd
 1:     2    C  0.52725962 0.54536269
 2:     1    D -1.50197212 0.71297571
 3:     1    E  1.16354778 2.13889714
 4:     2    D  0.22424664 0.31039463
 5:     1    A  0.23359711 1.10743823
 6:     3    E -0.56994386 0.07656659
 7:     2    E  0.29615501 0.67455339
 8:     3    C -0.67394994         NA
 9:     1    C  0.17334177 0.73057650
10:     4    D  0.16974065 0.74408077
11:     4    A -0.56525356         NA
12:     3    D -0.07897854 0.91401552
13:     4    C -0.64065713 1.15972463
14:     4    E -0.03087801 0.67895741
15:     4    B -0.14897461 1.27683063
16:     3    B  0.28487787 0.69502367
17:     2    A -0.27824564 0.46022423
18:     1    B  0.64822113         NA
19:     2    B  0.05520500         NA

I want to create the following function, to which I can pass the column names and get the above result. However, the following function is not working:

# Non-working attempt (kept as posted in the question):
#  - the formal arguments are written as quoted strings, which R does not
#    accept as parameter names;
#  - mean('vnum1') / sd('vnum1') and list('vfac1', 'vch1') receive plain
#    character constants, not references to columns;
#  - the body reads the global `mydt` instead of the `ddt` argument.
myfn = function(ddt, 'vnum1', 'vfac1', 'vch1'){
        mydt[,list(mean=mean('vnum1'), sd=sd('vnum1')),list('vfac1', 'vch1')]
}

How can I pass column names (or the column vectors themselves) to a function so that I get this result? Thanks for your help.

Upvotes: 3

Views: 141

Answers (2)

akrun
akrun

Reputation: 887901

You could try

 # Summarise one column (mean and sd) by two grouping columns.
 #   dt     : the data.table to aggregate
 #   v1     : value column, given as a quoted string
 #   v2, v3 : grouping columns, given as unquoted names
 myfn <- function(dt, v1, v2, v3){
   # j:  as.name(v1) turns the string into a symbol, eval() then evaluates it.
   # by: substitute() captures the unevaluated v2/v3 arguments and deparse()
   #     converts them to strings, so `by` receives a character vector.
   dt[, list(mean=mean(eval(as.name(v1))), sd=sd(eval(as.name(v1)))), 
        by=c(deparse(substitute(v2)), deparse(substitute(v3)))]
 }
 # The value column is quoted; the grouping columns are bare names.
 myfn(mydt, 'vnum1', vfac1, vch1)
 #   vfac1 vch1        mean         sd
 #1:     2    C  0.52725962 0.54536269
 #2:     1    D -1.50197212 0.71297571
 #3:     1    E  1.16354778 2.13889714
 #4:     2    D  0.22424664 0.31039463
 #5:     1    A  0.23359711 1.10743823
 #6:     3    E -0.56994386 0.07656659
 #7:     2    E  0.29615501 0.67455339
 #8:     3    C -0.67394994         NA
 #9:     1    C  0.17334177 0.73057650
#10:     4    D  0.16974065 0.74408077
#11:     4    A -0.56525356         NA
#12:     3    D -0.07897854 0.91401552
#13:     4    C -0.64065713 1.15972463
#14:     4    E -0.03087801 0.67895741
#15:     4    B -0.14897461 1.27683063
#16:     3    B  0.28487787 0.69502367
#17:     2    A -0.27824564 0.46022423
#18:     1    B  0.64822113         NA
#19:     2    B  0.05520500         NA

This also works when the column names are changed

 # Rename the columns to a, b, c by reference, then show the first two groups.
 setnames(mydt, old = names(mydt), new = letters[1:3])
 head(myfn(mydt, "a", b, c), n = 2)
 #   b c       mean        sd
 #1: 2 C  0.5272596 0.5453627
 #2: 1 D -1.5019721 0.7129757

Or you can remove the deparse(substitute(..)) calls and pass the variables as quoted strings

 # Summarise one column (mean and sd) by two grouping columns; every column
 # name is passed as a quoted string.
 #   dt     : the data.table to aggregate
 #   v1     : name of the value column
 #   v2, v3 : names of the grouping columns
 # as.name() turns the v1 string into a symbol, which eval() evaluates in j.
 myfn <- function(dt, v1, v2, v3) {
   dt[, .(
     mean = mean(eval(as.name(v1))),
     sd = sd(eval(as.name(v1)))
   ), by = c(v2, v3)]
 }
 myfn(mydt, "vnum1", "vfac1", "vch1")

Here is another variant, which can take unquoted column names

 # Summarise one column (mean and sd) by two grouping columns; all three
 # column arguments are given as unquoted names.
 #   dt     : the data.table to aggregate
 #   v1     : value column (bare name)
 #   v2, v3 : grouping columns (bare names)
 myfn <- function(dt, v1, v2, v3){
   # match.call() returns the call with its arguments still unevaluated;
   # as.list() makes them accessible by formal name (args$v1, args$v2, ...).
   args  <- as.list(match.call())
   # deparse() converts the captured grouping expressions to strings for `by`.
   e1 <- c(deparse(args$v2), deparse(args$v3))
   # eval(args$v1) evaluates the captured value-column expression within j.
   dt[, .(mean=mean(eval(args$v1)), sd=sd(eval(args$v1))), by=e1]
  }

  # All column arguments are bare names here.
  head(myfn(mydt, vnum1, vfac1, vch1),2)
  #  vfac1 vch1       mean        sd
  #1:     2    C  0.5272596 0.5453627
  #2:     1    D -1.5019721 0.7129757

Upvotes: 3

David Arenburg
David Arenburg

Reputation: 92300

I think you can simplify this and make it more idiomatic data.table by just using .SD. You also don't need to evaluate within the by statement because data.table won't look for variables within the global environment in order to aggregate by (unlike in the j statement). So simply

# Summarise one column (mean and sd) by two grouping columns; every column
# name is passed as a quoted string.
#   dt     : the data.table to aggregate
#   v1     : name of the value column
#   v2, v3 : names of the grouping columns
# .SD[[v1]] extracts the value column by name from the current group's data.
myfn <- function(dt, v1, v2, v3) {
  dt[, list(
    mean = mean(.SD[[v1]]),
    sd = sd(.SD[[v1]])
  ), by = c(v2, v3)]
}
myfn(mydt, "vnum1", "vfac1", "vch1")
##    vfac1 vch1       mean         sd
## 1:     2    C  0.5272596 0.54536269
## 2:     1    D -1.5019721 0.71297571
## 3:     1    E  1.1635478 2.13889714
## 4:     2    D  0.2242466 0.31039463
## 5:     1    A  0.2335971 1.10743823
## 6:     3    E -0.5699439 0.07656659
...

Testing for other column names

# Rename the three columns to a, b, c by reference, then show the first two groups.
setnames(mydt, c("a", "b", "c"))
head(myfn(mydt, "a", "b", "c"), n = 2)
#    b c       mean        sd
# 1: 2 C  0.5272596 0.5453627
# 2: 1 D -1.5019721 0.7129757

Alternatively, you could also use get as in

# Summarise one column (mean and sd) by two grouping columns; every column
# name is passed as a quoted string.
#   dt     : the data.table to aggregate
#   v1     : name of the value column
#   v2, v3 : names of the grouping columns
# get(v1) looks the value column up by its string name inside j.
myfn <- function(dt, v1, v2, v3) {
  dt[, list(
    mean = mean(get(v1)),
    sd = sd(get(v1))
  ), by = c(v2, v3)]
}

Though, efficiency-wise, @akrun's eval(as.name()) combination within the j statement should be the fastest until Arun/Matt optimise .SD.

Upvotes: 2

Related Questions