Reputation: 367
Any help with this problem I'm having would be greatly appreciated. I am a moderately advanced R programmer, but so far all my solutions have failed me. I start with the logic behind what I am trying to do, followed by my attempt, followed by the test cases. I tried to be as explicit as possible.
I should probably mention that I sort of know what the problem is, but I don't know what the solution is.
# sqldf has some limitations:
cpaste <- function(x) paste(x, collapse = ", ")
dd <- data.frame(a = 1:10)
b <- 5:8
# this is what I want to get
sqldf("select * from dd where a in (5, 6, 7, 8)")
# but I want to get it by typing this
sqldf(sprintf("select * from %s where a in (%s)", dd, b)) # error
# and it doesn't work, because this is what sprintf expects:
sqldf(sprintf("select * from %s where a in (%s)", "dd", paste(b, collapse = ", ")))
# in other words,
# (1) the name of data frame, not the data frame itself, and
# (2) the vector must be turned into a single string with comma separated values
# I wrote a wrapper function for sqldf
# it uses sprintf to create the sql string that I need to feed to sqldf
# but before doing that it does (1) and (2) as mentioned above
# so I can do this and it would work:
run_sql("select * from %s where a in (%s)", dd, b)
# it works until I try running it inside another funciton
# where I start running into some problems
# here's the function, followed by test cases
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(eval(x))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(eval(x))) {
cpaste(eval(x)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(eval(x))) cpaste(eval(x)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode)
} else {
zcode
}
}
dd <- data.frame(a = 1:10)
b <- 5:8
run_sql("select * from %s where a > %s", dd, 5)
run_sql("select * from %s where a in (%s)", dd, b)
# it works when the function uses variables in .GlobalEnv
# but this is not the preferred way:
foo <- function() {
run_sql("select * from %s where a in (%s)", dd, b)
}
foo()
# here's the preferred way
# but things stop working:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
# here's one solution to the above, but I am hoping there's a better way
foo <- function(x, y) {
do.call(run_sql, list("select * from %s where a in (%s)",
substitute(x),
substitute(y)))
}
foo(dd, b)
# also, the above solution does not work with local variables
foo <- function() {
bb <- dd
do.call(run_sql, list("select * from %s where a in (%s)",
bb,
substitute(y)))
}
foo()
Upvotes: 0
Views: 198
Reputation: 1164
@G. Grothendieck's solution will probably be easier for many readers of this post. That said, I think you can fix your function by identifying the parent environment of the call to run_sql
, then using envir=
to specify that environment anytime you call a function that depends on environments -- specifically, eval()
and sqldf()
. Like so:
cpaste <- function(x) paste(x, collapse = ", ")
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(eval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(eval(x, envir=envir))) {
cpaste(eval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(eval(x, envir=envir))) cpaste(eval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
This works in your test case using x and y:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
And, with some tweaking, in a test case using do.call
and local variables:
foo <- function(y) {
bb <- dd
do.call(run_sql, list("select * from %s where a in (%s)",
as.name("bb"),
substitute(y),
envir=environment()))
}
foo(b)
To understand the problem with your original function, and to understand what environment was visible to eval()
each time it was called, I wrote a function called enveval
to wrap several sys.xxx
functions and the eval()
call. Then, back in the run_sql
function, I replaced all calls to eval
with calls to enveval
.
# enveval: Replace an eval() call with enveval() to see a description of the stack of environments experienced by eval()
enveval <- function(x, envir=parent.frame()) {
cat(paste0("EVALUATING ",as.character(x),":\n"))
stack <- data.frame(frame_num=1:sys.nframe(), call=strtrim(as.character(sys.calls()),15),
is_eval_envir=NA, vars_in_frame=NA, x_exists=NA, eval_x=NA)
for(i in 1:nrow(stack)) {
f <- which(i==stack$frame_num)
stack[f,"is_eval_envir"] <- identical(envir,sys.frame(f))
stack[f,"vars_in_frame"] <- paste(ls(envir=sys.frame(f)),collapse=",")
stack[f,"x_exists"] <- exists(as.character(x), where=sys.frame(f))
if(stack[f,"is_eval_envir"] & stack[f,"x_exists"]) {
# if all the variables to evaluate are single-element atomic, you can also run the following line:
if(is.atomic(eval(x, envir=sys.frame(f)))) {
stack[f,"eval_x"] <- cpaste(eval(x, envir=sys.frame(f)))
} else {
stack[f,"eval_x"] <- "[non-atomic]"
}
}
}
print(stack)
eval(x, envir=envir)
}
# The new run_sql where eval is replaced with enveval:
run_sql <- function(zcode = NULL, ..., display = TRUE, eval = TRUE, envir=parent.frame()) {
require(sqldf)
ellipsis <- as.list(match.call(expand.dots = TRUE))
ellipsis[1] <- NULL
ellipsis$inline <- NULL
ellipsis$display <- NULL
ellipsis$eval <- NULL
# print(ellipsis)
# print(lapply(ellipsis, class))
ffn <- function(x) {
if (is.name(x)) { # the argument passed is itself a variable
if (is.data.frame(enveval(x, envir=envir))) {
as.character(x) # returns just the name of the data frame
} else if (is.atomic(enveval(x, envir=envir))) {
cpaste(enveval(x, envir=envir)) # return the atomic vector as comma-sep string
} else "_____FAIL1_____"
} else if (is.call(x)) { # the argument passed is a function call, eg 2:4
if (is.atomic(enveval(x, envir=envir))) cpaste(enveval(x, envir=envir)) else "_____FAIL2_____"
} else {
if (is.atomic(x)) cpaste(x) else "_____FAIL3_____"
}
}
ellipsis <- lapply(ellipsis, ffn)
zcode <- do.call(sprintf, unname(ellipsis))
if (display == TRUE) cat(paste0(zcode, "\n\n"))
if (eval == TRUE) {
sqldf(zcode, envir=envir)
} else {
zcode
}
}
Playing around with the test cases shows you what enveval
sees (and what eval
would have seen) each time it's called. For example, running the first test function:
foo <- function(x, y) {
run_sql("select * from %s where a in (%s)", x, y)
}
foo(dd, b)
gave the following print-out showing that the frame for the foo(dd, b)
call was the useful environment for every call to eval()
:
EVALUATING x:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE [non-atomic]
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[2]], ... FALSE x TRUE <NA>
5 5 is.data.frame(e FALSE x TRUE <NA>
6 6 enveval(x, envi FALSE envir,f,i,stack,x TRUE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 is.data.frame(e FALSE x FALSE <NA>
6 6 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
EVALUATING y:
frame_num call is_eval_envir vars_in_frame x_exists eval_x
1 1 foo(dd, b) TRUE x,y TRUE 5, 6, 7, 8
2 2 run_sql("select FALSE display,ellipsis,envir,eval,ffn,zcode FALSE <NA>
3 3 lapply(ellipsis FALSE FUN,X FALSE <NA>
4 4 FUN(X[[3]], ... FALSE x FALSE <NA>
5 5 cpaste(enveval( FALSE x FALSE <NA>
6 6 paste(x, collap FALSE collapse,sep FALSE <NA>
7 7 enveval(x, envi FALSE envir,f,i,stack,x FALSE <NA>
select * from x where a in (5, 6, 7, 8)
a
1 5
2 6
3 7
4 8
Upvotes: 2
Reputation: 269441
The functionality you are looking for already exists in the gsubfn package which is automatically pulled in by sqldf. See Example 5 on the sqldf home page for the way to do this and look at the vignette from the gsubfn package for even more information on fn
. In terms of the present examples:
dd <- data.frame(a = 1:10)
ddname <- "dd"
b <- 5
fn$sqldf("select * from $ddname where a > $b")
b <- 5:8
fn$sqldf("select * from $ddname where a in (`toString(b)`)")
Upvotes: 2