stats_noob
stats_noob

Reputation: 5897

R Error: Optimization resulting in "unused argument(x)"

I am working with the R programming language. I defined the following function and I am trying to perform the "random search" algorithm on this function.

First, I loaded the library:

#load library : https://cran.r-project.org/web/packages/randomsearch/index.html 

library(randomsearch)

Then, I defined the function:

# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,10)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)


#define function (4 inputs x[1], x[2], x[3], x[4] and 4 outputs f1, f2, f3, f4)

fn <- function(i) {
    x1 <- x[i,1]; x2 <- x[i,2]; x3 <- x[i,3] ; x4 <- x[i,4]
    f <- numeric(4)
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
                            ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(a1, b1, c1, cat)
    
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(a1, b1, c1, cat)
    
    c_table = train_data %>%
        filter(cat == "c") %>%
        select(a1, b1, c1, cat)
    
    
    #calculate  quantile ("quant") for each bin
    
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 150,1,0 )))
    
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 300,1,0 )))
    
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 400,1,0 )))
    
    f1 = mean(table_a$quant)
    f2 = mean(table_b$quant)
    f3 = mean(table_c$quant)
    
    
    #group all tables
    
    final_table = rbind(table_a, table_b, table_c)
    # calculate the total mean : this is what needs to be optimized
    
    f4 = mean(final_table$quant)
    
    #add some constraints
    if((x3 - x1) < 0. | (x4 - x2) < 0.) {
        f[1] <- NaN
        f[2] <- NaN
        f[3] <- NaN
        f[4] <- NaN
        
    }
    
    return (f)
}

Finally, I tried to run the "random search" algorithm on this function:

#run algorithm
res = randomsearch(fn, lower = c(80, 80, 80, 80), upper = c(100,120,100,120), minimize = c(TRUE, TRUE, TRUE, TRUE), max.evals = 30)
rs = summary(res)

But this resulted in the following error:

Error in fun(x, ...) : unused argument (x)

Does anyone know why this error is being produced? Is it related to the way I have defined the function "fn"?

Thanks

Upvotes: 0

Views: 788

Answers (2)

Sam Rogers
Sam Rogers

Reputation: 797

As I mentioned in the comments, I'm not entirely sure what you're aiming for, or what problem you're attempting to solve, so all I've done with this really is optimise the code that you've already written. It's entirely possible that I would write very similar code if I was doing this myself from scratch, or it may be very different.

I have added a commented version so you can see what I've done, and also a reduced version with the unneeded code removed. Depending on your ultimate objectives, it's probably possible to still reduce the number of lines of the minimal version, and likely possible to speed it up too, but they may not be important considerations.

One basic principle I've used in this case is that there's no point running code that is not necessary or saving things that are not used.

Commented version

library(randomsearch)
library(tidyverse)

# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,10)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)

fun_2  <- function(x) {
    # x1 <- x[1]    # Storing these as additional variables doesn't help at all
    # x2 <- x[2]    # They are only used to bin the data
    # x3 <- x[3] 
    # x4 <- x[4] 
    
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = ifelse(a1 <= x[1] & b1 <= x[3], "a",
                            ifelse(a1 <= x[2] & b1 <= x[4], "b", "c")))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") #%>%
    # select(a1, b1, c1, cat) # There are no other columns to select, so this is not needed
    
    b_table = train_data %>%
        filter(cat == "b") #%>%
    # select(a1, b1, c1, cat) 
    
    c_table = train_data %>%
        filter(cat == "c") #%>%
    # select(a1, b1, c1, cat)
    
    #calculate  quantile ("quant") for each bin
    # table_a = data.frame(a_table %>% group_by(cat)    # We don't need another variable for this
                         # %>% mutate(quant = ifelse(c1 > 200,1,0 )))   
    a_table$quant = ifelse(a_table$c1 > 200, 1, 0)  # It can also be simlpified
    
    # table_b = data.frame(b_table%>% group_by(cat) %>%
                             # mutate(quant = ifelse(c1 > 300,1,0 )))
    b_table$quant = ifelse(b_table$c1 > 300, 1, 0)
    
    # table_c = data.frame(c_table%>% group_by(cat) %>%
                             # mutate(quant = ifelse(c1 > 400,1,0 )))
    c_table$quant = ifelse(c_table$c1 > 400, 1, 0)
    
    # f1 = mean(a_table$quant)
    # f2 = mean(b_table$quant)
    # f3 = mean(c_table$quant)

    #group all tables
    
    # final_table = rbind(table_a, table_b, table_c)    # This is not used
    # calculate the total mean : this is what needs to be optimized
    
    # f4 = mean(final_table$quant)  # This is not used
    
    return(c(mean(a_table$quant), mean(b_table$quant), mean(c_table$quant)))
}

Reduced version

fun_2  <- function(x) {
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = factor(ifelse(a1 <= x[1] & b1 <= x[3], "a",
                            ifelse(a1 <= x[2] & b1 <= x[4], "b", "c"))))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>% filter(cat == "a") 
    b_table = train_data %>% filter(cat == "b") 
    c_table = train_data %>% filter(cat == "c") 
    
    #calculate  quantile ("quant") for each bin
    a_table$quant = ifelse(a_table$c1 > 200, 1, 0) 
    b_table$quant = ifelse(b_table$c1 > 300, 1, 0)
    c_table$quant = ifelse(c_table$c1 > 400, 1, 0)
 
    return(c(mean(a_table$quant), mean(b_table$quant), mean(c_table$quant)))
}

They unfortunately don't produce identical results, but I believe this is due to the random nature of the search, rather than an error in the code.

Hope that helps. I'll have a look at your other question when I get some time.

Upvotes: 0

stats_noob
stats_noob

Reputation: 5897

The following code works, but I couldn't get the constraints to work:

library(randomsearch)

# create some data for this example
a1 = rnorm(1000,100,10)
b1 = rnorm(1000,100,10)
c1 = sample.int(1000, 1000, replace = TRUE)
train_data = data.frame(a1,b1,c1)

fun_1  <- function(x) {
    x1 <- x[1]
    x2 <- x[2]
    x3 <- x[3] 
    x4 <- x[4] 
    
    
    #bin data according to random criteria
    train_data <- train_data %>%
        mutate(cat = ifelse(a1 <= x1 & b1 <= x3, "a",
                            ifelse(a1 <= x2 & b1 <= x4, "b", "c")))
    
    train_data$cat = as.factor(train_data$cat)
    
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(a1, b1, c1, cat)
    
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(a1, b1, c1, cat)
    
    c_table = train_data %>%
        filter(cat == "c") %>%
        select(a1, b1, c1, cat)
    
    
    #calculate  quantile ("quant") for each bin
    
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 200,1,0 )))
    
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 300,1,0 )))
    
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = ifelse(c1 > 400,1,0 )))
    
    f1 = mean(table_a$quant)
    f2 = mean(table_b$quant)
    f3 = mean(table_c$quant)
    
    
    #group all tables
    
    final_table = rbind(table_a, table_b, table_c)
    # calculate the total mean : this is what needs to be optimized
    
    f4 = mean(final_table$quant)
    
    
    
    return(c(f1, f2,f3))
}
    

res = randomsearch(fun_1, lower = c(90, 100, 90, 100), upper = c(100,120,100,120), minimize = c(TRUE, TRUE,TRUE), max.evals = 30)
rs = summary(res)

Now, view the results:

> head(rs)
$pareto.front
         y_1       y_2       y_3       x1       x2       x3       x4
1  0.7605634 0.6851628 0.6400000 91.12101 114.1228 96.77341 117.0649
4  0.7611940 0.6974249 0.5867238 90.16010 110.6879 99.06183 103.1964
5  0.7631579 0.6996337 0.5863570 93.49183 103.2529 90.54579 100.0828
8  0.7804878 0.7196653 0.5791667 93.37388 101.6645 91.03374 100.0605
9  0.7878788 0.6862745 0.5936842 92.85005 106.4595 94.16650 105.3454
14 0.7884615 0.6828423 0.6010782 94.17298 106.6873 91.62018 109.2036

@Sam Rogers: I would be curious to see what you had in mind?

Thanks!

Upvotes: 1

Related Questions