R: Correctly Specifying Constraints in a Function

Question

I am working with the R programming language.

I have the following data:

library(GA)
library(dplyr)

var_1 = rnorm(1000,10,10)
var_2 = rnorm(1000,5,5)
var_3 = rnorm(1000, 1,1)
goal = rnorm(1000,100,100)

my_data = data.frame(var_1, var_2, var_3, goal)

I wrote the following function that randomly splits this data into 3 different groups and evaluates a "fitness value" ("total_mean") of these groups based on the percentage of the data within each group that is less than some randomly assigned percentile:

#define fitness function
fitness <- function(x) {

x1 = x[1]
x2 = x[2]
x3 = x[3]
x4 = x[4]
x5 = x[5]
x6 = x[6]
x7 = x[7]
x8 = x[8]
x9 = x[9]

    #bin data according to random criteria
    train_data <- my_data %>% mutate(cat = ifelse(var_1 <= x1 & var_2 <= x2 & var_3 <= x3, "a", ifelse( var_1 <= x4 &  var_2  <= x5 & var_3 <= x6, "b", "c")))
   
    train_data$cat = as.factor(train_data$cat)
   
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    c_table = train_data %>%
        filter(cat == "c") %>%
       select(var_1, var_2, var_3, goal, cat)
   
    x7 =  runif(1,0, 1)
    x8 =  runif(1, 0, 1)
    x9 =  runif(1, 0, 1)
   
    #calculate  quantile ("quant") for each bin
   
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x7)))
   
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x8)))
   
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x9)))
   
    #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
    table_a$diff = ifelse(table_a$quant > table_a$goal,1,0)
    table_b$diff = ifelse(table_b$quant > table_b$goal,1,0)
    table_c$diff = ifelse(table_c$quant > table_c$goal,1,0)
   
    #group all tables
   
    final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
    total_mean = mean(final_table$diff)

n_row_a = nrow(table_a)
n_row_b = nrow(table_b)
n_row_c = nrow(table_c)

return(total_mean)


}

I was able to then optimize this function using the Genetic Algorithm in R:

GA <- ga(type = "real-valued",
         fitness = fitness,
         lower = c(min(var_1), min(var_2), min(var_3), min(var_1), min(var_2), min(var_3), 0,0,0), upper = c(max(var_1), max(var_2), max(var_3), max(var_1), max(var_2), max(var_3), 1,1,1),
         popSize = 50, maxiter = 10, run = 10)

My Question: I would now like to add some "constraints" to this function that prevents the arguments of this function from taking certain values and also prevents the splits made by this function having 0 rows. My logic being that these constraints will work by assigning the returned value of the function as "NaN":

if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}

I tried to add these constraints to the above function:

#define fitness function
fitness <- function(x) {

x1 = x[1]
x2 = x[2]
x3 = x[3]
x4 = x[4]
x5 = x[5]
x6 = x[6]
x7 = x[7]
x8 = x[8]
x9 = x[9]

    #bin data according to random criteria
    train_data <- my_data %>% mutate(cat = ifelse(var_1 <= x1 & var_2 <= x2 & var_3 <= x3, "a", ifelse( var_1 <= x4 &  var_2  <= x5 & var_3 <= x6, "b", "c")))


   
    train_data$cat = as.factor(train_data$cat)
   
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    c_table = train_data %>%
        filter(cat == "c") %>%
       select(var_1, var_2, var_3, goal, cat)
   
    x7 =  runif(1,0, 1)
    x8 =  runif(1, 0, 1)
    x9 =  runif(1, 0, 1)
   
    #calculate  quantile ("quant") for each bin
   
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x7)))
   
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x8)))
   
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x9)))
   
   
   
   
    #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
    table_a$diff = ifelse(table_a$quant > table_a$goal,1,0)
    table_b$diff = ifelse(table_b$quant > table_b$goal,1,0)
    table_c$diff = ifelse(table_c$quant > table_c$goal,1,0)
   
    #group all tables
   
    final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
    total_mean = mean(final_table$diff)

n_row_a = nrow(table_a)
n_row_b = nrow(table_b)
n_row_c = nrow(table_c)



if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}
   return(total_mean)
}

My Problem: However, now the constraints do not seem to be respected:

GA <- ga(type = "real-valued",
         fitness = fitness,
         lower = c(min(var_1), min(var_2), min(var_3), min(var_1), min(var_2), min(var_3), 0,0,0), upper = c(max(var_1), max(var_2), max(var_3), max(var_1), max(var_2), max(var_3), 1,1,1),
         popSize = 50, maxiter = 1000, run = 100)

# output

> GA@solution
     x1   x2  x3 x4 x5   x6   x7   x8   x9
[1,] 24 -5.3 4.4 38 12 -1.6 0.88 0.23 0.99
[2,] 21 -5.3 4.4 38 12 -1.6 0.88 0.23 0.99

As we can see here, X6 is less than X3 - it appears that these constraints were not respected. These solutions should have never been returned.

Can someone please show me how to correctly specify these constraints in my function?

Note: You can check to see whether all the solutions obey the constraints at once:

test = data.frame(GA@population)

test$constraint = ifelse(test$X4 > test$X1 & test$X5 > test$X2 & test$X6 > test$X3, "YES", "NO")

table(test$constraint)

R: Correctly Specifying Constraints in a Function

Answers (1)

Related Questions