Pietro
Pietro

Reputation: 347

R - subset Dataframe into all possible combinations with contraints

I have the following dataframe:

Person     City     Ethnicity
A            1          2
B            2          3
C            3          3
D            1          1
E            2          1 
F            3          1
G            2          2
H            1          1
I            2          2 
J            1          2
K            1          3 
L            1          3
M            2          2

I want to have a df with all the possible combinations of 6 people so that the following constraints are satisfied:

Is there a way to do this in R?

Thanks


Data

structure(list(Person = structure(1:13, .Label = c("A", "B", 
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "factor"), 
    City = c(1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L, 
    2L), Ethnicity = c(2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
    3L, 3L, 2L)), class = "data.frame", row.names = c(NA, -13L
))

A possible combination is A,B,C,D,E,H.

Upvotes: 0

Views: 221

Answers (1)

Jackson
Jackson

Reputation: 106

You could try using combn to generate all combinations, then using a few predicate functions to filter out the ones that you want as follows:

# Data
data <- structure(list(
  Person = structure(1:13, .Label = c(
    "A", "B",
    "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"
  ), class = "factor"),
  City = c(
    1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L,
    2L
  ), Ethnicity = c(
    2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
    3L, 3L, 2L
  )
), class = "data.frame", row.names = c(NA, -13L))


# Helpers
has_all_cities <- function(x, data) {
  all_cities <- unique(data$City)
  setequal(data[x, ]$City, all_cities)
}

has_ppl_from_city_one <- function(x, data) {
  num_ppl_from_city_one <- data[x, ]$City == 1
  sum(num_ppl_from_city_one) >= 3  # three or more
}

has_all_ethnicity <- function(x, data) {
  all_ethnicities <- unique(data$Ethnicity)
  setequal(data[x, ]$Ethnicity, all_ethnicities)
}

satisfy_all_constraints <- function(x, data) {
    has_all_cities(x, data) && 
        has_ppl_from_city_one(x, data) &&
        has_all_ethnicity(x, data)
} 


# Main
row.names(data) <- data$Person

y <- combn(data$Person, m = 6)
dim(y)

ind <- apply(y, 2, satisfy_all_constraints, data = data)
res <- y[, ind]
res[, 1:6]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] A    A    A    A    A    A   
# [2,] B    B    B    B    B    B   
# [3,] C    C    C    C    C    C   
# [4,] D    D    D    D    D    D   
# [5,] E    E    E    E    F    F   
# [6,] H    J    K    L    H    J   
# Levels: A B C D E F G H I J K L M
ncol(res)
# 574

# Check requirements
data[res[, 1], ]
#    Person City Ethnicity
# A      A    1         2
# B      B    2         3
# C      C    3         3
# D      D    1         1
# E      E    2         1
# H      H    1         1

# No duplicate person
# Has all cities: 1, 2, 3 
# Has all ethnicity: 1, 2, 3
# Has at least 3 people from city 1


# Convert into data.frame
df <- as.data.frame(structure(as.character(res), dim = dim(res)))
df[, 1:6]

Upvotes: 3

Related Questions