Reputation: 347
I have the following dataframe:
Person City Ethnicity
A 1 2
B 2 3
C 3 3
D 1 1
E 2 1
F 3 1
G 2 2
H 1 1
I 2 2
J 1 2
K 1 3
L 1 3
M 2 2
I want to have a df with all the possible combinations of 6 people so that the following constraints are satisfied:
Is there a way to do this in R?
Thanks
Data
structure(list(Person = structure(1:13, .Label = c("A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "factor"),
City = c(1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L,
2L), Ethnicity = c(2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
3L, 3L, 2L)), class = "data.frame", row.names = c(NA, -13L
))
A possible combination is A,B,C,D,E,H.
Upvotes: 0
Views: 221
Reputation: 106
You could try using combn
to generate all combinations, then using a few predicate functions to filter out the ones that you want as follows:
# Data
data <- structure(list(
Person = structure(1:13, .Label = c(
"A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"
), class = "factor"),
City = c(
1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L,
2L
), Ethnicity = c(
2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
3L, 3L, 2L
)
), class = "data.frame", row.names = c(NA, -13L))
# Helpers
has_all_cities <- function(x, data) {
all_cities <- unique(data$City)
setequal(data[x, ]$City, all_cities)
}
has_ppl_from_city_one <- function(x, data) {
num_ppl_from_city_one <- data[x, ]$City == 1
sum(num_ppl_from_city_one) >= 3 # three or more
}
has_all_ethnicity <- function(x, data) {
all_ethnicities <- unique(data$Ethnicity)
setequal(data[x, ]$Ethnicity, all_ethnicities)
}
satisfy_all_constraints <- function(x, data) {
has_all_cities(x, data) &&
has_ppl_from_city_one(x, data) &&
has_all_ethnicity(x, data)
}
# Main
row.names(data) <- data$Person
y <- combn(data$Person, m = 6)
dim(y)
ind <- apply(y, 2, satisfy_all_constraints, data = data)
res <- y[, ind]
res[, 1:6]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] A A A A A A
# [2,] B B B B B B
# [3,] C C C C C C
# [4,] D D D D D D
# [5,] E E E E F F
# [6,] H J K L H J
# Levels: A B C D E F G H I J K L M
ncol(res)
# 574
# Check requirements
data[res[, 1], ]
# Person City Ethnicity
# A A 1 2
# B B 2 3
# C C 3 3
# D D 1 1
# E E 2 1
# H H 1 1
# No duplicate person
# Has all cities: 1, 2, 3
# Has all ethnicity: 1, 2, 3
# Has at least 3 people from city 1
# Convert into data.frame
df <- as.data.frame(structure(as.character(res), dim = dim(res)))
df[, 1:6]
Upvotes: 3