Reputation: 145
I want to generate vectors with all possible combinations of vector elements where a consecutive multiple occurrences of an element is considered as single occurrence of that element.
Simple cases
For n = 2,
original <- c("a","a","a","b","b","b")
v1 <- c("b","b","b","a","a","a")
So all unique occurrences of a swap with b.
For n = 3, we get
original<-c("a","a","a","b","b","b","c","c","c")
ver1<-c("a","a","a","c","c","c","b","b","b")
ver2<-c("b","b","b","a","a","a","c","c","c")
ver3<-c("b","b","b","c","c","c","a","a","a")
ver4<-c("c","c","c","b","b","b","a","a","a")
ver5<-c("c","c","c","a","a","a","b","b","b")
So all unique occurrences of a
swap with b
and c
, all unique occurrences of b
swap with a
and c
AND all unique occurrences of c
swap with b
and a
.
The cases go up to n = 10. (I believe the possible vectors with different combinations are 10!)
Also, there can be more than a single chunk of a, b, c...
Complex case
For n = 2;
original<-c("a","a","a","b","b","b","a","a","b","b")
ver1<-c("b","b","b","a","a","a","b","b","a","a")
But if we swap the elements correctly the complex case and simple case should not matter.
What I am trying: (for n=2)
original<-c("a","a","a","b","b","b","a","a","b","b")
ver1<-replace(original,which(original=='a'),'b')
ver1<-replace(ver1,which(original=='b'),'a')
gives ver1<-c("b","b","b","a","a","a","b","b","a","a")
But not sure how to automate this.
Upvotes: 9
Views: 461
Reputation: 101538
Here we made some improvement on the previous answer, where the result is stored in matrix
(instead of list
), and arrangement::permuations
is applied (instead of pracma::perms
(thank recommendation from @Gregor Thomas)
f_TIC2 <- function(x) {
u <- unique(x)
idx <- match(x, u)
n <- max(idx)
m <- matrix(u[perms(1:n)], ncol = n)
matrix(t(m)[c(outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`))], nrow = nrow(m), byrow = TRUE)
}
f_TIC2Arr <- function(x) {
u <- unique(x)
idx <- match(x, u)
n <- max(idx)
m <- matrix(u[permutations(1:n)], ncol = n)
matrix(t(m)[c(outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`))], nrow = nrow(m), byrow = TRUE)
}
and the output looks like
> f_TIC2(c("a", "b", "b", "c", "b"))
[,1] [,2] [,3] [,4] [,5]
[1,] "c" "b" "b" "a" "b"
[2,] "c" "a" "a" "b" "a"
[3,] "b" "c" "c" "a" "c"
[4,] "b" "a" "a" "c" "a"
[5,] "a" "b" "b" "c" "b"
[6,] "a" "c" "c" "b" "c"
> f_TIC2Arr(c("a", "b", "b", "c", "b"))
[,1] [,2] [,3] [,4] [,5]
[1,] "a" "b" "b" "c" "b"
[2,] "a" "c" "c" "b" "c"
[3,] "b" "a" "a" "c" "a"
[4,] "b" "c" "c" "a" "c"
[5,] "c" "a" "a" "b" "a"
[6,] "c" "b" "b" "a" "b"
Here is a benchmark among some of the existing answers (Maël's solution is computational heavy, thus being skipped.)
NB: This benchmark is NOT 100% fair since my improved solutions yield matrices rather than lists, which save a lot of time. Thus, the comparsion is not saying mine is the fastest but indicating the possible approaches to improve the performance.
library(RcppAlgos)
library(arrangements)
library(pracma)
f_TIC1 <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
f_TIC1Arr <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[permutations(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
f_TIC2 <- function(x) {
u <- unique(x)
idx <- match(x, u)
n <- max(idx)
m <- matrix(u[perms(1:n)], ncol = n)
matrix(t(m)[outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`)], nrow = nrow(m), byrow = TRUE)
}
f_TIC2Arr <- function(x) {
u <- unique(x)
idx <- match(x, u)
n <- max(idx)
m <- matrix(u[permutations(1:n)], ncol = n)
matrix(t(m)[outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`)], nrow = nrow(m), byrow = TRUE)
}
f_GT <- function(x) {
ux <- unique(x)
xi <- as.integer(factor(x))
perm <- permutations(seq_along(ux))
apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}
f_RS <- function(x) {
permuteGeneral(uv <- unique(x), length(uv), FUN = \(m) uv[match(x, m)])
}
set.seed(1)
x <- sample(letters[1:10], 10, replace = TRUE)
bm <- bench::mark(
f_GT = f_GT(x),
f_TIC1 = f_TIC1(x),
f_TIC1Arr = f_TIC1Arr(x),
f_TIC2 = f_TIC2(x),
f_TIC2Arr = f_TIC2Arr(x),
f_RS = f_RS(x),
check = FALSE
)
autoplot(bm)
and you will see
> bm
# A tibble: 6 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
<bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
1 f_GT 11.55ms 15.57ms 58.9 315.14KB 7.06 25 3 425ms
2 f_TIC1 17.05ms 20.8ms 45.5 2.58MB 10.1 18 4 396ms
3 f_TIC1Arr 16.45ms 19.62ms 48.9 1.06MB 13.6 18 5 368ms
4 f_TIC2 2.47ms 3.31ms 259. 3.84MB 28.5 91 10 351ms
5 f_TIC2Arr 1.54ms 1.7ms 469. 2.35MB 26.2 197 11 420ms
6 f_RS 5.66ms 7.46ms 93.9 72.75KB 9.63 39 4 415ms
# ... with 4 more variables: result <list>, memory <list>, time <list>,
# gc <list>
and
You can try pracma::perms
like below
library(pracma)
f <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
and you will see
> f(c("a", "a", "a", "b", "b", "b", "a", "a", "b", "b"))
[[1]]
[1] "b" "b" "b" "a" "a" "a" "b" "b" "a" "a"
[[2]]
[1] "a" "a" "a" "b" "b" "b" "a" "a" "b" "b"
> f(c("a", "b", "b", "c", "b"))
[[1]]
[1] "c" "b" "b" "a" "b"
[[2]]
[1] "c" "a" "a" "b" "a"
[[3]]
[1] "b" "c" "c" "a" "c"
[[4]]
[1] "b" "a" "a" "c" "a"
[[5]]
[1] "a" "b" "b" "c" "b"
[[6]]
[1] "a" "c" "c" "b" "c"
Upvotes: 6
Reputation: 9656
Here is base R solution:
vec <- c("a","a","a","b","b","b","c","c","c") # original vector
els <- unique(vec) # unique elements
pers <- do.call(expand.grid, args=rep(list(els), length(els))) # all permutations
pers <- as.matrix(pers[apply(pers, 1, anyDuplicated) == 0,]) # no repeated cases
colnames(pers) <- els
unname(pers[,vec])
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] "c" "c" "c" "b" "b" "b" "a" "a" "a"
[2,] "b" "b" "b" "c" "c" "c" "a" "a" "a"
[3,] "c" "c" "c" "a" "a" "a" "b" "b" "b"
[4,] "a" "a" "a" "c" "c" "c" "b" "b" "b"
[5,] "b" "b" "b" "a" "a" "a" "c" "c" "c"
[6,] "a" "a" "a" "b" "b" "b" "c" "c" "c"
Upvotes: 2
Reputation: 34441
This answer takes the same general approach to those already posted but uses RcppAlgos::permuteGeneral()
which is not only very fast but also allows functions to be applied to the permutations.
library(RcppAlgos)
f <- function(x) permuteGeneral(uv <- unique(x), length(uv), FUN = \(m) uv[match(x, m)])
f(original)
[[1]]
[1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
[[2]]
[1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
[[3]]
[1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
[[4]]
[1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
[[5]]
[1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
[[6]]
[1] "c" "c" "c" "b" "b" "b" "a" "a" "a"
Upvotes: 5
Reputation: 145775
Here's an approach using the very fast arrangements
package for permutations. We calculate the permutations of integers corresponding to the unique elements of the input and then do some clever indexing to output the corresponding swaps. This is extremely fast on small examples and does pretty well on larger example - on my computer it took a little less than 7 seconds to generate the 10! = 3628800
swaps on input of size 30 with 10 unique elements. The results are conveniently returned in a list
.
library(arrangements)
all_swaps = function(x) {
ux = unique(x)
xi = as.integer(factor(x))
perm = permutations(seq_along(ux))
apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}
Test cases from the question:
# n = 2
all_swaps(c("a","a","a","b","b","b","a","a","b","b"))
# [[1]]
# [1] "a" "a" "a" "b" "b" "b" "a" "a" "b" "b"
#
# [[2]]
# [1] "b" "b" "b" "a" "a" "a" "b" "b" "a" "a"
## n = 3
all_swaps(c("a","a","a","b","b","b","c","c","c"))
# [[1]]
# [1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
#
# [[2]]
# [1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
#
# [[3]]
# [1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
#
# [[4]]
# [1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
#
# [[5]]
# [1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
#
# [[6]]
# [1] "c" "c" "c" "b" "b" "b" "a" "a" "a"
A shorter demo with 3 unique elements in a "complex" case where the elements are not all consecutive:
all_swaps(c("a", "b", "b", "c", "b"))
# [[1]]
# [1] "a" "b" "b" "c" "b"
#
# [[2]]
# [1] "a" "c" "c" "b" "c"
#
# [[3]]
# [1] "b" "a" "a" "c" "a"
#
# [[4]]
# [1] "b" "c" "c" "a" "c"
#
# [[5]]
# [1] "c" "a" "a" "b" "a"
#
# [[6]]
# [1] "c" "b" "b" "a" "b"
A larger case:
# n = 10
set.seed(47)
start_t = Sys.time()
n10 = all_swaps(sample(letters[1:10], size = 30, replace = TRUE))
end_t = Sys.time()
end_t - start_t
# Time difference of 6.711215 secs
length(n10)
# [1] 3628800
Benchmarking my answer with Maël's and ThomasIsCoding's, my method relying on the arrangements
package is quick and memory efficient. ThomasIsCoding's answer can be improved by changing from pracma::perms
to arrangements::permutations
--the memory usage is especially improved--but my version still performs better. Maël's uses a lot of time and memory. I'll lead with results, code to reproduce is below.
## 5 Unique Elements
arrange(b5, desc(`itr/sec`))
# # A tibble: 4 × 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
# 1 GregorThomas 2.31ms 12.6ms 77.5 5.77KB 0 40 0 516ms
# 2 ThomasIsCodingArr(in5) 9.3ms 20.5ms 47.4 19.55KB 0 24 0 506ms
# 3 ThomasIsCoding(in5) 12.57ms 22.7ms 41.2 45.41KB 0 22 0 534ms
# 4 Mael 963.64ms 963.6ms 1.04 1.24MB 0 1 0 964ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>
## 9 Unique Elements - memory allocation is important
arrange(b9, desc(`itr/sec`))
# # A tibble: 2 × 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
# <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
# 1 GregorThomas 1.8s 1.8s 0.556 27.7MB 0 1 0 1.8s <NULL>
# 2 ThomasIsCoding(in9) 2.5s 2.5s 0.400 230.8MB 0.400 1 1 2.5s <NULL>
# # … with 3 more variables: memory <list>, time <list>, gc <list>
Benchmarking code:
## Functions
library(arrangements)
library(pracma)
ThomasIsCoding <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
ThomasIsCodingArr <- function(x) {
idx <- match(x, unique(x))
m <- asplit(matrix(unique(x)[permutations(1:max(idx))], ncol = max(idx)), 1)
Map(`[`, m, list(idx))
}
Mael <- function(vec){
uni <- unique(vec)
size <- length(uni)
pVec <- paste(uni, collapse = "")
grid <- expand.grid(rep(list(uni), size))
expanded <- grid[apply(grid, 1, function(x) length(unique(x))) == size,]
p <- unname(apply(expanded, 1, paste0, collapse = ""))
lapply(p, function(x) chartr(pVec, x, vec))
}
all_swaps = function(x) {
ux = unique(x)
xi = as.integer(factor(x))
perm = permutations(seq_along(ux))
apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}
set.seed(47)
in5 = c(sample(letters[1:5], 5), sample(letters[1:5], 5, replace = TRUE))
b5 = bench::mark(
GregorThomas = all_swaps(in5),
Mael = Mael(in5),
ThomasIsCoding(in5),
ThomasIsCodingArr(in5),
check = FALSE
)
Upvotes: 12
Reputation: 52004
Using chartr
, you can do (although this might crash for larger vectors):
f <- function(vec){
uni <- unique(vec)
size <- length(uni)
pVec <- paste(uni, collapse = "")
grid <- expand.grid(rep(list(uni), size))
expanded <- grid[apply(grid, 1, function(x) length(unique(x))) == size,]
p <- unname(apply(ex, 1, paste0, collapse = ""))
lapply(p, function(x) chartr(pVec, x, vec))
}
output:
original<-c("a","a","a","b","b","b","c","c","c")
f(original)
# [[1]]
# [1] "c" "c" "c" "b" "b" "b" "a" "a" "a"
#
# [[2]]
# [1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
#
# [[3]]
# [1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
#
# [[4]]
# [1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
#
# [[5]]
# [1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
#
# [[6]]
# [1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
Previous answer (do not work for n > 2).
Using gtools::permutations
. Results are each columns of the matrix. The idea is to get the permutations from unique values, and the repeat the values to match the desired group length.
f <- function(x){
r <- rle(x)
l <- length(r$values)
apply(gtools::permutations(n=l, r=l, v=r$values), 1, function(x) rep(x, each = unique(r$l)))
}
Upvotes: 5