Reputation: 145

Generate all combinations of vector with consecutive occurrences is considered as single occurrence

I want to generate vectors with all possible combinations of vector elements where a consecutive multiple occurrences of an element is considered as single occurrence of that element.

Simple cases

For n = 2,

original <- c("a","a","a","b","b","b")
      v1 <- c("b","b","b","a","a","a")

So all unique occurrences of a swap with b.

For n = 3, we get

original<-c("a","a","a","b","b","b","c","c","c")
    ver1<-c("a","a","a","c","c","c","b","b","b")
    ver2<-c("b","b","b","a","a","a","c","c","c")
    ver3<-c("b","b","b","c","c","c","a","a","a")
    ver4<-c("c","c","c","b","b","b","a","a","a")
    ver5<-c("c","c","c","a","a","a","b","b","b")

So all unique occurrences of a swap with b and c, all unique occurrences of b swap with a and c AND all unique occurrences of c swap with b and a.

The cases go up to n = 10. (I believe the possible vectors with different combinations are 10!)

Also, there can be more than a single chunk of a, b, c...

Complex case

For n = 2;

original<-c("a","a","a","b","b","b","a","a","b","b")
    ver1<-c("b","b","b","a","a","a","b","b","a","a")

But if we swap the elements correctly the complex case and simple case should not matter.

What I am trying: (for n=2)

original<-c("a","a","a","b","b","b","a","a","b","b")
ver1<-replace(original,which(original=='a'),'b')
ver1<-replace(ver1,which(original=='b'),'a')
gives ver1<-c("b","b","b","a","a","a","b","b","a","a")

But not sure how to automate this.

Upvotes: 9

Answers (5)

ThomasIsCoding

Reputation: 101538

Update

Here we made some improvement on the previous answer, where the result is stored in matrix (instead of list), and arrangement::permuations is applied (instead of pracma::perms (thank recommendation from @Gregor Thomas)

f_TIC2 <- function(x) {
  u <- unique(x)
  idx <- match(x, u)
  n <- max(idx)
  m <- matrix(u[perms(1:n)], ncol = n)
  matrix(t(m)[c(outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`))], nrow = nrow(m), byrow = TRUE)
}

f_TIC2Arr <- function(x) {
  u <- unique(x)
  idx <- match(x, u)
  n <- max(idx)
  m <- matrix(u[permutations(1:n)], ncol = n)
  matrix(t(m)[c(outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`))], nrow = nrow(m), byrow = TRUE)
}

and the output looks like

> f_TIC2(c("a", "b", "b", "c", "b"))
     [,1] [,2] [,3] [,4] [,5]
[1,] "c"  "b"  "b"  "a"  "b"
[2,] "c"  "a"  "a"  "b"  "a"
[3,] "b"  "c"  "c"  "a"  "c"
[4,] "b"  "a"  "a"  "c"  "a"
[5,] "a"  "b"  "b"  "c"  "b"
[6,] "a"  "c"  "c"  "b"  "c"

> f_TIC2Arr(c("a", "b", "b", "c", "b"))
     [,1] [,2] [,3] [,4] [,5]
[1,] "a"  "b"  "b"  "c"  "b"
[2,] "a"  "c"  "c"  "b"  "c"
[3,] "b"  "a"  "a"  "c"  "a"
[4,] "b"  "c"  "c"  "a"  "c"
[5,] "c"  "a"  "a"  "b"  "a"
[6,] "c"  "b"  "b"  "a"  "b"

Benchmarking

Here is a benchmark among some of the existing answers (Maël's solution is computational heavy, thus being skipped.)

NB: This benchmark is NOT 100% fair since my improved solutions yield matrices rather than lists, which save a lot of time. Thus, the comparsion is not saying mine is the fastest but indicating the possible approaches to improve the performance.

library(RcppAlgos)
library(arrangements)
library(pracma)
f_TIC1 <- function(x) {
  idx <- match(x, unique(x))
  m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
  Map(`[`, m, list(idx))
}
f_TIC1Arr <- function(x) {
  idx <- match(x, unique(x))
  m <- asplit(matrix(unique(x)[permutations(1:max(idx))], ncol = max(idx)), 1)
  Map(`[`, m, list(idx))
}
f_TIC2 <- function(x) {
  u <- unique(x)
  idx <- match(x, u)
  n <- max(idx)
  m <- matrix(u[perms(1:n)], ncol = n)
  matrix(t(m)[outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`)], nrow = nrow(m), byrow = TRUE)
}

f_TIC2Arr <- function(x) {
  u <- unique(x)
  idx <- match(x, u)
  n <- max(idx)
  m <- matrix(u[permutations(1:n)], ncol = n)
  matrix(t(m)[outer(idx, (0:(nrow(m) - 1)) * ncol(m), `+`)], nrow = nrow(m), byrow = TRUE)
}

f_GT <- function(x) {
  ux <- unique(x)
  xi <- as.integer(factor(x))
  perm <- permutations(seq_along(ux))
  apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}

f_RS <- function(x) {
  permuteGeneral(uv <- unique(x), length(uv), FUN = \(m) uv[match(x, m)])
}

set.seed(1)
x <- sample(letters[1:10], 10, replace = TRUE)

bm <- bench::mark(
  f_GT = f_GT(x),
  f_TIC1 = f_TIC1(x),
  f_TIC1Arr = f_TIC1Arr(x),
  f_TIC2 = f_TIC2(x),
  f_TIC2Arr = f_TIC2Arr(x),
  f_RS = f_RS(x),
  check = FALSE
)
autoplot(bm)

and you will see

> bm
# A tibble: 6 x 13
  expression     min  median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
  <bch:expr> <bch:t> <bch:t>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
1 f_GT       11.55ms 15.57ms      58.9  315.14KB     7.06    25     3      425ms
2 f_TIC1     17.05ms  20.8ms      45.5    2.58MB    10.1     18     4      396ms
3 f_TIC1Arr  16.45ms 19.62ms      48.9    1.06MB    13.6     18     5      368ms
4 f_TIC2      2.47ms  3.31ms     259.     3.84MB    28.5     91    10      351ms
5 f_TIC2Arr   1.54ms   1.7ms     469.     2.35MB    26.2    197    11      420ms
6 f_RS        5.66ms  7.46ms      93.9   72.75KB     9.63    39     4      415ms
# ... with 4 more variables: result <list>, memory <list>, time <list>,
#   gc <list>

and

Previous Answer

You can try pracma::perms like below

library(pracma)
f <- function(x) {
  idx <- match(x, unique(x))
  m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
  Map(`[`, m, list(idx))
}

and you will see

> f(c("a", "a", "a", "b", "b", "b", "a", "a", "b", "b"))
[[1]]
 [1] "b" "b" "b" "a" "a" "a" "b" "b" "a" "a"

[[2]]
 [1] "a" "a" "a" "b" "b" "b" "a" "a" "b" "b"


> f(c("a", "b", "b", "c", "b"))
[[1]]
[1] "c" "b" "b" "a" "b"

[[2]]
[1] "c" "a" "a" "b" "a"

[[3]]
[1] "b" "c" "c" "a" "c"

[[4]]
[1] "b" "a" "a" "c" "a"

[[5]]
[1] "a" "b" "b" "c" "b"

[[6]]
[1] "a" "c" "c" "b" "c"

Upvotes: 6

Karolis Koncevičius

Reputation: 9656

Here is base R solution:

vec <- c("a","a","a","b","b","b","c","c","c")  # original vector
els <- unique(vec)                             # unique elements

pers <- do.call(expand.grid, args=rep(list(els), length(els)))  # all permutations
pers <- as.matrix(pers[apply(pers, 1, anyDuplicated) == 0,])    # no repeated cases
colnames(pers) <- els

unname(pers[,vec])

     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] "c"  "c"  "c"  "b"  "b"  "b"  "a"  "a"  "a"
[2,] "b"  "b"  "b"  "c"  "c"  "c"  "a"  "a"  "a"
[3,] "c"  "c"  "c"  "a"  "a"  "a"  "b"  "b"  "b"
[4,] "a"  "a"  "a"  "c"  "c"  "c"  "b"  "b"  "b"
[5,] "b"  "b"  "b"  "a"  "a"  "a"  "c"  "c"  "c"
[6,] "a"  "a"  "a"  "b"  "b"  "b"  "c"  "c"  "c"

Upvotes: 2

lroha

Reputation: 34441

This answer takes the same general approach to those already posted but uses RcppAlgos::permuteGeneral() which is not only very fast but also allows functions to be applied to the permutations.

library(RcppAlgos)

f <- function(x) permuteGeneral(uv <- unique(x), length(uv), FUN = \(m) uv[match(x, m)])

f(original)
[[1]]
[1] "a" "a" "a" "b" "b" "b" "c" "c" "c"

[[2]]
[1] "a" "a" "a" "c" "c" "c" "b" "b" "b"

[[3]]
[1] "b" "b" "b" "a" "a" "a" "c" "c" "c"

[[4]]
[1] "c" "c" "c" "a" "a" "a" "b" "b" "b"

[[5]]
[1] "b" "b" "b" "c" "c" "c" "a" "a" "a"

[[6]]
[1] "c" "c" "c" "b" "b" "b" "a" "a" "a"

Upvotes: 5

Gregor Thomas

Reputation: 145775

Here's an approach using the very fast arrangements package for permutations. We calculate the permutations of integers corresponding to the unique elements of the input and then do some clever indexing to output the corresponding swaps. This is extremely fast on small examples and does pretty well on larger example - on my computer it took a little less than 7 seconds to generate the 10! = 3628800 swaps on input of size 30 with 10 unique elements. The results are conveniently returned in a list.

library(arrangements)

all_swaps = function(x) {
  ux = unique(x)
  xi = as.integer(factor(x))
  perm = permutations(seq_along(ux))
  apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}

Test cases from the question:

# n = 2
all_swaps(c("a","a","a","b","b","b","a","a","b","b"))
# [[1]]
#  [1] "a" "a" "a" "b" "b" "b" "a" "a" "b" "b"
# 
# [[2]]
#  [1] "b" "b" "b" "a" "a" "a" "b" "b" "a" "a"

## n = 3
all_swaps(c("a","a","a","b","b","b","c","c","c"))
# [[1]]
# [1] "a" "a" "a" "b" "b" "b" "c" "c" "c"
# 
# [[2]]
# [1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
# 
# [[3]]
# [1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
# 
# [[4]]
# [1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
# 
# [[5]]
# [1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
# 
# [[6]]
# [1] "c" "c" "c" "b" "b" "b" "a" "a" "a"

A shorter demo with 3 unique elements in a "complex" case where the elements are not all consecutive:

all_swaps(c("a", "b", "b", "c", "b"))
# [[1]]
# [1] "a" "b" "b" "c" "b"
# 
# [[2]]
# [1] "a" "c" "c" "b" "c"
# 
# [[3]]
# [1] "b" "a" "a" "c" "a"
# 
# [[4]]
# [1] "b" "c" "c" "a" "c"
# 
# [[5]]
# [1] "c" "a" "a" "b" "a"
# 
# [[6]]
# [1] "c" "b" "b" "a" "b"

A larger case:

# n = 10
set.seed(47)
start_t = Sys.time()
n10 = all_swaps(sample(letters[1:10], size = 30, replace = TRUE))
end_t = Sys.time()
end_t - start_t
# Time difference of 6.711215 secs
length(n10)
# [1] 3628800

Benchmarking

Benchmarking my answer with Maël's and ThomasIsCoding's, my method relying on the arrangements package is quick and memory efficient. ThomasIsCoding's answer can be improved by changing from pracma::perms to arrangements::permutations--the memory usage is especially improved--but my version still performs better. Maël's uses a lot of time and memory. I'll lead with results, code to reproduce is below.

## 5 Unique Elements
arrange(b5, desc(`itr/sec`))
# # A tibble: 4 × 13
#   expression                  min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
#   <bch:expr>             <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
# 1 GregorThomas             2.31ms   12.6ms     77.5     5.77KB        0    40     0      516ms
# 2 ThomasIsCodingArr(in5)    9.3ms   20.5ms     47.4    19.55KB        0    24     0      506ms
# 3 ThomasIsCoding(in5)     12.57ms   22.7ms     41.2    45.41KB        0    22     0      534ms
# 4 Mael                   963.64ms  963.6ms      1.04    1.24MB        0     1     0      964ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>

## 9 Unique Elements - memory allocation is important
arrange(b9, desc(`itr/sec`))
# # A tibble: 2 × 13
#   expression               min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result
#   <bch:expr>          <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm> <list>
# 1 GregorThomas            1.8s     1.8s     0.556    27.7MB    0         1     0       1.8s <NULL>
# 2 ThomasIsCoding(in9)     2.5s     2.5s     0.400   230.8MB    0.400     1     1       2.5s <NULL>
# # … with 3 more variables: memory <list>, time <list>, gc <list>

Benchmarking code:

## Functions
library(arrangements)
library(pracma)
ThomasIsCoding <- function(x) {
  idx <- match(x, unique(x))
  m <- asplit(matrix(unique(x)[perms(1:max(idx))], ncol = max(idx)), 1)
  Map(`[`, m, list(idx))
}
ThomasIsCodingArr <- function(x) {
  idx <- match(x, unique(x))
  m <- asplit(matrix(unique(x)[permutations(1:max(idx))], ncol = max(idx)), 1)
  Map(`[`, m, list(idx))
}
Mael <- function(vec){
  uni <- unique(vec)
  size <- length(uni)
  pVec <- paste(uni, collapse = "")
  grid <- expand.grid(rep(list(uni), size))
  expanded <- grid[apply(grid, 1, function(x) length(unique(x))) == size,]
  p <- unname(apply(expanded, 1, paste0, collapse = ""))
  
  lapply(p, function(x) chartr(pVec, x, vec))
}
all_swaps = function(x) {
  ux = unique(x)
  xi = as.integer(factor(x))
  perm = permutations(seq_along(ux))
  apply(perm, MARGIN = 1, FUN = \(p) ux[p][xi], simplify = FALSE)
}

set.seed(47)
in5 = c(sample(letters[1:5], 5), sample(letters[1:5], 5, replace = TRUE))

b5 = bench::mark(
  GregorThomas = all_swaps(in5),
  Mael = Mael(in5),
  ThomasIsCoding(in5),
  ThomasIsCodingArr(in5),
  check = FALSE
)

Upvotes: 12

Maël

Reputation: 52004

Using chartr, you can do (although this might crash for larger vectors):

f <- function(vec){
  uni <- unique(vec)
  size <- length(uni)
  pVec <- paste(uni, collapse = "")
  grid <- expand.grid(rep(list(uni), size))
  expanded <- grid[apply(grid, 1, function(x) length(unique(x))) == size,]
  p <- unname(apply(ex, 1, paste0, collapse = ""))
  
  lapply(p, function(x) chartr(pVec, x, vec))
}

output:

original<-c("a","a","a","b","b","b","c","c","c")
f(original)

# [[1]]
# [1] "c" "c" "c" "b" "b" "b" "a" "a" "a"
# 
# [[2]]
# [1] "b" "b" "b" "c" "c" "c" "a" "a" "a"
# 
# [[3]]
# [1] "c" "c" "c" "a" "a" "a" "b" "b" "b"
# 
# [[4]]
# [1] "a" "a" "a" "c" "c" "c" "b" "b" "b"
# 
# [[5]]
# [1] "b" "b" "b" "a" "a" "a" "c" "c" "c"
# 
# [[6]]
# [1] "a" "a" "a" "b" "b" "b" "c" "c" "c"

Previous answer (do not work for n > 2).

Using gtools::permutations. Results are each columns of the matrix. The idea is to get the permutations from unique values, and the repeat the values to match the desired group length.

f <- function(x){
  r <- rle(x)
  l <- length(r$values)
  apply(gtools::permutations(n=l, r=l, v=r$values), 1, function(x) rep(x, each = unique(r$l)))
}

Upvotes: 5

Generate all combinations of vector with consecutive occurrences is considered as single occurrence

Answers (5)

Update

Benchmarking

Previous Answer

Benchmarking

Related Questions