rnso
rnso

Reputation: 24623

Analyzing permutations in R

I can get all permutations of letters a-e by:

> library(gtools)
> permutations(5,5, letters[1:5])
       [,1] [,2] [,3] [,4] [,5]
  [1,] "a"  "b"  "c"  "d"  "e" 
  [2,] "a"  "b"  "c"  "e"  "d" 
  [3,] "a"  "b"  "d"  "c"  "e" 
  [4,] "a"  "b"  "d"  "e"  "c" 
  [5,] "a"  "b"  "e"  "c"  "d" 
  [6,] "a"  "b"  "e"  "d"  "c" 
  [7,] "a"  "c"  "b"  "d"  "e" 
  [8,] "a"  "c"  "b"  "e"  "d" 
.....

But how do I know, for each row, how many letters in the row are in their proper place?

Edit: Thanks for your replies. I used microbenchmark see the speed:

> n=7
> f1 <- function() {A= permutations(n,n, letters[1:n]); table(apply(A, 1, function(u) sum( u == letters[1:n] )))}
> f2 <- function() {A= permutations(n,n, letters[1:n]); rowSums(A==letters[1:n][col(A)])}
> f3 <- function() {A <-permutations(n,n); rowSums(A==col(A))}
> library(microbenchmark)
> microbenchmark(f1(), f2(), f3(), unit="relative")
Unit: relative
 expr      min       lq     mean   median       uq      max neval
 f1() 1.426850 1.426050 1.424601 1.428461 1.410686 1.727781   100
 f2() 1.076791 1.074002 1.071137 1.065674 1.069168 1.071190   100
 f3() 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000   100

I had accepted apply answer since it was clearly understandable.

Keeping permutation function outside:

> n=8
> library(gtools)
> A= permutations(n,n, letters[1:n]);
> B <-permutations(n,n); 
> f1 <- function() {table(apply(A, 1, function(u) sum( u == letters[1:n] )))}
> f2 <- function() {rowSums(A==letters[1:n][col(A)])}
> f3 <- function() {rowSums(B==col(B))}
> library(microbenchmark)
> microbenchmark(f1(), f2(), f3(), unit="relative")
Unit: relative
 expr       min        lq      mean    median        uq      max neval
 f1() 79.426189 72.674500 40.296357 68.896710 43.559159 6.269005   100
 f2()  3.440729  3.614968  2.807806  3.589499  2.740272 1.349151   100
 f3()  1.000000  1.000000  1.000000  1.000000  1.000000 1.000000   100
> 

Upvotes: 1

Views: 152

Answers (2)

Vincent Guillemot
Vincent Guillemot

Reputation: 3429

Something like this might work :

library(gtools)
A <- permutations(5,5, letters[1:5])
apply(A, 1, function(u) sum( u == letters[1:5] ))

We obtain

head(data.frame(A,n=apply(A, 1, function(u) sum( u == letters[1:5] ))))
#   X1 X2 X3 X4 X5 n
# 1  a  b  c  d  e 5
# 2  a  b  c  e  d 3
# 3  a  b  d  c  e 3
# 4  a  b  d  e  c 2
# 5  a  b  e  c  d 2
# 6  a  b  e  d  c 3

Upvotes: 3

akrun
akrun

Reputation: 887901

You could also do: (Inspired from @J.R.'s comments)

  A <- permutations(5,5, letters[1:5])
  rowSums(A==letters[1:5][col(A)])
  #[1] 5 3 3 2 2 3 3 1 2 1 1 2 2 1 3 2 1 1 1 2 2 3 1 1 3 1 1 0 0 1 2 0 1 0 0 1 1
  #[38] 0 2 1 0 0 0 1 1 2 0 0 2 0 1 0 0 1 3 1 2 1 1 2 1 0 1 0 0 0 0 1 0 1 0 0 1 0
  #[75] 2 1 0 0 2 1 3 2 1 1 1 0 1 0 0 0 0 0 0 0 1 1 0 1 1 2 0 0 1 2 2 3 1 1 0 1 0
  #[112] 1 0 0 0 0 0 0 1 1

Explanation

Just for the convenience, I am taking a subset of A

  A1 <- head(A)
  col(A1)
  #     [,1] [,2] [,3] [,4] [,5]
  #[1,]    1    2    3    4    5
  #[2,]    1    2    3    4    5
  #[3,]    1    2    3    4    5
  #[4,]    1    2    3    4    5
  #[5,]    1    2    3    4    5
  #[6,]    1    2    3    4    5

letters[1:5][col(A1)], is similar to rep(letters[1:5], each=nrow(A1)) because the numeric index created by col(A1) and the column wise operation make sure letters[1:5] are replicated. The first element of letters[1:5] i.e. a fills all the 1s of col(A1). Similarly, bs in 2s etc of col(A1)

  letters[1:5][col(A1)]
  #[1] "a" "a" "a" "a" "a" "a" "b" "b" "b" "b" "b" "b" "c" "c" "c" "c" "c" "c" "d"
  #[20] "d" "d" "d" "d" "d" "e" "e" "e" "e" "e" "e"

  rep(letters[1:5], each=nrow(A1))
 # [1] "a" "a" "a" "a" "a" "a" "b" "b" "b" "b" "b" "b" "c" "c" "c" "c" "c" "c" "d"
 #[20] "d" "d" "d" "d" "d" "e" "e" "e" "e" "e" "e"

In the comparsion A1==letters[1:5][col(A1)], both the rhs and lhs of == have the same length (i.e. length(A1)#[1] 30, length(letters[1:5][col(A1)])#[1] 30), so the comparison will be elementwise and you get the result

  A1==letters[1:5][col(A1)]
  #    [,1] [,2]  [,3]  [,4]  [,5]
  #[1,] TRUE TRUE  TRUE  TRUE  TRUE
  #[2,] TRUE TRUE  TRUE FALSE FALSE
  #[3,] TRUE TRUE FALSE FALSE  TRUE
  #[4,] TRUE TRUE FALSE FALSE FALSE
  #[5,] TRUE TRUE FALSE FALSE FALSE
  #[6,] TRUE TRUE FALSE  TRUE FALSE

Now, why A1==letters[1:5] it gives a different result is because of how the recycling of elements occur.

   A1==letters[1:5] 
   #     [,1]  [,2]  [,3]  [,4]  [,5]
   #[1,]  TRUE  TRUE  TRUE  TRUE  TRUE
   #[2,] FALSE FALSE FALSE  TRUE FALSE
   #[3,] FALSE FALSE FALSE FALSE FALSE
   #[4,] FALSE FALSE FALSE FALSE  TRUE
   #[5,] FALSE FALSE FALSE  TRUE  TRUE
   #[6,]  TRUE  TRUE FALSE  TRUE FALSE

The comparison above is similar to comparing c(A1) and rep(letters[1:5], nrow(A1))

  c(A1)
  #[1] "a" "a" "a" "a" "a" "a" "b" "b" "b" "b" "b" "b" "c" "c" "d" "d" "e" "e" "d"
  #[20] "e" "c" "e" "c" "d" "e" "d" "e" "c" "d" "c"

  rep(letters[1:5], nrow(A1))
  #[1] "a" "b" "c" "d" "e" "a" "b" "c" "d" "e" "a" "b" "c" "d" "e" "a" "b" "c" "d"
  #[20] "e" "a" "b" "c" "d" "e" "a" "b" "c" "d" "e"

If you want to make use of the recycling, you could do:

 t(t(A1)==letters[1:5])
 #     [,1] [,2]  [,3]  [,4]  [,5]
 # [1,] TRUE TRUE  TRUE  TRUE  TRUE
 # [2,] TRUE TRUE  TRUE FALSE FALSE
 # [3,] TRUE TRUE FALSE FALSE  TRUE
 # [4,] TRUE TRUE FALSE FALSE FALSE
 # [5,] TRUE TRUE FALSE FALSE FALSE
 # [6,] TRUE TRUE FALSE  TRUE FALSE


  

 

Upvotes: 2

Related Questions