MelaniaCB
MelaniaCB

Reputation: 435

Generating unique row pairs in R

I'm trying to do something really simple but can't get it done.

I've got this data frame:

structure(list(First_ID = c(1L, 2L, 4L, 5L, 6L, 8L, 
9L, 20L), First_Lat = c(14.60543375, 14.60928364, 14.60928364, 
14.55621093, 14.50756, 14.5802, 14.5802, 14.66019), First_Lon = c(-90.53911871, 
-90.53998477, -90.53998477, -90.54753174, -90.47934, -90.54794, 
-90.54794, -90.49326), First_11.Territrorios = c("Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate")), class = "data.frame", row.names = c(NA, 
-8L))

And I want to create another one with all possible matches to get it looking like this:

structure(list(X.U.FEFF.First_ID = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 
6L, 6L, 6L, 8L, 8L, 9L), First_Lat = c(14.60543375, 14.60543375, 
14.60543375, 14.60543375, 14.60543375, 14.60543375, 14.60543375, 
14.60928364, 14.60928364, 14.60928364, 14.60928364, 14.60928364, 
14.60928364, 14.60928364, 14.60928364, 14.60928364, 14.60928364, 
14.60928364, 14.55621093, 14.55621093, 14.55621093, 14.55621093, 
14.50756, 14.50756, 14.50756, 14.5802, 14.5802, 14.5802), First_Lon = c(-90.53911871, 
-90.53911871, -90.53911871, -90.53911871, -90.53911871, -90.53911871, 
-90.53911871, -90.53998477, -90.53998477, -90.53998477, -90.53998477, 
-90.53998477, -90.53998477, -90.53998477, -90.53998477, -90.53998477, 
-90.53998477, -90.53998477, -90.54753174, -90.54753174, -90.54753174, 
-90.54753174, -90.47934, -90.47934, -90.47934, -90.54794, -90.54794, 
-90.54794), First_11.Territrorios = c("Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate"), Second_ID = c(2L, 4L, 5L, 
6L, 8L, 9L, 20L, 4L, 5L, 6L, 8L, 9L, 20L, 5L, 6L, 8L, 9L, 20L, 
6L, 8L, 9L, 20L, 8L, 9L, 20L, 9L, 20L, 20L), Second_Lat = c(14.60928364, 
14.60928364, 14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 
14.60928364, 14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 
14.55621093, 14.50756, 14.5802, 14.5802, 14.66019, 14.50756, 
14.5802, 14.5802, 14.66019, 14.5802, 14.5802, 14.66019, 14.5802, 
14.66019, 14.66019), Second_Lon = c(-90.53998477, -90.53998477, 
-90.54753174, -90.47934, -90.54794, -90.54794, -90.49326, -90.53998477, 
-90.54753174, -90.47934, -90.54794, -90.54794, -90.49326, -90.54753174, 
-90.47934, -90.54794, -90.54794, -90.49326, -90.47934, -90.54794, 
-90.54794, -90.49326, -90.54794, -90.54794, -90.49326, -90.54794, 
-90.49326, -90.49326), Second_11.Territrorios = c("Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate", "Guate", "Guate", 
"Guate", "Guate", "Guate", "Guate", "Guate")), class = "data.frame", row.names = c(NA, 
-28L))

By repeiting each row with each other until there's no more pairs left! - I've tried using combn function but it gives me a list creating all pairs for each column as a new object and I only want a single data frame doing it for all. Also tried using expand.grid but it's giving me this error:

Error in paste0(nmc[i], "=", if (is.numeric(x)) format(x) else x) : 
  cannot coerce type 'closure' to vector of type 'character'

Could someone please give me a hint?

Upvotes: 1

Views: 49

Answers (2)

r2evans
r2evans

Reputation: 160407

One method:

combinations <- combn(8,2)
combinations
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28]
# [1,]    1    1    1    1    1    1    1    2    2     2     2     2     2     3     3     3     3     3     4     4     4     4     5     5     5     6     6     7
# [2,]    2    3    4    5    6    7    8    3    4     5     6     7     8     4     5     6     7     8     5     6     7     8     6     7     8     7     8     8
out <- cbind(dat[combinations[1,],], dat[combinations[2,],])

names(out) <- c(names(dat), gsub("First", "Second", names(dat)))
out
#     First_ID First_Lat First_Lon First_11.Territrorios Second_ID Second_Lat Second_Lon Second_11.Territrorios
# 1          1  14.60543 -90.53912                 Guate         2   14.60928  -90.53998                  Guate
# 1.1        1  14.60543 -90.53912                 Guate         4   14.60928  -90.53998                  Guate
# 1.2        1  14.60543 -90.53912                 Guate         5   14.55621  -90.54753                  Guate
# 1.3        1  14.60543 -90.53912                 Guate         6   14.50756  -90.47934                  Guate
# 1.4        1  14.60543 -90.53912                 Guate         8   14.58020  -90.54794                  Guate
# 1.5        1  14.60543 -90.53912                 Guate         9   14.58020  -90.54794                  Guate
# 1.6        1  14.60543 -90.53912                 Guate        20   14.66019  -90.49326                  Guate
# 2          2  14.60928 -90.53998                 Guate         4   14.60928  -90.53998                  Guate
# 2.1        2  14.60928 -90.53998                 Guate         5   14.55621  -90.54753                  Guate
# 2.2        2  14.60928 -90.53998                 Guate         6   14.50756  -90.47934                  Guate
# 2.3        2  14.60928 -90.53998                 Guate         8   14.58020  -90.54794                  Guate
# 2.4        2  14.60928 -90.53998                 Guate         9   14.58020  -90.54794                  Guate
# 2.5        2  14.60928 -90.53998                 Guate        20   14.66019  -90.49326                  Guate
# 3          4  14.60928 -90.53998                 Guate         5   14.55621  -90.54753                  Guate
# 3.1        4  14.60928 -90.53998                 Guate         6   14.50756  -90.47934                  Guate
# 3.2        4  14.60928 -90.53998                 Guate         8   14.58020  -90.54794                  Guate
# 3.3        4  14.60928 -90.53998                 Guate         9   14.58020  -90.54794                  Guate
# 3.4        4  14.60928 -90.53998                 Guate        20   14.66019  -90.49326                  Guate
# 4          5  14.55621 -90.54753                 Guate         6   14.50756  -90.47934                  Guate
# 4.1        5  14.55621 -90.54753                 Guate         8   14.58020  -90.54794                  Guate
# 4.2        5  14.55621 -90.54753                 Guate         9   14.58020  -90.54794                  Guate
# 4.3        5  14.55621 -90.54753                 Guate        20   14.66019  -90.49326                  Guate
# 5          6  14.50756 -90.47934                 Guate         8   14.58020  -90.54794                  Guate
# 5.1        6  14.50756 -90.47934                 Guate         9   14.58020  -90.54794                  Guate
# 5.2        6  14.50756 -90.47934                 Guate        20   14.66019  -90.49326                  Guate
# 6          8  14.58020 -90.54794                 Guate         9   14.58020  -90.54794                  Guate
# 6.1        8  14.58020 -90.54794                 Guate        20   14.66019  -90.49326                  Guate
# 7          9  14.58020 -90.54794                 Guate        20   14.66019  -90.49326                  Guate

Another method, though less efficient because it does a full cartesian join before filtering:

subset(merge(dat, dat, by = NULL), First_ID.x < First_ID.y)
#    First_ID.x First_Lat.x First_Lon.x First_11.Territrorios.x First_ID.y First_Lat.y First_Lon.y First_11.Territrorios.y
# 9           1    14.60543   -90.53912                   Guate          2    14.60928   -90.53998                   Guate
# 17          1    14.60543   -90.53912                   Guate          4    14.60928   -90.53998                   Guate
# 18          2    14.60928   -90.53998                   Guate          4    14.60928   -90.53998                   Guate
# 25          1    14.60543   -90.53912                   Guate          5    14.55621   -90.54753                   Guate
# 26          2    14.60928   -90.53998                   Guate          5    14.55621   -90.54753                   Guate
# 27          4    14.60928   -90.53998                   Guate          5    14.55621   -90.54753                   Guate
# 33          1    14.60543   -90.53912                   Guate          6    14.50756   -90.47934                   Guate
# 34          2    14.60928   -90.53998                   Guate          6    14.50756   -90.47934                   Guate
# 35          4    14.60928   -90.53998                   Guate          6    14.50756   -90.47934                   Guate
# 36          5    14.55621   -90.54753                   Guate          6    14.50756   -90.47934                   Guate
# 41          1    14.60543   -90.53912                   Guate          8    14.58020   -90.54794                   Guate
# 42          2    14.60928   -90.53998                   Guate          8    14.58020   -90.54794                   Guate
# 43          4    14.60928   -90.53998                   Guate          8    14.58020   -90.54794                   Guate
# 44          5    14.55621   -90.54753                   Guate          8    14.58020   -90.54794                   Guate
# 45          6    14.50756   -90.47934                   Guate          8    14.58020   -90.54794                   Guate
# 49          1    14.60543   -90.53912                   Guate          9    14.58020   -90.54794                   Guate
# 50          2    14.60928   -90.53998                   Guate          9    14.58020   -90.54794                   Guate
# 51          4    14.60928   -90.53998                   Guate          9    14.58020   -90.54794                   Guate
# 52          5    14.55621   -90.54753                   Guate          9    14.58020   -90.54794                   Guate
# 53          6    14.50756   -90.47934                   Guate          9    14.58020   -90.54794                   Guate
# 54          8    14.58020   -90.54794                   Guate          9    14.58020   -90.54794                   Guate
# 57          1    14.60543   -90.53912                   Guate         20    14.66019   -90.49326                   Guate
# 58          2    14.60928   -90.53998                   Guate         20    14.66019   -90.49326                   Guate
# 59          4    14.60928   -90.53998                   Guate         20    14.66019   -90.49326                   Guate
# 60          5    14.55621   -90.54753                   Guate         20    14.66019   -90.49326                   Guate
# 61          6    14.50756   -90.47934                   Guate         20    14.66019   -90.49326                   Guate
# 62          8    14.58020   -90.54794                   Guate         20    14.66019   -90.49326                   Guate
# 63          9    14.58020   -90.54794                   Guate         20    14.66019   -90.49326                   Guate

Row names are distracting but harmless. Column names can be updated as in the first example.

The biggest difference between doing the cbind outside combn (as above) versus inside using FUN= is that doing it outside makes one call to cbind; inside it makes in this example 28 calls to cbind then combines the results. While performance is not a concern with this dataset, it is certainly less efficient to iterate over each combination.

Upvotes: 1

ThomasIsCoding
ThomasIsCoding

Reputation: 101099

Try combn like below

do.call(
  rbind,
  combn(nrow(df),
    2,
    function(x) {
      cbind(
        df[x[1], ],
        setNames(df[x[2], ], gsub("First", "Second", names(df)))
      )
    },
    simplify = FALSE
  )
)

which gives

   First_ID First_Lat First_Lon First_11.Territrorios Second_ID Second_Lat
1         1  14.60543 -90.53912                 Guate         2   14.60928
2         1  14.60543 -90.53912                 Guate         4   14.60928
3         1  14.60543 -90.53912                 Guate         5   14.55621
4         1  14.60543 -90.53912                 Guate         6   14.50756
5         1  14.60543 -90.53912                 Guate         8   14.58020
6         1  14.60543 -90.53912                 Guate         9   14.58020
7         1  14.60543 -90.53912                 Guate        20   14.66019
21        2  14.60928 -90.53998                 Guate         4   14.60928
22        2  14.60928 -90.53998                 Guate         5   14.55621
23        2  14.60928 -90.53998                 Guate         6   14.50756
24        2  14.60928 -90.53998                 Guate         8   14.58020
25        2  14.60928 -90.53998                 Guate         9   14.58020
26        2  14.60928 -90.53998                 Guate        20   14.66019
31        4  14.60928 -90.53998                 Guate         5   14.55621
32        4  14.60928 -90.53998                 Guate         6   14.50756
33        4  14.60928 -90.53998                 Guate         8   14.58020
34        4  14.60928 -90.53998                 Guate         9   14.58020
35        4  14.60928 -90.53998                 Guate        20   14.66019
41        5  14.55621 -90.54753                 Guate         6   14.50756
42        5  14.55621 -90.54753                 Guate         8   14.58020
43        5  14.55621 -90.54753                 Guate         9   14.58020
44        5  14.55621 -90.54753                 Guate        20   14.66019
51        6  14.50756 -90.47934                 Guate         8   14.58020
52        6  14.50756 -90.47934                 Guate         9   14.58020
53        6  14.50756 -90.47934                 Guate        20   14.66019
61        8  14.58020 -90.54794                 Guate         9   14.58020
62        8  14.58020 -90.54794                 Guate        20   14.66019
71        9  14.58020 -90.54794                 Guate        20   14.66019
   Second_Lon Second_11.Territrorios
1   -90.53998                  Guate
2   -90.53998                  Guate
3   -90.54753                  Guate
4   -90.47934                  Guate
5   -90.54794                  Guate
6   -90.54794                  Guate
7   -90.49326                  Guate
21  -90.53998                  Guate
22  -90.54753                  Guate
23  -90.47934                  Guate
24  -90.54794                  Guate
25  -90.54794                  Guate
26  -90.49326                  Guate
31  -90.54753                  Guate
32  -90.47934                  Guate
33  -90.54794                  Guate
34  -90.54794                  Guate
35  -90.49326                  Guate
41  -90.47934                  Guate
42  -90.54794                  Guate
43  -90.54794                  Guate
44  -90.49326                  Guate
51  -90.54794                  Guate
52  -90.54794                  Guate
53  -90.49326                  Guate
61  -90.54794                  Guate
62  -90.49326                  Guate
71  -90.49326                  Guate

Upvotes: 1

Related Questions