user17163779
user17163779

Reputation:

Join specific columns from databases

I have these two datasets (df1 and df2):

df1<-structure(list(nclusters = c("2", "3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
"31", "32", "33", "34", "35"), score = c(0.977098310323762, 0.353912285832437, 
0.099991714420885, 0.0376332612272506, 0.0376332612272506, 0.0991405887047536, 
0.0767699856835315, 0.11960646869728, 0.143669700374312, 0.164847860595995, 
0.161874525649473, 0.161874525649473, 0.161874525649473, 0.161874525649473, 
0.161874525649473, 0.161874525649473, 0.186199471714141, 0.190526715556196, 
0.179434847554162, 0.179434847554162, 0.179434847554162, 0.180225650241049, 
0.186290449043437, 0.186290449043437, 0.197864663984131, 0.197864663984131, 
0.197864663984131, 0.197864663984131, 0.197864663984131, 0.197864663984131, 
0.197864663984131, 0.199807305611238, 0.211462882119658, 0.211462882119658
), rank = c(1L, 2L, 30L, 33L, 34L, 31L, 32L, 29L, 28L, 21L, 22L, 
23L, 24L, 25L, 26L, 27L, 16L, 13L, 18L, 19L, 20L, 17L, 14L, 15L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 5L, 3L, 4L)), row.names = c(NA, 
-34L), class = "data.frame")

> df1
   nclusters      score rank
1          2 0.97709831    1
2          3 0.35391229    2
3          4 0.09999171   30
4          5 0.03763326   33
5          6 0.03763326   34
6          7 0.09914059   31
7          8 0.07676999   32
8          9 0.11960647   29
9         10 0.14366970   28
10        11 0.16484786   21
11        12 0.16187453   22
12        13 0.16187453   23
13        14 0.16187453   24
14        15 0.16187453   25
15        16 0.16187453   26
16        17 0.16187453   27
17        18 0.18619947   16
18        19 0.19052672   13
19        20 0.17943485   18
20        21 0.17943485   19
21        22 0.17943485   20
22        23 0.18022565   17
23        24 0.18629045   14
24        25 0.18629045   15
25        26 0.19786466    6
26        27 0.19786466    7
27        28 0.19786466    8
28        29 0.19786466    9
29        30 0.19786466   10
30        31 0.19786466   11
31        32 0.19786466   12
32        33 0.19980731    5
33        34 0.21146288    3
34        35 0.21146288    4

df2<-structure(list(nclusters = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35), Coverage = c(0.792205228889284, 
0.912851642353066, 0.255393354214851, 0.255393354214851, 0.255393354214851, 
0.353597680650851, 0.333736589639135, 0.412223082792884, 0.480194093041655, 
0.566807993260141, 0.552509003210828, 0.552509003210828, 0.552509003210828, 
0.552509003210828, 0.552509003210828, 0.552509003210828, 0.701211420528388, 
0.737881313741794, 0.651352623359039, 0.651352623359039, 0.651352623359039, 
0.656768957603489, 0.701940649590619, 0.701940649590619, 0.81095422062782, 
0.81095422062782, 0.81095422062782, 0.81095422062782, 0.81095422062782, 
0.81095422062782, 0.81095422062782, 0.833079159600134, 1, 1), 
    Production = c(1, 0.290313357105995, 0.104438599897452, 0.0395731724045748, 
    0.0395731724045748, 0.0395731724045748, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
    0.000580816952630715, 0.000580816952630715, 0.000580816952630715
    ), Performance = c(0.896102614444642, 0.60158249972953, 0.179915977056152, 
    0.147483263309713, 0.147483263309713, 0.196585426527713, 
    0.167158703295883, 0.206401949872757, 0.240387454997143, 
    0.283694405106386, 0.276544910081729, 0.276544910081729, 
    0.276544910081729, 0.276544910081729, 0.276544910081729, 
    0.276544910081729, 0.350896118740509, 0.369231065347212, 
    0.325966720155835, 0.325966720155835, 0.325966720155835, 
    0.32867488727806, 0.351260733271625, 0.351260733271625, 0.405767518790225, 
    0.405767518790225, 0.405767518790225, 0.405767518790225, 
    0.405767518790225, 0.405767518790225, 0.405767518790225, 
    0.416829988276383, 0.500290408476315, 0.500290408476315), 
    Rank = c(1, 2, 31, 34, 33, 30, 32, 29, 28, 21, 27, 26, 25, 
    24, 23, 22, 16, 13, 20, 19, 18, 17, 15, 14, 12, 11, 10, 9, 
    8, 7, 6, 5, 4, 3)), class = "data.frame", row.names = c(NA, 
-34L))

> df2
   nclusters  Coverage  Production Performance Rank
1          2 0.7922052 1.000000000   0.8961026    1
2          3 0.9128516 0.290313357   0.6015825    2
3          4 0.2553934 0.104438600   0.1799160   31
4          5 0.2553934 0.039573172   0.1474833   34
5          6 0.2553934 0.039573172   0.1474833   33
6          7 0.3535977 0.039573172   0.1965854   30
7          8 0.3337366 0.000580817   0.1671587   32
8          9 0.4122231 0.000580817   0.2064019   29
9         10 0.4801941 0.000580817   0.2403875   28
10        11 0.5668080 0.000580817   0.2836944   21
11        12 0.5525090 0.000580817   0.2765449   27
12        13 0.5525090 0.000580817   0.2765449   26
13        14 0.5525090 0.000580817   0.2765449   25
14        15 0.5525090 0.000580817   0.2765449   24
15        16 0.5525090 0.000580817   0.2765449   23
16        17 0.5525090 0.000580817   0.2765449   22
17        18 0.7012114 0.000580817   0.3508961   16
18        19 0.7378813 0.000580817   0.3692311   13
19        20 0.6513526 0.000580817   0.3259667   20
20        21 0.6513526 0.000580817   0.3259667   19
21        22 0.6513526 0.000580817   0.3259667   18
22        23 0.6567690 0.000580817   0.3286749   17
23        24 0.7019406 0.000580817   0.3512607   15
24        25 0.7019406 0.000580817   0.3512607   14
25        26 0.8109542 0.000580817   0.4057675   12
26        27 0.8109542 0.000580817   0.4057675   11
27        28 0.8109542 0.000580817   0.4057675   10
28        29 0.8109542 0.000580817   0.4057675    9
29        30 0.8109542 0.000580817   0.4057675    8
30        31 0.8109542 0.000580817   0.4057675    7
31        32 0.8109542 0.000580817   0.4057675    6
32        33 0.8330792 0.000580817   0.4168300    5
33        34 1.0000000 0.000580817   0.5002904    4
34        35 1.0000000 0.000580817   0.5002904    3

Now I would like to create a new dataset, which I can call df3, which uses the rank values of both df1 and df2. So the output table will look like this:

 nclusters  df1  df2 
        2    1    1
        3    2    2
        4   30    31
        5   33    34
        6   34    33
        7   31    30
        8   32    32
        9   29    29
        10  28    28
        11  21    21
        12  22    27
        13  23    26
        14  24    25
        15  25    24
        16  26    23
        17  27    22
        18  16    16
        19  13    13
        20  18    20
        21  19    19
        22  20    18
        23  17    17
        24  14    15
        25  15    14
        26   6    12
        27   7    11
        28   8    10
        29   9    9
        30   10   8
        31   11   7
        32   12   6
        33    5   5
        34    3   4
        35    4   3

Upvotes: 2

Views: 43

Answers (3)

jared_mamrot
jared_mamrot

Reputation: 26494

Perhaps this will suit:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df1<-structure(list(nclusters = c("2", "3", "4", "5", "6", "7", "8", 
                                  "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
                                  "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
                                  "31", "32", "33", "34", "35"), score = c(0.977098310323762, 0.353912285832437, 
                                                                           0.099991714420885, 0.0376332612272506, 0.0376332612272506, 0.0991405887047536, 
                                                                           0.0767699856835315, 0.11960646869728, 0.143669700374312, 0.164847860595995, 
                                                                           0.161874525649473, 0.161874525649473, 0.161874525649473, 0.161874525649473, 
                                                                           0.161874525649473, 0.161874525649473, 0.186199471714141, 0.190526715556196, 
                                                                           0.179434847554162, 0.179434847554162, 0.179434847554162, 0.180225650241049, 
                                                                           0.186290449043437, 0.186290449043437, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.197864663984131, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.199807305611238, 0.211462882119658, 0.211462882119658
                                  ), rank = c(1L, 2L, 30L, 33L, 34L, 31L, 32L, 29L, 28L, 21L, 22L, 
                                              23L, 24L, 25L, 26L, 27L, 16L, 13L, 18L, 19L, 20L, 17L, 14L, 15L, 
                                              6L, 7L, 8L, 9L, 10L, 11L, 12L, 5L, 3L, 4L)), row.names = c(NA, 
                                                                                                         -34L), class = "data.frame")

df2<-structure(list(nclusters = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
                                  12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
                                  28, 29, 30, 31, 32, 33, 34, 35), Coverage = c(0.792205228889284, 
                                                                                0.912851642353066, 0.255393354214851, 0.255393354214851, 0.255393354214851, 
                                                                                0.353597680650851, 0.333736589639135, 0.412223082792884, 0.480194093041655, 
                                                                                0.566807993260141, 0.552509003210828, 0.552509003210828, 0.552509003210828, 
                                                                                0.552509003210828, 0.552509003210828, 0.552509003210828, 0.701211420528388, 
                                                                                0.737881313741794, 0.651352623359039, 0.651352623359039, 0.651352623359039, 
                                                                                0.656768957603489, 0.701940649590619, 0.701940649590619, 0.81095422062782, 
                                                                                0.81095422062782, 0.81095422062782, 0.81095422062782, 0.81095422062782, 
                                                                                0.81095422062782, 0.81095422062782, 0.833079159600134, 1, 1), 
                    Production = c(1, 0.290313357105995, 0.104438599897452, 0.0395731724045748, 
                                   0.0395731724045748, 0.0395731724045748, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715
                    ), Performance = c(0.896102614444642, 0.60158249972953, 0.179915977056152, 
                                       0.147483263309713, 0.147483263309713, 0.196585426527713, 
                                       0.167158703295883, 0.206401949872757, 0.240387454997143, 
                                       0.283694405106386, 0.276544910081729, 0.276544910081729, 
                                       0.276544910081729, 0.276544910081729, 0.276544910081729, 
                                       0.276544910081729, 0.350896118740509, 0.369231065347212, 
                                       0.325966720155835, 0.325966720155835, 0.325966720155835, 
                                       0.32867488727806, 0.351260733271625, 0.351260733271625, 0.405767518790225, 
                                       0.405767518790225, 0.405767518790225, 0.405767518790225, 
                                       0.405767518790225, 0.405767518790225, 0.405767518790225, 
                                       0.416829988276383, 0.500290408476315, 0.500290408476315), 
                    Rank = c(1, 2, 31, 34, 33, 30, 32, 29, 28, 21, 27, 26, 25, 
                             24, 23, 22, 16, 13, 20, 19, 18, 17, 15, 14, 12, 11, 10, 9, 
                             8, 7, 6, 5, 4, 3)), class = "data.frame", row.names = c(NA, 
                                                                                     -34L))

df1$nclusters <- as.numeric(df1$nclusters)
df3 <- dplyr::left_join(df1, df2, by = "nclusters")
df3 %>%
  rename("df1" = rank, "df2" = Rank) %>%
  select(nclusters, df1, df2)
#>    nclusters df1 df2
#> 1          2   1   1
#> 2          3   2   2
#> 3          4  30  31
#> 4          5  33  34
#> 5          6  34  33
#> 6          7  31  30
#> 7          8  32  32
#> 8          9  29  29
#> 9         10  28  28
#> 10        11  21  21
#> 11        12  22  27
#> 12        13  23  26
#> 13        14  24  25
#> 14        15  25  24
#> 15        16  26  23
#> 16        17  27  22
#> 17        18  16  16
#> 18        19  13  13
#> 19        20  18  20
#> 20        21  19  19
#> 21        22  20  18
#> 22        23  17  17
#> 23        24  14  15
#> 24        25  15  14
#> 25        26   6  12
#> 26        27   7  11
#> 27        28   8  10
#> 28        29   9   9
#> 29        30  10   8
#> 30        31  11   7
#> 31        32  12   6
#> 32        33   5   5
#> 33        34   3   4
#> 34        35   4   3

Created on 2022-02-16 by the reprex package (v2.0.1)


Edit

If you have three dataframes you want to merge, you can use:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df1<-structure(list(nclusters = c("2", "3", "4", "5", "6", "7", "8", 
                                  "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
                                  "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
                                  "31", "32", "33", "34", "35"), score = c(0.977098310323762, 0.353912285832437, 
                                                                           0.099991714420885, 0.0376332612272506, 0.0376332612272506, 0.0991405887047536, 
                                                                           0.0767699856835315, 0.11960646869728, 0.143669700374312, 0.164847860595995, 
                                                                           0.161874525649473, 0.161874525649473, 0.161874525649473, 0.161874525649473, 
                                                                           0.161874525649473, 0.161874525649473, 0.186199471714141, 0.190526715556196, 
                                                                           0.179434847554162, 0.179434847554162, 0.179434847554162, 0.180225650241049, 
                                                                           0.186290449043437, 0.186290449043437, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.197864663984131, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.199807305611238, 0.211462882119658, 0.211462882119658
                                  ), rank = c(1L, 2L, 30L, 33L, 34L, 31L, 32L, 29L, 28L, 21L, 22L, 
                                              23L, 24L, 25L, 26L, 27L, 16L, 13L, 18L, 19L, 20L, 17L, 14L, 15L, 
                                              6L, 7L, 8L, 9L, 10L, 11L, 12L, 5L, 3L, 4L)), row.names = c(NA, 
                                                                                                         -34L), class = "data.frame")

df2<-structure(list(nclusters = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
                                  12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
                                  28, 29, 30, 31, 32, 33, 34, 35), Coverage = c(0.792205228889284, 
                                                                                0.912851642353066, 0.255393354214851, 0.255393354214851, 0.255393354214851, 
                                                                                0.353597680650851, 0.333736589639135, 0.412223082792884, 0.480194093041655, 
                                                                                0.566807993260141, 0.552509003210828, 0.552509003210828, 0.552509003210828, 
                                                                                0.552509003210828, 0.552509003210828, 0.552509003210828, 0.701211420528388, 
                                                                                0.737881313741794, 0.651352623359039, 0.651352623359039, 0.651352623359039, 
                                                                                0.656768957603489, 0.701940649590619, 0.701940649590619, 0.81095422062782, 
                                                                                0.81095422062782, 0.81095422062782, 0.81095422062782, 0.81095422062782, 
                                                                                0.81095422062782, 0.81095422062782, 0.833079159600134, 1, 1), 
                    Production = c(1, 0.290313357105995, 0.104438599897452, 0.0395731724045748, 
                                   0.0395731724045748, 0.0395731724045748, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715, 
                                   0.000580816952630715, 0.000580816952630715, 0.000580816952630715
                    ), Performance = c(0.896102614444642, 0.60158249972953, 0.179915977056152, 
                                       0.147483263309713, 0.147483263309713, 0.196585426527713, 
                                       0.167158703295883, 0.206401949872757, 0.240387454997143, 
                                       0.283694405106386, 0.276544910081729, 0.276544910081729, 
                                       0.276544910081729, 0.276544910081729, 0.276544910081729, 
                                       0.276544910081729, 0.350896118740509, 0.369231065347212, 
                                       0.325966720155835, 0.325966720155835, 0.325966720155835, 
                                       0.32867488727806, 0.351260733271625, 0.351260733271625, 0.405767518790225, 
                                       0.405767518790225, 0.405767518790225, 0.405767518790225, 
                                       0.405767518790225, 0.405767518790225, 0.405767518790225, 
                                       0.416829988276383, 0.500290408476315, 0.500290408476315), 
                    Rank = c(1, 2, 31, 34, 33, 30, 32, 29, 28, 21, 27, 26, 25, 
                             24, 23, 22, 16, 13, 20, 19, 18, 17, 15, 14, 12, 11, 10, 9, 
                             8, 7, 6, 5, 4, 3)), class = "data.frame", row.names = c(NA, 
                                                                                     -34L))

df3<-structure(list(nclusters = c("2", "3", "4", "5", "6", "7", "8", 
                                  "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
                                  "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
                                  "31", "32", "33", "34", "35"), score = c(0.977098310323762, 0.353912285832437, 
                                                                           0.099991714420885, 0.0376332612272506, 0.0376332612272506, 0.0991405887047536, 
                                                                           0.0767699856835315, 0.11960646869728, 0.143669700374312, 0.164847860595995, 
                                                                           0.161874525649473, 0.161874525649473, 0.161874525649473, 0.161874525649473, 
                                                                           0.161874525649473, 0.161874525649473, 0.186199471714141, 0.190526715556196, 
                                                                           0.179434847554162, 0.179434847554162, 0.179434847554162, 0.180225650241049, 
                                                                           0.186290449043437, 0.186290449043437, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.197864663984131, 0.197864663984131, 0.197864663984131, 
                                                                           0.197864663984131, 0.199807305611238, 0.211462882119658, 0.211462882119658
                                  ), rank = c(32, 17, 18, 15, 11,  1, 31,  7, 29, 19, 28,
                                              9,  4, 10, 13, 25, 12, 14, 24, 23, 26, 34,
                                              3,  6, 8, 16,  2, 33, 27, 30, 22, 21, 20,  5)), row.names = c(NA, 
                                                                                                         -34L), class = "data.frame")

#View first 6 rows of each df
head(df1)
#>   nclusters      score rank
#> 1         2 0.97709831    1
#> 2         3 0.35391229    2
#> 3         4 0.09999171   30
#> 4         5 0.03763326   33
#> 5         6 0.03763326   34
#> 6         7 0.09914059   31
head(df2)
#>   nclusters  Coverage Production Performance Rank
#> 1         2 0.7922052 1.00000000   0.8961026    1
#> 2         3 0.9128516 0.29031336   0.6015825    2
#> 3         4 0.2553934 0.10443860   0.1799160   31
#> 4         5 0.2553934 0.03957317   0.1474833   34
#> 5         6 0.2553934 0.03957317   0.1474833   33
#> 6         7 0.3535977 0.03957317   0.1965854   30
head(df3)
#>   nclusters      score rank
#> 1         2 0.97709831   32
#> 2         3 0.35391229   17
#> 3         4 0.09999171   18
#> 4         5 0.03763326   15
#> 5         6 0.03763326   11
#> 6         7 0.09914059    1

# Change "nclusters" to numeric
df1$nclusters <- as.numeric(df1$nclusters)
df3$nclusters <- as.numeric(df3$nclusters)

df4 <- df1 %>%
  left_join(df2, by = "nclusters") %>%
  left_join(df3, by = "nclusters") %>%
  rename("df1" = rank.x, "df2" = Rank, "df3" = rank.y) %>%
  select(nclusters, df1, df2, df3)
df4
#>    nclusters df1 df2 df3
#> 1          2   1   1  32
#> 2          3   2   2  17
#> 3          4  30  31  18
#> 4          5  33  34  15
#> 5          6  34  33  11
#> 6          7  31  30   1
#> 7          8  32  32  31
#> 8          9  29  29   7
#> 9         10  28  28  29
#> 10        11  21  21  19
#> 11        12  22  27  28
#> 12        13  23  26   9
#> 13        14  24  25   4
#> 14        15  25  24  10
#> 15        16  26  23  13
#> 16        17  27  22  25
#> 17        18  16  16  12
#> 18        19  13  13  14
#> 19        20  18  20  24
#> 20        21  19  19  23
#> 21        22  20  18  26
#> 22        23  17  17  34
#> 23        24  14  15   3
#> 24        25  15  14   6
#> 25        26   6  12   8
#> 26        27   7  11  16
#> 27        28   8  10   2
#> 28        29   9   9  33
#> 29        30  10   8  27
#> 30        31  11   7  30
#> 31        32  12   6  22
#> 32        33   5   5  21
#> 33        34   3   4  20
#> 34        35   4   3   5

Created on 2022-02-17 by the reprex package (v2.0.1)

Upvotes: 1

jay.sf
jay.sf

Reputation: 72633

Using merge and type.convert. Use by* argument, if more than one column in both data frames have exactly the same name.

res <- merge(type.convert(df1[-2], as.is=TRUE), df2[c(1, 5)])
head(res)
#   nclusters rank Rank
# 1         2    1    1
# 2         3    2    2
# 3         4   30   31
# 4         5   33   34
# 5         6   34   33
# 6         7   31   30

If after that you want to identify rows that differ in both ranks you could do:

## rows
which(with(res, rank - Rank) != 0)
# [1]  3  4  5  6 11 12 13 14 15 16 19 21 23 24 25 26 27 29 30 31 33 34

## clusters
res$nclusters[with(res, rank - Rank) != 0]
# [1]  4  5  6  7 12 13 14 15 16 17 20 22 24 25 26 27 28 30 31 32 34 35

Upvotes: 1

dcarlson
dcarlson

Reputation: 11046

Here is another approach:

df1$nclusters <- as.numeric(df1$nclusters)  # Convert to numeric as in df2
df3 <- merge(df1[, c(1, 3)], df2[, c(1, 5)], by="nclusters", sort=TRUE)
head(df3)
#   nclusters rank Rank
# 1         2    1    1
# 2         3    2    2
# 3         4   30   31
# 4         5   33   34
# 5         6   34   33
# 6         7   31   30

Upvotes: 1

Related Questions