Electrino
Electrino

Reputation: 2900

Trying to match values and replace from one column to another in R?

If my data looks like this:

library(dplyr)
df <- tibble(
  iter = c(1,1,1,  1,1,1,1,1,    2,2,2,2,2),
  num  = c(1,1,1,  2,2,2,2,2,    1,1,1,1,1),
  n  = c(1,2,3,    1,2,4,5,3,    1,2,3,6,7),
  nX = c(1,2,3,    1,2,3,4,5,    1,2,3,4,5),
  LL = c(2,NA,NA,  2,4,NA,NA,NA, 2,NA,6,NA,NA),
  RR = c(3,NA,NA,  3,5,NA,NA,NA, 3,NA,7,NA,NA)
)
> df
# A tibble: 13 × 6
    iter   num     n    nX    LL    RR
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     1     1     1     1     2     3
 2     1     1     2     2    NA    NA
 3     1     1     3     3    NA    NA
 4     1     2     1     1     2     3
 5     1     2     2     2     4     5
 6     1     2     4     3    NA    NA
 7     1     2     5     4    NA    NA
 8     1     2     3     5    NA    NA
 9     2     1     1     1     2     3
10     2     1     2     2    NA    NA
11     2     1     3     3     6     7
12     2     1     6     4    NA    NA
13     2     1     7     5    NA    NA

What Im trying to do is group the data by iter and num and then Im looking at the columns LL and RR for that particular group. What I want to do then is find the values of LL and RR in the n column, and replace them with the corresponding value from the nX column (reading row-wise).

so, for example, if we look at iter = 1, num = 2 (below). To begin, we look at the 1st value in the RR column (i.e., 3). We match it to the value in the n column (i.e., row 5) and replace the value in RR with the corresponding value in nX column (i.e., 5). We then do this for every value in LL and RR.

> dfT <- df %>% filter(iter == 1, num == 2)
# A tibble: 5 × 6
   iter   num     n    nX    LL    RR
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     2     1     1     2     3
2     1     2     2     2     4     5
3     1     2     4     3    NA    NA
4     1     2     5     4    NA    NA
5     1     2     3     5    NA    NA

Attempted Solution

So, I have a little function that can do exactly what I described above, however, it seems to only work if I filter out the groups. When I try grouping the data and applying the function over the whole data frame, the function doesn't work correctly. For example, if I run the function on the filtered data, it works:

testFunc <- function(data){
  intLL <- match(data[,"LL"]$LL, data[,"n"]$n)
  intRR <- match(data[,"RR"]$RR, data[,"n"]$n)
  
  data[,"LL"]$LL <- data[intLL,"nX"]$nX
  data[,"RR"]$RR <- data[intRR,"nX"]$nX
  
  return(data)
} 

> testFunc(dfT)
# A tibble: 5 × 6
   iter   num     n    nX    LL    RR
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     2     1     1     2     5
2     1     2     2     2     3     4
3     1     2     4     3    NA    NA
4     1     2     5     4    NA    NA
5     1     2     3     5    NA    NA

But when I apply it to the whole dataset, it breaks:

df <- df %>% 
  group_by(iter, num)
> testFunc(df)
# A tibble: 13 × 6
# Groups:   iter, num [3]
    iter   num     n    nX    LL    RR
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     1     1     1     1     2     3
 2     1     1     2     2    NA    NA
 3     1     1     3     3    NA    NA
 4     1     2     1     1     2     3
 5     1     2     2     2     3     4
 6     1     2     4     3    NA    NA
 7     1     2     5     4    NA    NA
 8     1     2     3     5    NA    NA
 9     2     1     1     1     2     3
10     2     1     2     2    NA    NA
11     2     1     3     3     4     5
12     2     1     6     4    NA    NA
13     2     1     7     5    NA    NA

What am I doing wrong here?

Upvotes: 1

Views: 1314

Answers (2)

jared_mamrot
jared_mamrot

Reputation: 26225

One option is to use group_modify() to apply the function to each group, e.g.

library(dplyr)

df <- tibble(
  iter = c(1,1,1,  1,1,1,1,1,    2,2,2,2,2),
  num  = c(1,1,1,  2,2,2,2,2,    1,1,1,1,1),
  n  = c(1,2,3,    1,2,4,5,3,    1,2,3,6,7),
  nX = c(1,2,3,    1,2,3,4,5,    1,2,3,4,5),
  LL = c(2,NA,NA,  2,4,NA,NA,NA, 2,NA,6,NA,NA),
  RR = c(3,NA,NA,  3,5,NA,NA,NA, 3,NA,7,NA,NA)
)

testFunc <- function(data){
  intLL <- match(data[,"LL"]$LL, data[,"n"]$n)
  intRR <- match(data[,"RR"]$RR, data[,"n"]$n)
  
  data[,"LL"]$LL <- data[intLL,"nX"]$nX
  data[,"RR"]$RR <- data[intRR,"nX"]$nX
  
  return(data)
}

df %>% 
  group_by(iter, num) %>%
  group_modify(~ testFunc(.x))
#> # A tibble: 13 × 6
#> # Groups:   iter, num [3]
#>     iter   num     n    nX    LL    RR
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1     1     1     1     1     2     3
#>  2     1     1     2     2    NA    NA
#>  3     1     1     3     3    NA    NA
#>  4     1     2     1     1     2     5
#>  5     1     2     2     2     3     4
#>  6     1     2     4     3    NA    NA
#>  7     1     2     5     4    NA    NA
#>  8     1     2     3     5    NA    NA
#>  9     2     1     1     1     2     3
#> 10     2     1     2     2    NA    NA
#> 11     2     1     3     3     4     5
#> 12     2     1     6     4    NA    NA
#> 13     2     1     7     5    NA    NA

Created on 2022-02-02 by the reprex package (v2.0.1)

Upvotes: 2

Ronak Shah
Ronak Shah

Reputation: 389335

Here's a shorter one -

library(dplyr)

df %>%
  group_by(iter, num) %>%
  mutate(across(c(LL, RR), ~nX[match(., n)])) %>%
  ungroup

#    iter   num     n    nX    LL    RR
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1     1     1     1     2     3
# 2     1     1     2     2    NA    NA
# 3     1     1     3     3    NA    NA
# 4     1     2     1     1     2     5
# 5     1     2     2     2     3     4
# 6     1     2     4     3    NA    NA
# 7     1     2     5     4    NA    NA
# 8     1     2     3     5    NA    NA
# 9     2     1     1     1     2     3
#10     2     1     2     2    NA    NA
#11     2     1     3     3     4     5
#12     2     1     6     4    NA    NA
#13     2     1     7     5    NA    NA

Since we want to apply the same function to both the columns (LL and RR) we can use across to do that.

Upvotes: 3

Related Questions