joe_bill.dollar
joe_bill.dollar

Reputation: 374

merging dataframes with 1 column different

I want to merge two different sized dataframes where one of the columns have a different column name. For example, let's say I have the following data:

df1 <- structure(list(fd_rtng = c(45, 45, 45, 45, 45, 45, 45, 45, 45, 
45), srvc_rt = c(40, 40, 40, 40, 40, 40, 40, 40, 40, 40), atmsph_ = c(35, 
35, 35, 35, 35, 35, 35, 35, 35, 35), vl_rtng = c(35, 35, 35, 
35, 35, 35, 35, 35, 35, 35), romantic = c(1, 1, 1, 1, 1, 1, 1, 
1, 1, 1)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", 
"data.frame"))

df2 <- structure(list(fd_rtng = c(45, 45, 45, 45, 45), srvc_rt = c(40, 
40, 40, 40, 40), atmsph_ = c(35, 35, 35, 35, 35), vl_rtng = c(35, 
35, 35, 35, 35), love = c(1, 1, 1, 1, 1)), row.names = c(NA, 
-5L), class = c("tbl_df", "tbl", "data.frame"))

To get the following output:

# A tibble: 15 × 6
   fd_rtng srvc_rt atmsph_ vl_rtng romantic   love
     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>
 1      45      40      35      35        1     0
 2      45      40      35      35        1     0
 3      45      40      35      35        1     0
 4      45      40      35      35        1     0
 5      45      40      35      35        1     0
 6      45      40      35      35        1     0
 7      45      40      35      35        1     0
 8      45      40      35      35        1     0
 9      45      40      35      35        1     0
10      45      40      35      35        1     0
11      45      40      35      35        0     1
12      45      40      35      35        0     1
13      45      40      35      35        0     1
14      45      40      35      35        0     1
15      45      40      35      35        0     1

I have tried the following:

merge(df1, df2)
inner_join(df1, df2)
full_join(df1, df2)

But the produce a table with 50 x 6 so it gives extra values and all values for romantic and love are 1 which is not what I need.

Second example output:

# A tibble: 15 × 6
   fd_rtng srvc_rt atmsph_ vl_rtng romantic   love
     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>
 1      45      40      35      35        1     0
 2      45      40      35      35        1     0
 3      45      40      35      35        1     0
 4      45      40      35      35        1     0
 5      45      40      35      35        1     0
 6      45      40      35      35        1     0
 7      45      40      35      35        1     0
 8      45      40      35      35        1     0
 9      45      40      35      35        1     0
10      45      40      35      35        1     0
11      45      40      35      35        1     1
12      45      40      35      35        1     1
13      45      40      35      35        1     1
14      45      40      35      35        1     1
15      45      40      35      35        1     1

Upvotes: 1

Views: 31

Answers (2)

AndrewGB
AndrewGB

Reputation: 16876

Another option is to use rbind.fill from plyr. Then, we can replace the NAs in romantic and love with 0.

result <- plyr::rbind.fill(df1, df2)

result[,c("romantic", "love")][is.na(result[,c("romantic", "love")])] <- 0

Output

   fd_rtng srvc_rt atmsph_ vl_rtng romantic love
1       45      40      35      35        1    0
2       45      40      35      35        1    0
3       45      40      35      35        1    0
4       45      40      35      35        1    0
5       45      40      35      35        1    0
6       45      40      35      35        1    0
7       45      40      35      35        1    0
8       45      40      35      35        1    0
9       45      40      35      35        1    0
10      45      40      35      35        1    0
11      45      40      35      35        0    1
12      45      40      35      35        0    1
13      45      40      35      35        0    1
14      45      40      35      35        0    1
15      45      40      35      35        0    1

Upvotes: 0

PaulS
PaulS

Reputation: 25528

A possible solution:

library(tidyverse)

bind_rows(df1, df2) %>% mutate(across(everything(), ~ replace_na(.x, 0)))

#> # A tibble: 15 × 6
#>    fd_rtng srvc_rt atmsph_ vl_rtng romantic  love
#>      <dbl>   <dbl>   <dbl>   <dbl>    <dbl> <dbl>
#>  1      45      40      35      35        1     0
#>  2      45      40      35      35        1     0
#>  3      45      40      35      35        1     0
#>  4      45      40      35      35        1     0
#>  5      45      40      35      35        1     0
#>  6      45      40      35      35        1     0
#>  7      45      40      35      35        1     0
#>  8      45      40      35      35        1     0
#>  9      45      40      35      35        1     0
#> 10      45      40      35      35        1     0
#> 11      45      40      35      35        0     1
#> 12      45      40      35      35        0     1
#> 13      45      40      35      35        0     1
#> 14      45      40      35      35        0     1
#> 15      45      40      35      35        0     1

Upvotes: 2

Related Questions