Jack Armstrong
Jack Armstrong

Reputation: 1249

Combine Dataframe based on value in row in R

I have 5 dataframes each with a different number of rows. I need to merge them all together based on values in col1 of each dataframe. Observe

df1 <- read.table(text="
   col1    col2
    A        5
    B        3
    C        6
    E        7", header=TRUE, stringsAsFactors=FALSE)

df2 <- read.table(text="
   col1    col2
    A        5
    B        6
    C       7
    M       8
    Z       9", header=T, stringsAsFactors=FALSE)

But I need it to produce:

   newdf
     col1    col2(#from df1)   col3(#from df2)
      A          5                  5
      B          3                  6
      C          6                  7  
      E          7                  0
      M          0                  8
      Z          0                  9

I have tried to merge a few at a time by='col1' but to no luck. Any tips?

What I have tried:

posidf<-merge(df1,df2,df3,df4,df5,all.x=TRUE)
#wont execute
posidf<-merge(df1,df2,df3,df4,df5,by="col1",all.x=TRUE)
#wont execute
posidf<-merge(df1,df2,df3,df4,df5,by="col1")
Error in fix.by(by.x, x) : 
'by' must specify one or more columns as numbers, names or logical

Upvotes: 0

Views: 7305

Answers (3)

Zerodf
Zerodf

Reputation: 2298

I suspect you are looking for something like this example:

merge(df1, df2, by = "col1", all.x=TRUE, all.y=TRUE)

Edit:

col1 <- c('A', 'B', 'C', 'E')
col2 <- c(5, 3, 6 ,7)
df1 <- data.frame(col1, col2)

col1 <- c('A', 'B', 'C', 'M', 'Z')
col2 <- c(5, 6, 7 ,8, 9)
df2 <- data.frame(col1, col2)

col1 <- c('A', 'B', 'C')
col2 <- c(10, 29, 7)
df3 <- data.frame(col1, col2)

col1 <- c('A', 'S', 'T')
col2 <- c(7 ,8, 9)
df4 <- data.frame(col1, col2)

col1 <- c('B', 'C')
col2 <- c(7 ,8)
df5 <- data.frame(col1, col2)

frame_list <- list(df1, df2, df3, df4, df5)
frame_names <- list('df1', 'df2', 'df3', 'df4', 'df5')
counter <- 0

df <- data.frame(Date=as.Date(character()),
                 File=character(), 
                 User=character(), 
                 stringsAsFactors=FALSE) 

df <- data.frame(matrix(ncol = 2, nrow = 0))
colnames(df) <- c("col1","col2")

for (d in frame_list) {
  counter <- counter + 1
  colnames(d) <- c("col1", paste0('col2_',frame_names[counter]))
  df <- merge(df, d, by = "col1", all.x=TRUE, all.y=TRUE)
}

df$col2 <- NULL
df[is.na(df)] <- 0

Upvotes: 3

Nic
Nic

Reputation: 383

I guess what you would like is a full_join. Using dplyr:

library(dplyr)

df1 <- data.frame(
  col1 = c("A", "B", "C", "E"),
  col2 = c(5, 3, 6, 7)
)

df2 <- data.frame(
  col1 = c("A", "B", "C", "M", "Z"),
  col2 = c(5, 6, 7, 8, 9)
)


df_merged <- full_join(x = df1, y = df2, by = "col1") 

If you want a 0 instead of a NA, replace them with

df_merged[is.na(df_merged)] <- 0

  col1 col2.x col2.y
1    A      5      5
2    B      3      6
3    C      6      7
4    E      7      0
5    M      0      8
6    Z      0      9

Edit for multiple data frames

Store them in a list and use reduce with the respective join, here full_join

set.seed(123)
df_list <- replicate(5, data.frame(col1 = LETTERS[sample(1:26, 5)], col2 = sample(1:9, 5)), simplify = F)
reduce(df_list, full_join, by = "col1")

   col1 col2.x col2.y col2.x.x col2.y.y col2
1     E      9     NA       NA        1    4
2     F      5     NA       NA       NA   NA
3     N      2     NA       NA        8   NA
4     X      7      4       NA       NA    6
5     P      8     NA       NA        5   NA
6     D     NA      8       NA       NA   NA
7     Q     NA      5       NA       NA   NA
8     J     NA      1       NA       NA   NA
9     U     NA      2       NA        9    8
10    V     NA     NA        1       NA   NA
11    M     NA     NA        7       NA   NA
12    B     NA     NA        8       NA   NA
13    H     NA     NA        9       NA   NA
14    I     NA     NA        4       NA   NA
15    K     NA     NA       NA        6   NA
16    W     NA     NA       NA       NA    9
17    O     NA     NA       NA       NA    3

But as @zx8754 suggested in the comment: duplicate from here Simultaneously merge multiple data.frames in a list

Upvotes: 1

gos
gos

Reputation: 484

Assuming your merge looks like this and your example is representative of your data, there are no issues.

newdf <- merge(x = df1, y = df2,'col1')

##  col1 col2.x col2.y
##1    A      5      5
##2    B      3      6
##3    C      6      7

If this is not the output you expect, you may want to look into the different types of merging: inner join (above), outer join, left outer, and right outer.

Outer join

merge(x = df1, y = df2, by = 'col1', all = TRUE)

##  col1 col2.x col2.y
##1    A      5      5
##2    B      3      6
##3    C      6      7
##4    E      7     NA
##5    M     NA      8
##6    Z     NA      9

Left outer

merge(x = df1, y = df2, by = 'col1', all.x = TRUE)

##  col1 col2.x col2.y
##1    A      5      5
##2    B      3      6
##3    C      6      7
##4    E      7     NA

Right outer

merge(x = df1, y = df2, by = 'col1', all.y = TRUE)

##  col1 col2.x col2.y
##1    A      5      5
##2    B      3      6
##3    C      6      7
##4    M     NA      8
##5    Z     NA      9

Upvotes: 1

Related Questions