Tom
Tom

Reputation: 2351

Merging by columns for different values of those columns

I have two data.tables. I want to merge the row of second data.table dfB with the first dfA, if the year from dfA corresponds to a year one year before in dfB.

As an example, the first row of dfB would merge with the first row of dfA because the year of dfB, 2009, is one year before the year of dfA, 2010.

  library(data.table)
  dfA <- fread("
  A   B   C   D   E   F   G   Z   iso   year   matchcode
  1   0   1   1   1   0   1   0   NLD   2010   NLD2010
  2   1   0   0   0   1   0   1   NLD   2014   NLD2014
  3   0   0   0   1   1   0   0   AUS   2010   AUS2010
  4   1   0   1   0   0   1   0   AUS   2006   AUS2006
  5   0   1   0   1   0   1   1   USA   2008   USA2008
  6   0   0   1   0   0   0   1   USA   2010   USA2010
  7   0   1   0   1   0   0   0   USA   2012   USA2012
  8   1   0   1   0   0   1   0   BLG   2008   BLG2008
  9   0   1   0   1   1   0   1   BEL   2008   BEL2008
  10  1   0   1   0   0   1   0   BEL   2010   BEL2010
  11  0   1   1   1   0   1   0   NLD   2010   NLD2010
  12  1   0   0   0   1   0   1   NLD   2014   NLD2014
  13  0   0   0   1   1   0   0   AUS   2010   AUS2010
  14  1   0   1   0   0   1   0   AUS   2006   AUS2006
  15  0   1   0   1   0   1   1   USA   2008   USA2008
  16  0   0   1   0   0   0   1   USA   2010   USA2010
  17  0   1   0   1   0   0   0   USA   2012   USA2012
  18  1   0   1   0   0   1   0   BLG   2008   BLG2008
  19  0   1   0   1   1   0   1   BEL   2008   BEL2008
  20  1   0   1   0   0   1   0   BEL   2010   BEL2010",
  header = TRUE)

  dfB <- fread("
  A   B   C   D   H   I   J   K   iso   year   matchcode
  1   0   1   1   1   0   1   0   NLD   2009   NLD2009
  2   1   0   0   0   1   0   1   NLD   2014   NLD2014
  3   0   0   0   1   1   0   0   AUS   2011   AUS2011
  4   1   0   1   0   0   1   0   AUS   2007   AUS2007
  5   0   1   0   1   0   1   1   USA   2007   USA2007
  6   0   0   1   0   0   0   1   USA   2010   USA2010
  7   0   1   0   1   0   0   0   USA   2013   USA2013
  8   1   0   1   0   0   1   0   BLG   2007   BLG2007
  9   0   1   0   1   1   0   1   BEL   2009   BEL2009
  10   1   0   1   0   0   1   0  BEL   2012   BEL2012",
  header = TRUE)

I thought of trying:

dfA <- merge(dfA , dfB, on =.(iso, year == year-1), all.x = TRUE, allow.cartesian=FALSE)

But that creates a match on the year, which is not what I want.

I believe also roll would try to find the closest match.

How should I write this merge?

DESIRED OUTPUT:

library(data.table)
      dfA <- fread("
      A   B   C   D   E   F   G   Z  H   I   J   K  year_from_B iso  year   matchcode
      1   0   1   1   1   0   1   0  1   0   1   0 2009  NLD   2010   NLD2010
      2   1   0   0   0   1   0   1  NA  NA  NA  NA  NA  NLD   2014   NLD2014
      3   0   0   0   1   1   0   0  NA  NA  NA  NA  NA  AUS   2010   AUS2010
      4   1   0   1   0   0   1   0  NA  NA  NA  NA  NA  AUS   2006   AUS2006
      5   0   1   0   1   0   1   1  NA  NA  NA  NA  NA  USA   2008   USA2008
      6   0   0   1   0   0   0   1  NA  NA  NA  NA  NA  USA   2010   USA2010
      7   0   1   0   1   0   0   0  NA  NA  NA  NA  NA  USA   2012   USA2012
      8   1   0   1   0   0   1   0  0   0   1   0 2007  BLG   2008   BLG2008
      9   0   1   0   1   1   0   1  NA  NA  NA  NA  NA  BEL   2008   BEL2008
      10  1   0   1   0   0   1   0  1   1   0   1   2009 BEL  2010   BEL2010
      11  0   1   1   1   0   1   0  1   0   1   0 2009  NLD   2010   NLD2010
      12  1   0   0   0   1   0   1  NA  NA  NA  NA  NA  NLD   2014   NLD2014
      13  0   0   0   1   1   0   0  NA  NA  NA  NA  NA  AUS   2010   AUS2010
      14  1   0   1   0   0   1   0  NA  NA  NA  NA  NA  AUS   2006   AUS2006
      15  0   1   0   1   0   1   1  NA  NA  NA  NA  NA  USA   2008   USA2008
      16  0   0   1   0   0   0   1  NA  NA  NA  NA  NA  USA   2010   USA2010
      17  0   1   0   1   0   0   0  NA  NA  NA  NA  NA  USA   2012   USA2012
      18  1   0   1   0   0   1   0  0   0   1   0 2007  BLG   2008   BLG2008
      19  0   1   0   1   1   0   1  NA  NA  NA  NA  NA  BEL   2008   BEL2008
      20  1   0   1   0   0   1   0  1   1   0   1   2009 BEL  2010   BEL2010",
      header = TRUE)

Upvotes: 2

Views: 58

Answers (2)

GreenLantern
GreenLantern

Reputation: 186

I don't know if that's what you want, but I hope it helps you. It's little intricate

df<-numeric()
for(i in 1:nrow(dfA)){
  d<-numeric()
  d<-which(dfA$iso[i]==dfB$iso & (dfA$year[i]-1)==dfB$year)
  if(length(d)>0){
    df<-rbind(df,c(dfA[i,1:8],dfB[d,5:8],dfB$year[d],dfA$iso[i],dfA$year[i],dfA$matchcode[i],dfB$matchcode[d]))
  }else{
    df<-rbind(df,c(dfA[i,1:8],rep(NA,5),dfA$iso[i],dfA$year[i],dfA$matchcode[i],NA))
  }
}
colnames(df)[13:17]<-c("year_from_B", "iso",  "year"  , "matchcode1", "matchcode2")
df

and this is the output

      A  B C D E F G Z H  I  J  K  year_from_B iso   year matchcode1 matchcode2
 [1,] 1  0 1 1 1 0 1 0 1  0  1  0  2009        "NLD" 2010 "NLD2010"  "NLD2009" 
 [2,] 2  1 0 0 0 1 0 1 NA NA NA NA NA          "NLD" 2014 "NLD2014"  NA        
 [3,] 3  0 0 0 1 1 0 0 NA NA NA NA NA          "AUS" 2010 "AUS2010"  NA        
 [4,] 4  1 0 1 0 0 1 0 NA NA NA NA NA          "AUS" 2006 "AUS2006"  NA        
 [5,] 5  0 1 0 1 0 1 1 1  0  1  1  2007        "USA" 2008 "USA2008"  "USA2007" 
 [6,] 6  0 0 1 0 0 0 1 NA NA NA NA NA          "USA" 2010 "USA2010"  NA        
 [7,] 7  0 1 0 1 0 0 0 NA NA NA NA NA          "USA" 2012 "USA2012"  NA        
 [8,] 8  1 0 1 0 0 1 0 0  0  1  0  2007        "BLG" 2008 "BLG2008"  "BLG2007" 
 [9,] 9  0 1 0 1 1 0 1 NA NA NA NA NA          "BEL" 2008 "BEL2008"  NA        
[10,] 10 1 0 1 0 0 1 0 1  1  0  1  2009        "BEL" 2010 "BEL2010"  "BEL2009" 
[11,] 11 0 1 1 1 0 1 0 1  0  1  0  2009        "NLD" 2010 "NLD2010"  "NLD2009" 
[12,] 12 1 0 0 0 1 0 1 NA NA NA NA NA          "NLD" 2014 "NLD2014"  NA        
[13,] 13 0 0 0 1 1 0 0 NA NA NA NA NA          "AUS" 2010 "AUS2010"  NA        
[14,] 14 1 0 1 0 0 1 0 NA NA NA NA NA          "AUS" 2006 "AUS2006"  NA        
[15,] 15 0 1 0 1 0 1 1 1  0  1  1  2007        "USA" 2008 "USA2008"  "USA2007" 
[16,] 16 0 0 1 0 0 0 1 NA NA NA NA NA          "USA" 2010 "USA2010"  NA        
[17,] 17 0 1 0 1 0 0 0 NA NA NA NA NA          "USA" 2012 "USA2012"  NA        
[18,] 18 1 0 1 0 0 1 0 0  0  1  0  2007        "BLG" 2008 "BLG2008"  "BLG2007" 
[19,] 19 0 1 0 1 1 0 1 NA NA NA NA NA          "BEL" 2008 "BEL2008"  NA        
[20,] 20 1 0 1 0 0 1 0 1  1  0  1  2009        "BEL" 2010 "BEL2010"  "BEL2009"

Upvotes: 1

Rohit
Rohit

Reputation: 2017

It's a little messy, but try:

dfB[dfA[,c(.SD,.(year1=year-1))],
    on=.(A,B,C,D,iso,year == year1)]
     A B C D  H  I  J  K iso year matchcode E F G Z i.year i.matchcode
 1:  1 0 1 1  1  0  1  0 NLD 2009   NLD2009 1 0 1 0   2010     NLD2010
 2:  2 1 0 0 NA NA NA NA NLD 2013      <NA> 0 1 0 1   2014     NLD2014
 3:  3 0 0 0 NA NA NA NA AUS 2009      <NA> 1 1 0 0   2010     AUS2010
 4:  4 1 0 1 NA NA NA NA AUS 2005      <NA> 0 0 1 0   2006     AUS2006
 5:  5 0 1 0  1  0  1  1 USA 2007   USA2007 1 0 1 1   2008     USA2008
 6:  6 0 0 1 NA NA NA NA USA 2009      <NA> 0 0 0 1   2010     USA2010
 7:  7 0 1 0 NA NA NA NA USA 2011      <NA> 1 0 0 0   2012     USA2012
 8:  8 1 0 1  0  0  1  0 BLG 2007   BLG2007 0 0 1 0   2008     BLG2008
 9:  9 0 1 0 NA NA NA NA BEL 2007      <NA> 1 1 0 1   2008     BEL2008
10: 10 1 0 1 NA NA NA NA BEL 2009      <NA> 0 0 1 0   2010     BEL2010
11: 11 0 1 1 NA NA NA NA NLD 2009      <NA> 1 0 1 0   2010     NLD2010
12: 12 1 0 0 NA NA NA NA NLD 2013      <NA> 0 1 0 1   2014     NLD2014
13: 13 0 0 0 NA NA NA NA AUS 2009      <NA> 1 1 0 0   2010     AUS2010
14: 14 1 0 1 NA NA NA NA AUS 2005      <NA> 0 0 1 0   2006     AUS2006
15: 15 0 1 0 NA NA NA NA USA 2007      <NA> 1 0 1 1   2008     USA2008
16: 16 0 0 1 NA NA NA NA USA 2009      <NA> 0 0 0 1   2010     USA2010
17: 17 0 1 0 NA NA NA NA USA 2011      <NA> 1 0 0 0   2012     USA2012
18: 18 1 0 1 NA NA NA NA BLG 2007      <NA> 0 0 1 0   2008     BLG2008
19: 19 0 1 0 NA NA NA NA BEL 2007      <NA> 1 1 0 1   2008     BEL2008
20: 20 1 0 1 NA NA NA NA BEL 2009      <NA> 0 0 1 0   2010     BEL2010

Upvotes: 1

Related Questions