statquant
statquant

Reputation: 14370

Matricial product of 2 data.table

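I have two data.tables and I'd like to compute a matrix product of one by the other, by group: within each `id`, every value column of `DT` should be multiplied by the corresponding value in the matching row of `DT2`.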

library(data.table)

# Reproducible example data. The two rnorm() calls must stay in this order so
# that the random stream yields the values printed below.
set.seed(1)

# DT: four rows of values (V1..V4), two rows per id.
DT <- as.data.table(matrix(rnorm(16), nrow = 4, ncol = 4))
DT[, id := rep(c(1, 2), each = 2)]

# DT2: one row of per-column multipliers (V1..V4) for each id.
DT2 <- as.data.table(matrix(rnorm(8), nrow = 2, ncol = 4))
DT2[, id := c(1, 2)]

#DT
#           V1         V2         V3          V4 id
#1: -0.6264538  0.3295078  0.5757814 -0.62124058  1
#2:  0.1836433 -0.8204684 -0.3053884 -2.21469989  1
#3: -0.8356286  0.4874291  1.5117812  1.12493092  2
#4:  1.5952808  0.7383247  0.3898432 -0.04493361  2

#DT2
#            V1        V2        V3          V4 id
#1: -0.01619026 0.8212212 0.9189774  0.07456498  1
#2:  0.94383621 0.5939013 0.7821363 -1.98935170  2

# Scale the value columns of DT, in place, by the DT2 row that shares its id.
# Value columns are those whose name starts with "V"; the pattern is anchored
# so that a column merely containing a "V" somewhere is not picked up.
cols <- grep("^V", colnames(DT2), value = TRUE)
ids <- DT2[, unique(id)]
for (id_i in ids) {
    # Rows of DT that belong to this group, as a numeric matrix.
    l <- as.matrix(DT[id == id_i, cols, with = FALSE])
    # Per-column multipliers: the first DT2 row for this id, as a plain vector.
    r <- unlist(DT2[id == id_i, cols, with = FALSE][1L])
    # Column-wise scaling yields the same numbers as l %*% diag(r) without
    # relying on diag(): given a single multiplier, diag(r) would build an
    # identity matrix of size r rather than a 1 x 1 matrix holding r, and the
    # matrix product would spread an NA/Inf cell over its whole row
    # (NA * 0 is NA).
    DT[id == id_i, (cols) := as.data.table(sweep(l, 2L, r, "*"))]
}


    #DT(i,j) = DT(i,j)*DT2(j) with id matching
             V1         V2         V3          V4 id
1:  0.010142452  0.2705988  0.5291300 -0.04632279  1
2: -0.002973234 -0.6737860 -0.2806450 -0.16513906  1
3: -0.788696543  0.2894848  1.1824189 -2.23788323  2
4:  1.505683787  0.4384920  0.3049105  0.08938875  2

There must be a way to do this efficiently using `by` and `.EACHI`, but the solution is eluding me.

Upvotes: 0

Views: 74

Answers (2)

Marco Sandri
Marco Sandri

Reputation: 24252

Here is a solution using sapply.

# Value columns are selected by name (everything both tables share except the
# grouping key), so the result does not depend on `id` being the last column.
val_cols <- setdiff(intersect(names(DT), names(DT2)), "id")

# Only ids present in both tables can be multiplied; ids that appear in just
# one of them are skipped.
ids <- intersect(DT$id, DT2$id)

# Multiply the DT rows of group `k` column-wise by the DT2 row of group `k`.
#
# k: a single id value.
# Returns a numeric matrix with one row per DT row of that group and one
# column per value column.
myprod <- function(k) {
  values <- as.matrix(DT[id == k, val_cols, with = FALSE])
  mult <- DT2[id == k, val_cols, with = FALSE]
  if (nrow(mult) != 1L) {
    stop("DT2 must hold exactly one row for id ", k, call. = FALSE)
  }
  # Scale column j of `values` by the j-th multiplier.
  sweep(values, 2L, unlist(mult), "*")
}

# lapply() stands in for sapply(..., simplify = FALSE): the result is the same
# list of matrices, without sapply's input-dependent element naming.
do.call(rbind, lapply(ids, myprod))

# Results
               V1         V2         V3          V4
[1,]  0.010142452  0.2705988  0.5291300 -0.04632279
[2,] -0.002973234 -0.6737860 -0.2806450 -0.16513906
[3,] -0.788696543  0.2894848  1.1824189 -2.23788323
[4,]  1.505683787  0.4384920  0.3049105  0.08938875

Upvotes: 0

lmo
lmo

Reputation: 38500

I am not sure that this is a drastic improvement over what you have, but it does use a join and `.EACHI`...

# Join DT to DT2 on id and evaluate j once per DT2 row (by = .EACHI).
# Inside j, x.* are the matched DT columns (one value per row of the group)
# and i.* are the DT2 multipliers (a single value per group), so an
# elementwise product scales each column. This gives the same result as
# multiplying by diag(c(i.V1, ...)) without building a dense matrix, and an
# NA/Inf in one cell does not spread across its whole row.
DT[DT2, on = "id", by = .EACHI,
   .(V1 = x.V1 * i.V1,
     V2 = x.V2 * i.V2,
     V3 = x.V3 * i.V3,
     V4 = x.V4 * i.V4)]
   id           V1         V2         V3          V4
1:  1  0.010142452  0.2705988  0.5291300 -0.04632279
2:  1 -0.002973234 -0.6737860 -0.2806450 -0.16513906
3:  2 -0.788696543  0.2894848  1.1824189 -2.23788323
4:  2  1.505683787  0.4384920  0.3049105  0.08938875

I tried to automate the construction of the `x.V1`, ... and `i.V1`, ... variable names, but was not successful.

Upvotes: 1

Related Questions