Reputation: 59

Multiply 2 columns from two different data frames, matching rows by common factors

I have 2 data frames.

Df1:

Treatment Time  Species    Base      mn      h
   <fct>  <fct>  <fct>    <fct>     <dbl>  <dbl>
 1 TR      T1      A        C       0.340 0.00435
 2 TR      T2      A        C       0.329 0.00679
 3 TR      T1      B        C       0.336 0.00429
 4 TR      T2      B        C       0.328 0.00701
 5 UTC     T0     UTC       C       0.312 0.00542
 6 TR      T1      A        H       0.350 0.00256
 7 TR      T2      A        H       0.315 0.00525
 8 TR      T1      B        H       0.346 0.00181
 9 TR      T2      B        H       0.317 0.00400
10 UTC     T0     UTC       H       0.344 0.00384
11 TR      T1      A        L       0.18  0.00382
12 TR      T2      A        L       0.219 0.00617
13 TR      T1      B        L       0.198 0.00570
14 TR      T2      B        L       0.223 0.00567
15 UTC     T0     UTC       L       0.191 0.00601
16 TR      T1      A        S       0.131 0.00117
17 TR      T2      A        S       0.137 0.00530
18 TR      T1      B        S       0.120 0.00406
19 TR      T2      B        S       0.133 0.00310
20 UTC     T0     UTC       S       0.153 0.00176

and Df2:

Species Variety Time    Value    Residual_p
    <fct>  <fct>  <fct>    <dbl>       <dbl>
1    UTC   Blue    T0    0.00000000  1.0000000
2     A    Blue    T1    0.03136850  0.9686315
3     A    Blue    T2    0.17359611  0.8264039
4     B    Blue    T1    0.03734234  0.9626577
5     B    Blue    T2    0.10303113  0.8969689
6    UTC   Green   T0    0.00000000  1.0000000
7     A    Green   T1    0.04072433  0.9592757
8     A    Green   T2    0.09362780  0.9063722
9     B    Green   T1    0.03092473  0.9690753
10    B    Green   T2    0.07051702  0.9294830

I create a third data frame where 1:4 columns are the same of Df1 by taping

Df_Blue<-Df1[,1:4]

Then I have to create the column 5 in Df_Blue where rows are the product of multiplying Df1$mn by Df2$Residual_p but factors Species and Time must match and only rows of Df2$Variety == "Blue" should be considered.

Hope it's clear.

Thank you!

Upvotes: 0

Answers (3)

Ronak Shah

Reputation: 389047

Here is a base R version -

merge(df1, df2) |>
  subset(Variety == 'Blue') |>
  transform(mn_residual = mn * Residual_p) 

#   Time Species Treatment Base   mn      h Variety Value Residual_p mn_residual
#1    T0     UTC       UTC    C 0.31 0.0054    Blue 0.000       1.00        0.31
#3    T0     UTC       UTC    H 0.34 0.0038    Blue 0.000       1.00        0.34
#5    T0     UTC       UTC    L 0.19 0.0060    Blue 0.000       1.00        0.19
#7    T0     UTC       UTC    S 0.15 0.0018    Blue 0.000       1.00        0.15
#9    T1       A        TR    C 0.34 0.0043    Blue 0.031       0.97        0.33
#11   T1       A        TR    H 0.35 0.0026    Blue 0.031       0.97        0.34
#13   T1       A        TR    L 0.18 0.0038    Blue 0.031       0.97        0.17
#15   T1       A        TR    S 0.13 0.0012    Blue 0.031       0.97        0.13
#18   T1       B        TR    L 0.20 0.0057    Blue 0.037       0.96        0.19
#20   T1       B        TR    S 0.12 0.0041    Blue 0.037       0.96        0.12
#22   T1       B        TR    C 0.34 0.0043    Blue 0.037       0.96        0.32
#24   T1       B        TR    H 0.35 0.0018    Blue 0.037       0.96        0.33
#26   T2       A        TR    S 0.14 0.0053    Blue 0.174       0.83        0.11
#28   T2       A        TR    C 0.33 0.0068    Blue 0.174       0.83        0.27
#30   T2       A        TR    H 0.32 0.0053    Blue 0.174       0.83        0.26
#32   T2       A        TR    L 0.22 0.0062    Blue 0.174       0.83        0.18
#33   T2       B        TR    H 0.32 0.0040    Blue 0.103       0.90        0.28
#35   T2       B        TR    L 0.22 0.0057    Blue 0.103       0.90        0.20
#37   T2       B        TR    S 0.13 0.0031    Blue 0.103       0.90        0.12
#39   T2       B        TR    C 0.33 0.0070    Blue 0.103       0.90        0.29

Upvotes: 2

Vinícius Félix

Reputation: 8811

Libraries

library(dplyr)

Code

df1 %>% 
  #Select columns 1 from 5 of df1
  select(1:5) %>%
  #Join filtered df2, by Time and Species
  left_join(
    df2 %>% 
      #Filter Variety equal blue
      filter(Variety == "Blue") %>% 
      #Remove column Value
      select(- Value)
  ) %>% 
  #Create new column, multiplying mn*Residual_p
  mutate(mn_residual = mn*Residual_p)

Result (first 5 rows just to show an example)

# A tibble: 20 x 8
   Treatment Time  Species Base     mn Variety Residual_p mn_residual
   <chr>     <chr> <chr>   <chr> <dbl> <chr>        <dbl>       <dbl>
 1 TR        T1    A       C     0.34  Blue         0.969       0.329
 2 TR        T2    A       C     0.329 Blue         0.826       0.272
 3 TR        T1    B       C     0.336 Blue         0.963       0.323
 4 TR        T2    B       C     0.328 Blue         0.897       0.294
 5 UTC       T0    UTC     C     0.312 Blue         1           0.312

Upvotes: 1

akrun

Reputation: 887213

We can use data.table join to do this in a efficient way

library(data.table)
setDT(df1)[setDT(df2)[Variety == 'Blue'], 
       mn_residual := mn * Residual_p, on = .(Species, Time)]

-output

> df1
    Treatment Time Species Base    mn       h mn_residual
 1:        TR   T1       A    C 0.340 0.00435   0.3293347
 2:        TR   T2       A    C 0.329 0.00679   0.2718869
 3:        TR   T1       B    C 0.336 0.00429   0.3234530
 4:        TR   T2       B    C 0.328 0.00701   0.2942058
 5:       UTC   T0     UTC    C 0.312 0.00542   0.3120000
 6:        TR   T1       A    H 0.350 0.00256   0.3390210
 7:        TR   T2       A    H 0.315 0.00525   0.2603172
 8:        TR   T1       B    H 0.346 0.00181   0.3330796
 9:        TR   T2       B    H 0.317 0.00400   0.2843391
10:       UTC   T0     UTC    H 0.344 0.00384   0.3440000
11:        TR   T1       A    L 0.180 0.00382   0.1743537
12:        TR   T2       A    L 0.219 0.00617   0.1809825
13:        TR   T1       B    L 0.198 0.00570   0.1906062
14:        TR   T2       B    L 0.223 0.00567   0.2000241
15:       UTC   T0     UTC    L 0.191 0.00601   0.1910000
16:        TR   T1       A    S 0.131 0.00117   0.1268907
17:        TR   T2       A    S 0.137 0.00530   0.1132173
18:        TR   T1       B    S 0.120 0.00406   0.1155189
19:        TR   T2       B    S 0.133 0.00310   0.1192969
20:       UTC   T0     UTC    S 0.153 0.00176   0.1530000

data

df1 <- structure(list(Treatment = c("TR", "TR", "TR", "TR", "UTC", "TR", 
"TR", "TR", "TR", "UTC", "TR", "TR", "TR", "TR", "UTC", "TR", 
"TR", "TR", "TR", "UTC"), Time = c("T1", "T2", "T1", "T2", "T0", 
"T1", "T2", "T1", "T2", "T0", "T1", "T2", "T1", "T2", "T0", "T1", 
"T2", "T1", "T2", "T0"), Species = c("A", "A", "B", "B", "UTC", 
"A", "A", "B", "B", "UTC", "A", "A", "B", "B", "UTC", "A", "A", 
"B", "B", "UTC"), Base = c("C", "C", "C", "C", "C", "H", "H", 
"H", "H", "H", "L", "L", "L", "L", "L", "S", "S", "S", "S", "S"
), mn = c(0.34, 0.329, 0.336, 0.328, 0.312, 0.35, 0.315, 0.346, 
0.317, 0.344, 0.18, 0.219, 0.198, 0.223, 0.191, 0.131, 0.137, 
0.12, 0.133, 0.153), h = c(0.00435, 0.00679, 0.00429, 0.00701, 
0.00542, 0.00256, 0.00525, 0.00181, 0.004, 0.00384, 0.00382, 
0.00617, 0.0057, 0.00567, 0.00601, 0.00117, 0.0053, 0.00406, 
0.0031, 0.00176)), class = "data.frame", row.names = c("1", "2", 
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", 
"15", "16", "17", "18", "19", "20"))

df2 <- structure(list(Species = c("UTC", "A", "A", "B", "B", "UTC", 
"A", "A", "B", "B"), Variety = c("Blue", "Blue", "Blue", "Blue", 
"Blue", "Green", "Green", "Green", "Green", "Green"), Time = c("T0", 
"T1", "T2", "T1", "T2", "T0", "T1", "T2", "T1", "T2"), Value = c(0, 
0.0313685, 0.17359611, 0.03734234, 0.10303113, 0, 0.04072433, 
0.0936278, 0.03092473, 0.07051702), Residual_p = c(1, 0.9686315, 
0.8264039, 0.9626577, 0.8969689, 1, 0.9592757, 0.9063722, 0.9690753, 
0.929483)), class = "data.frame", row.names = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10"))