Reputation: 59
I have 2 data frames.
Df1:
Treatment Time Species Base mn h
<fct> <fct> <fct> <fct> <dbl> <dbl>
1 TR T1 A C 0.340 0.00435
2 TR T2 A C 0.329 0.00679
3 TR T1 B C 0.336 0.00429
4 TR T2 B C 0.328 0.00701
5 UTC T0 UTC C 0.312 0.00542
6 TR T1 A H 0.350 0.00256
7 TR T2 A H 0.315 0.00525
8 TR T1 B H 0.346 0.00181
9 TR T2 B H 0.317 0.00400
10 UTC T0 UTC H 0.344 0.00384
11 TR T1 A L 0.18 0.00382
12 TR T2 A L 0.219 0.00617
13 TR T1 B L 0.198 0.00570
14 TR T2 B L 0.223 0.00567
15 UTC T0 UTC L 0.191 0.00601
16 TR T1 A S 0.131 0.00117
17 TR T2 A S 0.137 0.00530
18 TR T1 B S 0.120 0.00406
19 TR T2 B S 0.133 0.00310
20 UTC T0 UTC S 0.153 0.00176
and Df2:
Species Variety Time Value Residual_p
<fct> <fct> <fct> <dbl> <dbl>
1 UTC Blue T0 0.00000000 1.0000000
2 A Blue T1 0.03136850 0.9686315
3 A Blue T2 0.17359611 0.8264039
4 B Blue T1 0.03734234 0.9626577
5 B Blue T2 0.10303113 0.8969689
6 UTC Green T0 0.00000000 1.0000000
7 A Green T1 0.04072433 0.9592757
8 A Green T2 0.09362780 0.9063722
9 B Green T1 0.03092473 0.9690753
10 B Green T2 0.07051702 0.9294830
I create a third data frame where 1:4 columns are the same of Df1 by taping
Df_Blue<-Df1[,1:4]
Then I have to create the column 5 in Df_Blue where rows are the product of multiplying Df1$mn by Df2$Residual_p but factors Species and Time must match and only rows of Df2$Variety == "Blue" should be considered.
Hope it's clear.
Thank you!
Upvotes: 0
Views: 59
Reputation: 389047
Here is a base R version -
merge(df1, df2) |>
subset(Variety == 'Blue') |>
transform(mn_residual = mn * Residual_p)
# Time Species Treatment Base mn h Variety Value Residual_p mn_residual
#1 T0 UTC UTC C 0.31 0.0054 Blue 0.000 1.00 0.31
#3 T0 UTC UTC H 0.34 0.0038 Blue 0.000 1.00 0.34
#5 T0 UTC UTC L 0.19 0.0060 Blue 0.000 1.00 0.19
#7 T0 UTC UTC S 0.15 0.0018 Blue 0.000 1.00 0.15
#9 T1 A TR C 0.34 0.0043 Blue 0.031 0.97 0.33
#11 T1 A TR H 0.35 0.0026 Blue 0.031 0.97 0.34
#13 T1 A TR L 0.18 0.0038 Blue 0.031 0.97 0.17
#15 T1 A TR S 0.13 0.0012 Blue 0.031 0.97 0.13
#18 T1 B TR L 0.20 0.0057 Blue 0.037 0.96 0.19
#20 T1 B TR S 0.12 0.0041 Blue 0.037 0.96 0.12
#22 T1 B TR C 0.34 0.0043 Blue 0.037 0.96 0.32
#24 T1 B TR H 0.35 0.0018 Blue 0.037 0.96 0.33
#26 T2 A TR S 0.14 0.0053 Blue 0.174 0.83 0.11
#28 T2 A TR C 0.33 0.0068 Blue 0.174 0.83 0.27
#30 T2 A TR H 0.32 0.0053 Blue 0.174 0.83 0.26
#32 T2 A TR L 0.22 0.0062 Blue 0.174 0.83 0.18
#33 T2 B TR H 0.32 0.0040 Blue 0.103 0.90 0.28
#35 T2 B TR L 0.22 0.0057 Blue 0.103 0.90 0.20
#37 T2 B TR S 0.13 0.0031 Blue 0.103 0.90 0.12
#39 T2 B TR C 0.33 0.0070 Blue 0.103 0.90 0.29
Upvotes: 2
Reputation: 8811
library(dplyr)
df1 %>%
#Select columns 1 from 5 of df1
select(1:5) %>%
#Join filtered df2, by Time and Species
left_join(
df2 %>%
#Filter Variety equal blue
filter(Variety == "Blue") %>%
#Remove column Value
select(- Value)
) %>%
#Create new column, multiplying mn*Residual_p
mutate(mn_residual = mn*Residual_p)
# A tibble: 20 x 8
Treatment Time Species Base mn Variety Residual_p mn_residual
<chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 TR T1 A C 0.34 Blue 0.969 0.329
2 TR T2 A C 0.329 Blue 0.826 0.272
3 TR T1 B C 0.336 Blue 0.963 0.323
4 TR T2 B C 0.328 Blue 0.897 0.294
5 UTC T0 UTC C 0.312 Blue 1 0.312
Upvotes: 1
Reputation: 887213
We can use data.table
join to do this in a efficient way
library(data.table)
setDT(df1)[setDT(df2)[Variety == 'Blue'],
mn_residual := mn * Residual_p, on = .(Species, Time)]
-output
> df1
Treatment Time Species Base mn h mn_residual
1: TR T1 A C 0.340 0.00435 0.3293347
2: TR T2 A C 0.329 0.00679 0.2718869
3: TR T1 B C 0.336 0.00429 0.3234530
4: TR T2 B C 0.328 0.00701 0.2942058
5: UTC T0 UTC C 0.312 0.00542 0.3120000
6: TR T1 A H 0.350 0.00256 0.3390210
7: TR T2 A H 0.315 0.00525 0.2603172
8: TR T1 B H 0.346 0.00181 0.3330796
9: TR T2 B H 0.317 0.00400 0.2843391
10: UTC T0 UTC H 0.344 0.00384 0.3440000
11: TR T1 A L 0.180 0.00382 0.1743537
12: TR T2 A L 0.219 0.00617 0.1809825
13: TR T1 B L 0.198 0.00570 0.1906062
14: TR T2 B L 0.223 0.00567 0.2000241
15: UTC T0 UTC L 0.191 0.00601 0.1910000
16: TR T1 A S 0.131 0.00117 0.1268907
17: TR T2 A S 0.137 0.00530 0.1132173
18: TR T1 B S 0.120 0.00406 0.1155189
19: TR T2 B S 0.133 0.00310 0.1192969
20: UTC T0 UTC S 0.153 0.00176 0.1530000
df1 <- structure(list(Treatment = c("TR", "TR", "TR", "TR", "UTC", "TR",
"TR", "TR", "TR", "UTC", "TR", "TR", "TR", "TR", "UTC", "TR",
"TR", "TR", "TR", "UTC"), Time = c("T1", "T2", "T1", "T2", "T0",
"T1", "T2", "T1", "T2", "T0", "T1", "T2", "T1", "T2", "T0", "T1",
"T2", "T1", "T2", "T0"), Species = c("A", "A", "B", "B", "UTC",
"A", "A", "B", "B", "UTC", "A", "A", "B", "B", "UTC", "A", "A",
"B", "B", "UTC"), Base = c("C", "C", "C", "C", "C", "H", "H",
"H", "H", "H", "L", "L", "L", "L", "L", "S", "S", "S", "S", "S"
), mn = c(0.34, 0.329, 0.336, 0.328, 0.312, 0.35, 0.315, 0.346,
0.317, 0.344, 0.18, 0.219, 0.198, 0.223, 0.191, 0.131, 0.137,
0.12, 0.133, 0.153), h = c(0.00435, 0.00679, 0.00429, 0.00701,
0.00542, 0.00256, 0.00525, 0.00181, 0.004, 0.00384, 0.00382,
0.00617, 0.0057, 0.00567, 0.00601, 0.00117, 0.0053, 0.00406,
0.0031, 0.00176)), class = "data.frame", row.names = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20"))
df2 <- structure(list(Species = c("UTC", "A", "A", "B", "B", "UTC",
"A", "A", "B", "B"), Variety = c("Blue", "Blue", "Blue", "Blue",
"Blue", "Green", "Green", "Green", "Green", "Green"), Time = c("T0",
"T1", "T2", "T1", "T2", "T0", "T1", "T2", "T1", "T2"), Value = c(0,
0.0313685, 0.17359611, 0.03734234, 0.10303113, 0, 0.04072433,
0.0936278, 0.03092473, 0.07051702), Residual_p = c(1, 0.9686315,
0.8264039, 0.9626577, 0.8969689, 1, 0.9592757, 0.9063722, 0.9690753,
0.929483)), class = "data.frame", row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10"))
Upvotes: 2