Reputation: 21
I am only a very occasional R user, and this is the first time I am asking a question about R here or anywhere else online, so I apologize beforehand if anything remains unclear.
I have a numeric dataframe with about 100 columns in each of which there is the same number (number 10 in this example) that needs to be multiplied with a value from a numeric vector, which is specific to each column. I am completely stuck and would appreciate any help.
Here is a simplified example:
df
V1 V2 V3
1 0 0 2
2 1 0 2
3 0 0 1
4 0 0 2
5 0 0 1
6 10 0 1
7 0 0 1
8 0 0 2
9 0 10 2
10 0 0 2
11 10 0 1
12 0 0 10
13 1 2 1
14 0 0 2
15 0 0 0
16 0 1 2
17 1 0 10
18 1 1 1
19 0 0 1
20 0 0 2
The corresponding vector would look as follows:
V
v1 v2 v3
0.01256117 0.03037231 0.55444079
So, the values "10" of df column V1 would need to be multiplied by value v1 of vector V, the values "10" of df column V2 by the value v2 of vector V, etc.
Any help is very much appreciated!
Upvotes: 2
Views: 1001
Reputation: 887118
Here is a variation
# Split df into the cells that are not 10 (kept as they are) and the cells
# equal to 10 (scaled). col(df) gives each cell's column number, so
# vec1[col(df)] picks the multiplier belonging to that cell's column.
df1 <- (df!=10)*df + ((df==10)*df) * vec1[col(df)]
df1
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
For big datasets, it may also be better to use lapply/Map, etc.
#' Scale the entries of a vector that are equal to 10.
#'
#' @param x A numeric (or integer) vector, e.g. one data frame column.
#' @param y A single numeric factor applied to the matching entries.
#' @return `x` with every entry equal to 10 multiplied by `y`; the values of
#'   all other entries, including `NA`, are kept.
f1 <- function(x, y) {
  # which() drops NA comparisons. A logical index containing NA would make
  # the subassignment below fail with "NAs are not allowed in subscripted
  # assignments" as soon as the replacement has more than one element.
  i <- which(x == 10)
  x[i] <- x[i] * y
  x
}
# Map() calls f1 on each column of df together with the element of vec1 at
# the same position; data.frame() reassembles the resulting list of columns.
df2 <- data.frame(Map(f1, df, vec1))
df2
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
identical(df1, df2)
#[1] TRUE
Or an option with data.table
library(data.table) # v1.9.5+
setDT(df)
for (j in seq_along(df)) {
  # Store the column as double before writing the scaled values into it.
  set(df, i = NULL, j = j, value = as.numeric(df[[j]]))
  # Find the matching rows once and reuse them for both the row index and
  # the replacement values. which() drops NA comparisons, so the two always
  # have the same length even when the column contains NA.
  idx <- which(df[[j]] == 10)
  set(df, i = idx, j = j, value = df[[j]][idx] * vec1[j])
}
df
# V1 V2 V3
#1: 0.0000000 0.0000000 2.000000
#2: 1.0000000 0.0000000 2.000000
#3: 0.0000000 0.0000000 1.000000
#4: 0.0000000 0.0000000 2.000000
#5: 0.0000000 0.0000000 1.000000
#6: 0.1256117 0.0000000 1.000000
#7: 0.0000000 0.0000000 1.000000
#8: 0.0000000 0.0000000 2.000000
#9: 0.0000000 0.3037231 2.000000
#10: 0.0000000 0.0000000 2.000000
#11: 0.1256117 0.0000000 1.000000
#12: 0.0000000 0.0000000 5.544408
#13: 1.0000000 2.0000000 1.000000
#14: 0.0000000 0.0000000 2.000000
#15: 0.0000000 0.0000000 0.000000
#16: 0.0000000 1.0000000 2.000000
#17: 1.0000000 0.0000000 5.544408
#18: 1.0000000 1.0000000 1.000000
#19: 0.0000000 0.0000000 1.000000
#20: 0.0000000 0.0000000 2.000000
# Example data from the question: three integer columns with 20 rows each.
df <- data.frame(
  V1 = c(0L, 1L, 0L, 0L, 0L, 10L, 0L, 0L, 0L, 0L,
         10L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L),
  V2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 10L, 0L,
         0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L),
  V3 = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
         1L, 10L, 1L, 2L, 0L, 2L, 10L, 1L, 1L, 2L)
)
# One multiplier per column of `df`, matched by position.
vec1 <- c(v1 = 0.01256117, v2 = 0.03037231, v3 = 0.55444079)
Upvotes: 2
Reputation: 23788
Here's another suggestion:
# Locate every cell equal to 10 as (row, col) index pairs.
arr <- which(df == 10, arr.ind = TRUE)
# Scale each matched cell by the multiplier of the column it sits in.
df[arr] <- v[arr[, "col"]] * df[arr]
#> df
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
# Example data from the question: three integer columns, 20 rows, with
# character row names "1" to "20".
df <- structure(
  list(
    V1 = c(0L, 1L, 0L, 0L, 0L, 10L, 0L, 0L, 0L, 0L,
           10L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L),
    V2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 10L, 0L,
           0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L),
    V3 = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
           1L, 10L, 1L, 2L, 0L, 2L, 10L, 1L, 1L, 2L)
  ),
  names = c("V1", "V2", "V3"),
  class = "data.frame",
  row.names = as.character(1:20)
)
# One multiplier per column of `df`, matched by position.
v <- c(0.01256117, 0.03037231, 0.55444079)
Upvotes: 1
Reputation: 17611
Here is one simple approach:
# Sample data: 11 rows, with a few cells equal to 10 in each column.
df <- data.frame(
  v1 = c(seq_len(10), 10),
  v2 = c(5:13, 10, 14),
  v3 = seq(8L, 18L)
)
# Sample multipliers, one per column of `df`.
vec <- c(0.1, 0.2, 0.3)
df
# v1 v2 v3
#1 1 5 8
#2 2 6 9
#3 3 7 10
#4 4 8 11
#5 5 9 12
#6 6 10 13
#7 7 11 14
#8 8 12 15
#9 9 13 16
#10 10 10 17
#11 10 14 18
# Build a matrix holding value * multiplier at every cell equal to 10 and 0
# elsewhere. Working on the transposes lets `vec` recycle down the rows of
# t(df), i.e. one multiplier per column of `df`.
df2 <- t(t(df) * vec * t(df == 10))
# Zero the matched cells in `df` so that adding `df2` substitutes the scaled
# values instead of adding them on top of the original 10s.
df[df == 10] <- 0
df + df2
# v1 v2 v3
#1 1 5 8
#2 2 6 9
#3 3 7 3
#4 4 8 11
#5 5 9 12
#6 6 2 13
#7 7 11 14
#8 8 12 15
#9 9 13 16
#10 1 2 17
#11 1 14 18
Upvotes: 3