Reputation: 259
I have a dataset like this
dt <- data.table(Score = c(0.33,0.34,00.3, -0.22, 0.232),
Id2 = c("0/0","0/1","1/0","0/0","0/0"),
Kps = c("0/1","0/0","1/1","0/1","0/0"),
Inr = c("0/0","0/1","1/1","0/0","0/1"))
I need to replace the values of each row based on the Score column as like this
Score * 2
Score
Usually, it can be done by using the base function like this
dt$Id2 <- dt$Score * 2
But here I have to consider each row and I have around 1000 columns so it can be only done with loop
The expected output
Score Id2 Kps Inr
0.330 0.66 0.330 0.66
0.340 0.340 0.68 0.340
0.300 0.300 0.6 0.6
-0.220 -0.44 -0.22 -0.44
0.232 0.464 0.464 0.232
Any suggestions?
Upvotes: 4
Views: 220
Reputation: 56259
Using matrix multiplication:
# like @akrun using a named vector for conversion, to avoid ifelse/case/switch:
keyval <- setNames(c(2, 2, 1, 1), c("0/0", "1/1", "1/0", "0/1"))
#convert and make the matrix, then multiply
matrix(keyval[ as.matrix(dt[, -1 ]) ] * dt[[ 1 ]], ncol = ncol(dt) - 1)
# [,1] [,2] [,3]
# [1,] 0.660 0.330 0.660
# [2,] 0.340 0.680 0.340
# [3,] 0.300 0.600 0.600
# [4,] -0.440 -0.220 -0.440
# [5,] 0.464 0.464 0.232
Benchmark using a bigger dataset:
library(dplyr)
library(tidyr)
#bigger data
n = 1000
set.seed(1); dt <- data.table(cbind(
Score = runif(n),
data.frame(matrix(sample(c("0/0", "0/1", "1/0", "1/1"), n * n, replace = TRUE), ncol = n))))
Matrix multiplication should give 3-7x improvement compared to dplyr:
m <- microbenchmark::microbenchmark(
matrix = {
matrix(keyval[ as.matrix(dt[, -1 ]) ] * dt[[ 1 ]], ncol = ncol(dt) - 1)
},
dplyr1 = {
dt |>
pivot_longer(-Score) |>
mutate(value = case_when(
value == '0/0' | value == "1/1" ~ Score *2,
value == '1/0' | value == "0/1" ~ Score
)) |>
pivot_wider(names_from = name, values_from = value)
},
dplyr2 = {
dt %>%
mutate(across(-Score, ~ Score * (.x %in% c("0/0", "1/1") + 1)))
})
print(m, unit = "relative")
# Unit: relative
# expr min lq mean median uq max neval
# matrix 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 100
# dplyr1 7.697692 8.468686 7.279598 8.071069 7.652855 3.423847 100
# dplyr2 3.862794 3.708899 3.399736 3.560082 3.687698 2.096620 100
Upvotes: 0
Reputation: 887951
As the input is data.table
, here is one approach with data.table
library(data.table)
dt[, (names(dt)[-1]) := lapply(.SD, \(x)
fcase(x %chin% c("0/0", "1/1"), Score *2,
x %chin% c("1/0", "0/1"), Score)), .SDcols = -1]
-output
> dt
Score Id2 Kps Inr
1: 0.330 0.660 0.330 0.660
2: 0.340 0.340 0.680 0.340
3: 0.300 0.300 0.600 0.600
4: -0.220 -0.440 -0.220 -0.440
5: 0.232 0.464 0.464 0.232
Or another option is to make use of named vector
keyval <- setNames(c(2, 2, 1, 1), c("0/0", "1/1", "1/0", "0/1"))
dt[, (names(dt)[-1]) := lapply(.SD, \(x) Score *keyval[x]), .SDcols = -1]
-output
> dt
Score Id2 Kps Inr
1: 0.330 0.660 0.330 0.660
2: 0.340 0.340 0.680 0.340
3: 0.300 0.300 0.600 0.600
4: -0.220 -0.440 -0.220 -0.440
5: 0.232 0.464 0.464 0.232
Or create a count of 1s and 0s to multiply
library(stringr)
dt[, (names(dt)[-1]) := lapply(.SD, \(x) Score * 1 +
(str_count(x, "0")!= 1)) , .SDcols = -1]
> dt
Score Id2 Kps Inr
1: 0.330 1.330 0.330 1.330
2: 0.340 0.340 1.340 0.340
3: 0.300 0.300 1.300 1.300
4: -0.220 0.780 -0.220 0.780
5: 0.232 1.232 1.232 0.232
Upvotes: 3
Reputation: 35649
With dplyr::across()
, you can apply a function across multiple columns. It supports tidy selections so that you can cleverly select variables based on their names or properties.
library(dplyr)
dt %>%
mutate(across(-Score, ~ ifelse(.x %in% c("0/0", "1/1"), Score * 2, Score)))
# Score Id2 Kps Inr
# 1: 0.330 0.660 0.330 0.660
# 2: 0.340 0.340 0.680 0.340
# 3: 0.300 0.300 0.600 0.600
# 4: -0.220 -0.440 -0.220 -0.440
# 5: 0.232 0.464 0.464 0.232
A tricky way
dt %>%
mutate(across(-Score, ~ Score * (.x %in% c("0/0", "1/1") + 1)))
Upvotes: 2
Reputation: 4534
Here is a tidyverse
-way solution.
It uses a data.frame
and makes it longer in a first step. Then with case_when
the different conditions were implemented.
pivot_wider
brought it back to a wider format.
library(tidyverse)
dt<- data.frame(Score = c(0.33,0.34,00.3, -0.22, 0.232),
Id2=c("0/0","0/1","1/0","0/0","0/0"),
Kps=c("0/1","0/0","1/1","0/1","0/0"),
Inr=c("0/0","0/1","1/1","0/0","0/1"))
dt |>
pivot_longer(-Score) |>
mutate(value = case_when(
value == '0/0' | value == "1/1" ~ Score *2,
value == '1/0' | value == "0/1" ~ Score
)) |>
pivot_wider(names_from = name, values_from = value)
#> # A tibble: 5 × 4
#> Score Id2 Kps Inr
#> <dbl> <dbl> <dbl> <dbl>
#> 1 0.33 0.66 0.33 0.66
#> 2 0.34 0.34 0.68 0.34
#> 3 0.3 0.3 0.6 0.6
#> 4 -0.22 -0.44 -0.22 -0.44
#> 5 0.232 0.464 0.464 0.232
Upvotes: 2