Reputation: 35
I have a data.frame like this:
G5_01
X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1: 0/0 0/0
2: 0/0 1/1
3: 0/1 0/0
I want to calculate variations in each cell and convert it to:
X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1: 0 0
2: 0 2
3: 1 0
data.table seems to be able to deal with this; my script is shown below:
library(data.table)
# NOTE: strsplit(x, "/") returns one list element per row of the column, but
# `[[1]]` keeps only the split of the first row, and sum() then collapses its
# two parts into a single number -- hence one output value per column.
G5_02<-setDT(G5_01)[,lapply(.SD,function(x) sum(as.numeric(strsplit(x,"/")[[1]][1]),
as.numeric(strsplit(x,"/")[[1]][2])))]
But it only gives me the result of the first row
X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1: 0 0
Any suggestions on how I should fix it?
Upvotes: 0
Views: 106
Reputation: 2678
library(data.table)
# Add the two single-character values of every "a/b" cell (characters 1 and 3).
# Iterating over all columns through .SD avoids repeating the expression once
# per hard-coded column name; `:=` still updates G5_01 by reference.
allele_cols <- names(G5_01)
setDT(G5_01)[, (allele_cols) := lapply(.SD, function(x) {
  as.numeric(substr(x, 1, 1)) + as.numeric(substr(x, 3, 3))
}), .SDcols = allele_cols]
G5_01
X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1: 0 0
2: 0 2
3: 1 0
Data
# Example input, parsed from inline text; the first line holds the column names.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable variable.
G5_01 <- read.table(text = 'X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
0/0 0/0
0/0 1/1
0/1 0/0', header = TRUE)
Upvotes: 1
Reputation: 28705
Using data.table::tstrsplit to get a transposed string-split (like, e.g., purrr::transpose(strsplit(x, '/'))), we can then convert the pieces to numeric and add them together:
library(dplyr)
# For every column: split each "a/b" string into its parts (tstrsplit returns
# one vector per part), convert each part to numeric and add the parts row-wise.
# lapply() is used instead of purrr's map() because purrr is not attached here;
# across() replaces the superseded mutate_all().
df %>%
  mutate(across(everything(), ~
    data.table::tstrsplit(.x, "/") %>%
      lapply(as.numeric) %>%
      do.call(what = "+")))
# X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
# 1 0 0
# 2 0 2
# 3 1 0
Upvotes: 1
Reputation: 102625
Maybe you can try the code below, where nchar() and gsub() are used.
Here are two solutions with base R:
1. A fully vectorized one (no sapply(), apply() or lapply() needed, unlike the other approaches):
G5_02 <- data.frame(nchar(gsub("[^1]","",as.matrix(G5_01))))
2. A column-wise one using sapply():
G5_02 <- data.frame(sapply(G5_01, function(x) nchar(gsub("[^1]","",x))))
such that
> G5_02
X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
1 0 0
2 0 2
3 1 0
DATA
# Example input: three rows of "a/b" strings in two character columns.
G5_01 <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = c("0/0", "0/0", "0/1"),
  X8803713069_R02C02_8803713069_R02C02 = c("0/0", "1/1", "0/0"),
  stringsAsFactors = FALSE
)
Upvotes: 1
Reputation: 51592
If you only have to handle 1s and 0s, then a possible solution can be to count the 1s, i.e.
library(data.table)
# Turn df into a data.table by reference, then replace each column by the
# number of "1" characters found in each of its strings; the trailing []
# forces the result to print.
setDT(df)
df[, lapply(.SD, stringr::str_count, pattern = "1")][]
# X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1: 0 0
#2: 0 2
#3: 1 0
Upvotes: 2
Reputation: 887831
In base R, we can use rowSums after doing the splitting with read.table:
# Parse every column as "/"-separated text and add up the parsed fields per row.
# as.character() guards against factor columns, which read.table(text = ) cannot
# read; `df[] <-` keeps the data.frame shape and column names.
df[] <- lapply(df, function(x) {
  rowSums(read.table(text = as.character(x), sep = "/", header = FALSE))
})
df
# X8803713069_R01C02_8803713069_R01C02 X8803713069_R02C02_8803713069_R02C02
#1 0 0
#2 0 2
#3 1 0
# Example input: two character columns holding three "a/b" strings each.
col_r01 <- c("0/0", "0/0", "0/1")
col_r02 <- c("0/0", "1/1", "0/0")
df <- data.frame(
  X8803713069_R01C02_8803713069_R01C02 = col_r01,
  X8803713069_R02C02_8803713069_R02C02 = col_r02,
  stringsAsFactors = FALSE
)
Upvotes: 1