Reputation: 669
I have the following matrix of strings made up of 0s and 1s. Within a column every string has the same number of characters, and some columns contain only single-character strings. I would like to split each string into separate columns so that every cell holds exactly one digit, while leaving the columns that already hold a single character as they are:
# Example input: one character vector per row. Strings in the same column
# always have the same number of characters (1, 3, 4, 2 and 3 here).
r1 <- c("0", "001", "0001", "01", "100")
r2 <- c("1", "001", "0001", "10", "100")
r3 <- c("0", "100", "1000", "10", "010")
r4 <- c("0", "010", "0100", "10", "001")
r5 <- c("0", "010", "0010", "10", "001")

# Stack the vectors row-wise; the object names become the row names.
n.mat <- rbind(r1, r2, r3, r4, r5)
The desired output:
# Desired result: the same rows with every string broken into single
# characters, giving 1 + 3 + 4 + 2 + 3 = 13 columns.
r1 <- c("0", "0", "0", "1", "0", "0", "0", "1", "0", "1", "1", "0", "0")
r2 <- c("1", "0", "0", "1", "0", "0", "0", "1", "1", "0", "1", "0", "0")
r3 <- c("0", "1", "0", "0", "1", "0", "0", "0", "1", "0", "0", "1", "0")
r4 <- c("0", "0", "1", "0", "0", "1", "0", "0", "1", "0", "0", "0", "1")
r5 <- c("0", "0", "1", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1")

n.mat_new <- rbind(r1, r2, r3, r4, r5)
My code so far is below, but it fails because of the columns whose strings have only one character:
# Attempt: split each column into characters and cbind the per-column pieces.
# NOTE: this fails on the example input. For a single-character column every
# element of `tmp` has length 1, so sapply() simplifies to a plain vector
# rather than a matrix; t() then yields a 1 x 5 matrix, while the
# multi-character columns yield 5 x k matrices. cbind() cannot combine pieces
# whose row counts differ.
n.mat <- do.call(cbind, apply(n.mat, 2, function(x) {
# one character vector per row of this column
tmp <-strsplit(x, '')
# take positions 1..(longest split in this column) from every element, then
# transpose so that rows of the piece correspond to rows of the input
t(sapply(tmp, `[`, 1:max(lengths(tmp))))
}))
Upvotes: 2
Views: 406
Reputation: 193517
There's no need for `apply` or `paste` for this specific problem. Simply `t`ranspose the matrix, split all the strings, and reconstruct the matrix according to the number of rows in the original matrix.
# Transposing first makes strsplit() visit the cells row by row, so the
# flattened characters can be refilled row-wise into a matrix with the
# original number of rows.
matrix(
  unlist(strsplit(t(n.mat), split = "")),
  nrow = nrow(n.mat),
  byrow = TRUE
)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# [1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# [2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# [3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# [4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# [5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
If you want further optimizations, you can do something like the following, which will also retain the rownames:
# Same approach as above, with two shortcuts: fixed = TRUE skips regular
# expression handling in strsplit(), and use.names = FALSE stops unlist()
# from building names. The original row names are re-attached via dimnames.
matrix(
  unlist(strsplit(t(n.mat), split = "", fixed = TRUE), use.names = FALSE),
  nrow = nrow(n.mat),
  byrow = TRUE,
  dimnames = list(rownames(n.mat), NULL)
)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
By avoiding `apply`, you're only calling `strsplit` once, so you're going to notice much better performance if you have a lot of rows to process.
On my Chromebook (so these times are likely to be slow to begin with) testing with 10,000 rows, I get the following:
# Size of the benchmark input.
nrow(n.mat)
# [1] 10000
# check = FALSE: the candidates' results are not required to match each other
# (am_opt() and jay() keep the row names, am() and gki() do not).
bench::mark(am_opt(), am(), gki(), jay(), check = FALSE)
# # A tibble: 4 x 13
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
# <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
# 1 am_opt() 28.3ms 40.1ms 27.4 2.75MB 0 14 0 511ms
# 2 am() 36.1ms 41.2ms 24.6 2.75MB 0 13 0 528ms
# 3 gki() 220.3ms 229.4ms 4.39 3.43MB 0 3 0 683ms
# 4 jay() 975.8ms 975.8ms 1.02 3.51MB 1.02 1 1 976ms
# # … with 4 more variables: result <list>, memory <list>, time <list>, gc <list>
I didn't benchmark Karthik's answer because just running it once took more than 1 minute.
# Single timed run only; this variant is too slow to include in bench::mark().
system.time(karthik())
# user system elapsed
# 81.341 0.000 81.343
Where the functions are directly copied from the other answers:
#' Split every string of a character matrix into single-character cells
#' (optimised one-pass variant).
#'
#' @param x Character matrix to expand. Defaults to the global `n.mat`, so
#'   existing zero-argument calls such as `am_opt()` keep working.
#' @return A character matrix with `nrow(x)` rows and one character per cell.
#'   Row names of `x` are kept; there are no column names. Every row of `x`
#'   is expected to expand to the same number of characters.
am_opt <- function(x = n.mat) {
  # t() makes strsplit() visit the cells row by row; fixed = TRUE avoids
  # regular-expression handling and use.names = FALSE skips name building.
  chars <- unlist(strsplit(t(x), "", fixed = TRUE), use.names = FALSE)
  matrix(
    chars,
    nrow = nrow(x),
    byrow = TRUE,
    dimnames = list(rownames(x), NULL)
  )
}
#' Split every string of a character matrix into single-character cells
#' (plain one-pass variant).
#'
#' @param x Character matrix to expand. Defaults to the global `n.mat`, so
#'   existing zero-argument calls such as `am()` keep working.
#' @return A character matrix with `nrow(x)` rows, one character per cell and
#'   no dimnames. Every row of `x` is expected to expand to the same number
#'   of characters.
am <- function(x = n.mat) {
  # Transpose so the split pieces come out in row order, then refill by row.
  matrix(unlist(strsplit(t(x), "")), nrow = nrow(x), byrow = TRUE)
}
#' Split every string of a character matrix into single-character cells
#' (row-wise `apply()` + `strsplit()` variant).
#'
#' @param x Character matrix to expand. Defaults to the global `n.mat`, so
#'   existing zero-argument calls such as `gki()` keep working.
#' @return A character matrix with `nrow(x)` rows, one character per cell and
#'   no dimnames. Every row of `x` is expected to expand to the same number
#'   of characters.
gki <- function(x = n.mat) {
  # strsplit() is called once per row; unlist() flattens the pieces in row
  # order, so the matrix is refilled by row.
  matrix(unlist(apply(x, 1, strsplit, split = "")), nrow(x), byrow = TRUE)
}
#' Split every string of a character matrix into single-character cells
#' (paste-each-row-then-split variant).
#'
#' @param x Character matrix to expand. Defaults to the global `n.mat`, so
#'   existing zero-argument calls such as `jay()` keep working.
#' @return A character matrix with `nrow(x)` rows and one character per cell.
#'   Row names of `x` are kept. Every row of `x` is expected to expand to the
#'   same number of characters.
jay <- function(x = n.mat) {
  split_row <- function(row_strings) {
    # Collapse the row into one string, then split it into characters.
    # `[[1L]]` extracts the single list element without relying on
    # methods::el().
    strsplit(Reduce(paste0, row_strings), "")[[1L]]
  }
  # apply() returns one column per input row, hence the transpose.
  t(apply(x, 1, split_row))
}
#' Split every string of a character matrix into single-character cells
#' (dplyr `bind_rows()` variant; requires dplyr to be attached).
#'
#' @param x Character matrix to expand. Defaults to the global `n.mat`, so
#'   existing zero-argument calls such as `karthik()` keep working.
#' @return The transposed result of binding the per-column split pieces; for
#'   the example input this is a character matrix with one character per cell.
karthik <- function(x = n.mat) {
  # Split column by column, bind the pieces, then transpose so the original
  # rows become rows again.
  t(bind_rows(apply(x, 2, strsplit, split = "")))
}
Upvotes: 1
Reputation: 39657
You can use `strsplit` in `apply`, `unlist` the result, and create a `matrix` from it.
# Split row by row, flatten the pieces, and refill the matrix by row.
matrix(
  unlist(apply(n.mat, 1, strsplit, split = "")),
  nrow = nrow(n.mat),
  byrow = TRUE
)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
#[1,] "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
#[2,] "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
#[3,] "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
#[4,] "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
#[5,] "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Upvotes: 1
Reputation: 11584
Does this work:
library(dplyr)
# Split column by column, bind the pieces, then transpose so the original
# rows become rows again.
t(bind_rows(apply(n.mat, 2, strsplit, split = "")))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Upvotes: 1
Reputation: 72683
Collapse each row with `paste0` using `Reduce`, and use `strsplit` on `""`.
# For each row: glue the strings together, split the result into single
# characters, and transpose because apply() returns one column per row.
t(apply(n.mat, 1, function(row_strings) {
  joined <- Reduce(paste0, row_strings)
  el(strsplit(joined, ""))
}))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
# r1 "0" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
# r2 "1" "0" "0" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0"
# r3 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
# r4 "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" "0" "1"
# r5 "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Upvotes: 1