Reputation: 669
I have the following matrix of strings made up of 0s and 1s. Within each column every string has the same number of characters, except for the entries that are NA. I would like to split each string into separate columns, so that every cell holds exactly one digit. An NA should also be repeated in every column produced by splitting its original column (as shown in the example below).
# Example input: a 5 x 5 character matrix of 0/1 strings, one row per vector.
# Within a column all non-missing strings have the same length.
r1 <- c("01", "001", "0001", "01", "100")
r2 <- c("10", "001", NA_character_, "10", "100")
r3 <- c("01", "100", "1000", "10", "010")
r4 <- c("01", "010", "0100", NA_character_, "001")
r5 <- c("01", "010", "0010", "10", "001")

# Stack the vectors as rows; the row names become r1..r5.
n.mat <- rbind(r1, r2, r3, r4, r5)
The desired output:
# Desired result: every character in its own cell. Each line of a vector below
# holds the characters that come from one column of the input matrix.
r1 <- c(
  "0", "1",
  "0", "0", "1",
  "0", "0", "0", "1",
  "0", "1",
  "1", "0", "0"
)
r2 <- c(
  "1", "0",
  "0", "0", "1",
  NA, NA, NA, NA,
  "1", "0",
  "1", "0", "0"
)
r3 <- c(
  "0", "1",
  "1", "0", "0",
  "1", "0", "0", "0",
  "1", "0",
  "0", "1", "0"
)
r4 <- c(
  "0", "1",
  "0", "1", "0",
  "0", "1", "0", "0",
  NA, NA,
  "0", "0", "1"
)
r5 <- c(
  "0", "1",
  "0", "1", "0",
  "0", "0", "1", "0",
  "1", "0",
  "0", "0", "1"
)

# Stack the vectors as rows; the row names become r1..r5.
n.mat_new <- rbind(r1, r2, r3, r4, r5)
I have the following code:
# NOTE(review): the value assigned here is overwritten by the next statement,
# so this call has no effect on the final result.
n.mat_new <- as.character(n.mat)
# Split every string of n.mat into its single characters (a list with one
# element per matrix cell).
n.mat_new <- strsplit(n.mat, "")
# NOTE(review): this unlists n.mat (the original matrix), not the split list
# stored in n.mat_new, and asks for one row per cell of n.mat -- presumably
# why the result is not the desired matrix.
n.mat_new <- data.frame(matrix(unlist(n.mat), nrow=length(n.mat), byrow=T))
But I don't get the n.mat_new matrix I want. Can someone help me?
Upvotes: 1
Views: 165
Reputation: 887048
Here is an option using the tidyverse (together with reshape2::melt and data.table::rowid):
library(dplyr)
library(tidyr)
library(tibble)      # provides column_to_rownames(), used in the last step
library(data.table)  # provides rowid()

# Reshape the matrix to long form (one row per cell), split every string into
# single characters, then spread the characters back out, one per column.
reshape2::melt(n.mat) %>%
  group_by(Var2) %>%
  # Replace each NA cell by a vector of NAs as long as the longest string in
  # the same original column, so that it expands to one NA per character.
  mutate(value = replace(value, is.na(value),
                         list(rep(NA, max(nchar(value[!is.na(value)])))))) %>%
  ungroup %>%
  unnest(c(value)) %>%
  # The lookaround pattern matches the empty position between two characters.
  separate_rows(value, sep = "(?<=.)(?=.)") %>%
  # Number the characters within each original row; these numbers become the
  # column names of the wide result.
  mutate(rn = rowid(Var1)) %>%
  select(-Var2) %>%
  pivot_wider(names_from = rn, values_from = value) %>%
  column_to_rownames('Var1')
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14
#r1 0 1 0 0 1 0 0 0 1 0 1 1 0 0
#r2 1 0 0 0 1 <NA> <NA> <NA> <NA> 1 0 1 0 0
#r3 0 1 1 0 0 1 0 0 0 1 0 0 1 0
#r4 0 1 0 1 0 0 1 0 0 <NA> <NA> 0 0 1
#r5 0 1 0 1 0 0 0 1 0 1 0 0 0 1
Upvotes: 1
Reputation: 388907
You can split every character in each column, make them equal length and combine the output.
# Split every string into single characters column by column, pad the pieces
# of each column to a common width, and bind the per-column results side by
# side.
# lapply() is used rather than apply(): apply() simplifies its result to a
# matrix when every column yields the same number of values, and do.call()
# requires a list as its second argument.
do.call(cbind, lapply(seq_len(ncol(n.mat)), function(j) {
  tmp <- strsplit(n.mat[, j], "")
  width <- max(lengths(tmp))
  # Indexing past the end of a vector yields NA, so shorter entries (including
  # NA cells, which strsplit() returns as a single NA) are padded with NA.
  padded <- lapply(tmp, `[`, seq_len(width))
  # Build the matrix explicitly so that a width of 1 still gives one row per
  # row of n.mat.
  matrix(unlist(padded, use.names = FALSE),
         nrow = nrow(n.mat), ncol = width, byrow = TRUE,
         dimnames = list(rownames(n.mat), NULL))
}))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#r1 "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "1" "1" "0" "0"
#r2 "1" "0" "0" "0" "1" NA NA NA NA "1" "0" "1" "0" "0"
#r3 "0" "1" "1" "0" "0" "1" "0" "0" "0" "1" "0" "0" "1" "0"
#r4 "0" "1" "0" "1" "0" "0" "1" "0" "0" NA NA "0" "0" "1"
#r5 "0" "1" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "1"
Upvotes: 1