Reputation: 599
# Example input: a single column `col1` holding colon-delimited records.
data <- data.frame(
  col1 = c(
    "0/1:60,4:0.044:4:0:1.00:2352,160:32:28",
    "0/1:58,4:0.041:4:0:1.00:2304,150:28:30",
    "0/1:25,2:0.095:1:1:0.500:908,78:9:16"
  )
)
data
col1
1 0/1:60,4:0.044:4:0:1.00:2352,160:32:28
2 0/1:58,4:0.041:4:0:1.00:2304,150:28:30
3 1/1:25,2:0.095:1:1:0.500:908,78:9:16
I want to extract the values before the second colon, namely 0/1, 0/1, 1/1, 60,4, 58,4, 25,2, and split them into different columns.
data
col1 col2 col3 col4 col5
1 0/1:60,4:0.044:4:0:1.00:2352,160:32:28 0 1 60 4
2 0/1:58,4:0.041:4:0:1.00:2304,150:28:30 0 1 58 4
3 1/1:25,2:0.095:1:1:0.500:908,78:9:16 1 1 25 2
Upvotes: 2
Views: 336
Reputation: 886948
Here is an option with read.csv after extracting the substring in base R:
# Add col2..col5 by parsing the leading part of each record as CSV.
data[paste0("col", 2:5)] <- local({
  # Keep everything up to the digits after the first comma (the text must be
  # followed by a colon), e.g. "0/1:60,4".
  prefix <- sub("^([^,]+,\\d+):.*", "\\1", data$col1)
  # Turn every punctuation character into a comma so read.csv() can split the
  # pieces into separate columns.
  read.csv(text = gsub("[[:punct:]]", ",", prefix), header = FALSE)
})
-output
data
# col1 col2 col3 col4 col5
#1 0/1:60,4:0.044:4:0:1.00:2352,160:32:28 0 1 60 4
#2 0/1:58,4:0.041:4:0:1.00:2304,150:28:30 0 1 58 4
#3 0/1:25,2:0.095:1:1:0.500:908,78:9:16 0 1 25 2
Upvotes: 1
Reputation: 78917
Here is a dplyr solution with strsplit
and separate
# Keep only the first two colon-separated fields of col1 (e.g. "0/1:60,4"),
# then let separate() split them on its default non-alphanumeric separator
# into col2..col5 (returned as character columns).
# strsplit() is deliberately not used: it turned col2 into a list column that
# separate() coerced with as.character(), so the pipeline only worked by
# splitting the deparsed 'c("0/1:60,4", ...)' text and discarding the "c".
data %>%
  mutate(col2 = sub("^([^:]+:[^:]+):.*$", "\\1", col1)) %>%
  separate(
    col2,
    into = c("col2", "col3", "col4", "col5"),
    extra = "drop",
    fill = "right"
  )
Output:
col1 col2 col3 col4 col5
1 0/1:60,4:0.044:4:0:1.00:2352,160:32:28 0 1 60 4
2 0/1:58,4:0.041:4:0:1.00:2304,150:28:30 0 1 58 4
3 0/1:25,2:0.095:1:1:0.500:908,78:9:16 0 1 25 2
Upvotes: 2
Reputation: 160407
strsplit twice (once with ":", again with "[/,]") and "["-extraction works like this:
# Split every record on ":"; within each record split the fields on "/" or ","
# and keep the pieces of the first two fields as integers (one row per record).
tmp <- do.call(
  rbind.data.frame,
  lapply(
    strsplit(data$col1, ":"),
    function(fields) {
      first_two <- strsplit(fields, "[/,]")[1:2]
      as.integer(unlist(first_two))
    }
  )
)
# Name the new columns col2, col3, ... and append them to the original data.
cbind(data, setNames(tmp, paste0("col", seq_len(ncol(tmp)) + 1)))
# col1 col2 col3 col4 col5
# 1 0/1:60,4:0.044:4:0:1.00:2352,160:32:28 0 1 60 4
# 2 0/1:58,4:0.041:4:0:1.00:2304,150:28:30 0 1 58 4
# 3 0/1:25,2:0.095:1:1:0.500:908,78:9:16 0 1 25 2
Upvotes: 4