Reputation:
I have a data like below
df<- structure(list(data1 = c(0.013818378, 0.014362551, 0.014647562,
0.0136627, 0.015510173, 0.006818502, 0.006683564, 0.006655434,
0.006691479, 0.00666666, 0.014507653, 0.017446481, 0.014021427,
0.013963069, 0.020706391, 0.007104358, 0.006809539, 0.006680631,
0.009059533, 0.006681197, 0.015691738, 0.016709763, 0.015761994,
0.016062111, 0.015917196, 0.006816436, 0.006809539, 0.006680631,
0.009059533, 0.006681197), data2 = c(0.045378058, 0.041371486,
0.046058451, 0.040479177, 0.051143336, 0.016131932, 0.014399847,
0.014950329, 0.016408355, 0.015886182, 0.046151342, 0.05265521,
0.046046663, 0.040515428, 0.086865434, 0.019222881, 0.016926183,
0.016703444, 0.081352865, 0.132841645, 0.051641343, 0.059851738,
0.04830957, 0.047550067, 0.049228835, 0.015154055, 0.016926183,
0.016703444, 0.081352865, 0.132841645), time = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L
), place = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L), .Label = c("B02", "B03", "B04", "B05",
"B06", "C02", "C03", "C04", "C05", "C06"), class = "factor")), .Names = c("data1",
"data2", "time", "place"), class = "data.frame", row.names = c(NA,
-30L))
It has several data in it and distinguishable by time
I am trying to put separate them and re-orginise them in various data frame
each column except time and place are one data which needs to be organized
for example for data1 at time 1
B 0.013818378 0.014362551 0.014647562 0.0136627 0.015510173
C 0.006818502 0.006683564 0.006655434 0.006691479 0.00666666
data 1 at time 10
B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
etc etc
Upvotes: 0
Views: 47
Reputation: 2164
It is somewhat unclear from your question, but I think that this is what you want:
library(tidyverse)
df %>%
mutate(
column = str_extract(place, "[0-9]+"),
place = str_extract(place, "[A-Z]")
) %>%
gather(data1, data2, key = "data", value = "val") %>%
spread(column, val) %>%
split(f = .$data)
Which produces the following format:
$data1
time place data 02 03 04 05 06
1 1 B data1 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
3 1 C data1 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
5 10 B data1 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
7 10 C data1 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
9 17 B data1 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
11 17 C data1 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
$data2
time place data 02 03 04 05 06
2 1 B data2 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
4 1 C data2 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
6 10 B data2 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
8 10 C data2 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
10 17 B data2 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
12 17 C data2 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165
Upvotes: 1
Reputation: 886978
We separate
the 'place' column into two columns by splitting between the letter and digits, and spread
into 'wide' format
library(dplyr)
library(tidyr)
df %>%
separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])") %>%
select(-data2) %>%
spread(number, data1)
# time grp 02 03 04 05 06
#1 1 B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2 1 C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3 10 B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4 10 C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5 17 B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6 17 C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
If we want as a list
of datasets of both 'data1' and 'data2'
nm1 <- grep("data", names(df), value = TRUE)
nm1 %>%
purrr::map(~ df %>%
select(-one_of(nm1), .x) %>%
separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])") %>%
spread(number, .x) )
#[[1]]
# time grp 02 03 04 05 06
#1 1 B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2 1 C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3 10 B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4 10 C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5 17 B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6 17 C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197
#[[2]]
# time grp 02 03 04 05 06
#1 1 B 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
#2 1 C 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
#3 10 B 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
#4 10 C 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
#5 17 B 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
#6 17 C 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165
It is not clear how the output should look like when we have multiple value
columns. The dcast
from data.table
can deal with multiple value.var
columns
library(data.table)
setDT(df)[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]
dcast(df, grp + time ~ number, value.var = c("data1", "data2"))
Upvotes: 2