user8990745
user8990745

Reputation:

how can I reorganize a data based on two column

I have a data like below

df<- structure(list(data1 = c(0.013818378, 0.014362551, 0.014647562, 
0.0136627, 0.015510173, 0.006818502, 0.006683564, 0.006655434, 
0.006691479, 0.00666666, 0.014507653, 0.017446481, 0.014021427, 
0.013963069, 0.020706391, 0.007104358, 0.006809539, 0.006680631, 
0.009059533, 0.006681197, 0.015691738, 0.016709763, 0.015761994, 
0.016062111, 0.015917196, 0.006816436, 0.006809539, 0.006680631, 
0.009059533, 0.006681197), data2 = c(0.045378058, 0.041371486, 
0.046058451, 0.040479177, 0.051143336, 0.016131932, 0.014399847, 
0.014950329, 0.016408355, 0.015886182, 0.046151342, 0.05265521, 
0.046046663, 0.040515428, 0.086865434, 0.019222881, 0.016926183, 
0.016703444, 0.081352865, 0.132841645, 0.051641343, 0.059851738, 
0.04830957, 0.047550067, 0.049228835, 0.015154055, 0.016926183, 
0.016703444, 0.081352865, 0.132841645), time = c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L
), place = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L), .Label = c("B02", "B03", "B04", "B05", 
"B06", "C02", "C03", "C04", "C05", "C06"), class = "factor")), .Names = c("data1", 
"data2", "time", "place"), class = "data.frame", row.names = c(NA, 
-30L))

It has several data in it and distinguishable by time

I am trying to put separate them and re-orginise them in various data frame

each column except time and place are one data which needs to be organized

for example for data1 at time 1

B   0.013818378 0.014362551 0.014647562 0.0136627   0.015510173
C   0.006818502 0.006683564 0.006655434 0.006691479 0.00666666

data 1 at time 10

B   0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
C   0.007104358 0.006809539 0.006680631 0.009059533 0.006681197

etc etc

Upvotes: 0

Views: 47

Answers (2)

Peter H.
Peter H.

Reputation: 2164

It is somewhat unclear from your question, but I think that this is what you want:

library(tidyverse)

df %>% 
  mutate(
    column = str_extract(place, "[0-9]+"),
    place = str_extract(place, "[A-Z]")
  ) %>% 
  gather(data1, data2, key = "data", value = "val") %>% 
  spread(column, val) %>% 
  split(f = .$data)

Which produces the following format:

$data1
   time place  data          02          03          04          05          06
1     1     B data1 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
3     1     C data1 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
5    10     B data1 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
7    10     C data1 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
9    17     B data1 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
11   17     C data1 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

$data2
   time place  data         02         03         04         05         06
2     1     B data2 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
4     1     C data2 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
6    10     B data2 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
8    10     C data2 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
10   17     B data2 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
12   17     C data2 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165

Upvotes: 1

akrun
akrun

Reputation: 886978

We separate the 'place' column into two columns by splitting between the letter and digits, and spread into 'wide' format

library(dplyr)
library(tidyr)
df %>% 
   separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])") %>%
   select(-data2) %>% 
   spread(number, data1)
# time grp          02          03          04          05          06
#1    1   B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2    1   C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3   10   B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4   10   C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5   17   B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6   17   C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

If we want as a list of datasets of both 'data1' and 'data2'

nm1 <- grep("data", names(df), value = TRUE)
nm1 %>%
   purrr::map(~ df %>% 
                 select(-one_of(nm1), .x) %>%
                 separate(place, into = c("grp", "number"), "(?<=[A-Z])(?=[0-9])")  %>% 
                 spread(number, .x) )
#[[1]]
#  time grp          02          03          04          05          06
#1    1   B 0.013818378 0.014362551 0.014647562 0.013662700 0.015510173
#2    1   C 0.006818502 0.006683564 0.006655434 0.006691479 0.006666660
#3   10   B 0.014507653 0.017446481 0.014021427 0.013963069 0.020706391
#4   10   C 0.007104358 0.006809539 0.006680631 0.009059533 0.006681197
#5   17   B 0.015691738 0.016709763 0.015761994 0.016062111 0.015917196
#6   17   C 0.006816436 0.006809539 0.006680631 0.009059533 0.006681197

#[[2]]
#  time grp         02         03         04         05         06
#1    1   B 0.04537806 0.04137149 0.04605845 0.04047918 0.05114334
#2    1   C 0.01613193 0.01439985 0.01495033 0.01640835 0.01588618
#3   10   B 0.04615134 0.05265521 0.04604666 0.04051543 0.08686543
#4   10   C 0.01922288 0.01692618 0.01670344 0.08135286 0.13284165
#5   17   B 0.05164134 0.05985174 0.04830957 0.04755007 0.04922883
#6   17   C 0.01515405 0.01692618 0.01670344 0.08135286 0.13284165

It is not clear how the output should look like when we have multiple value columns. The dcast from data.table can deal with multiple value.var columns

library(data.table)
setDT(df)[, c("grp", "number") := tstrsplit(place, "(?<=[A-Z])(?=[0-9])", perl = TRUE)]
dcast(df, grp + time ~ number, value.var = c("data1", "data2"))

Upvotes: 2

Related Questions