Reputation: 115
Trying my very best for several hours now and hope someone can help me with the following.
The dataset I load into R looks like this (with many many more replications)
My output should look like that:
split.default(Mydata, rep(1:3, each = 5))
did the job in splitting the data frame into columns but I then do not know how to a) add the Replication column and b) how to combine the different tibbles into a single dataframe.
Thanks for your help in advance!
Data:
structure(list(Replication1 = c("Timestamp", "44046", "44046.02884259259",
"44046.065949074073", "44046.088472222225", "44046.0934837963",
"44046.105208333334", "44046.115613425929", "44046.35355324074",
"44046.419537037036", "44046", "44046.02884259259", "44046.065949074073",
"44046.088472222225", "44046.0934837963", "44046.105208333334",
"44046.115613425929", "44046.35355324074", "44046.419537037036",
"44049.058587962965", "44049.08630787037", "44049.184525462966",
"44049.188009259262", "44049.28429398148", "44049.373472222222",
"44049.464212962965", "44049.472627314812"), ...2 = c("a", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"),
...3 = c("b", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0"), ...4 = c("c", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "37", "37", "37", "37", "37", "37", "37", "37"),
...5 = c("d", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "20", "20",
"20", "20", "20", "20", "20", "20"), Replication2 = c("Timestamp",
"44046", "44046.02884259259", "44046.065949074073", "44046.088472222225",
"44046.0934837963", "44046.105208333334", "44046.115613425929",
"44046.35355324074", "44046.419537037036", "44049.058587962965",
"44049.08630787037", "44049.184525462966", "44049.188009259262",
"44049.28429398148", "44049.373472222222", "44049.464212962965",
"44049.472627314812", NA, NA, NA, NA, NA, NA, NA, NA, NA),
...7 = c("a", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", NA, NA, NA, NA, NA,
NA, NA, NA, NA), ...8 = c("b", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", NA,
NA, NA, NA, NA, NA, NA, NA, NA), ...9 = c("c", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "37", "37", "37", "37",
"37", "37", "37", "37", NA, NA, NA, NA, NA, NA, NA, NA, NA
), ...10 = c("d", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "20", "20", "20", "20", "20", "20", "20", "20", NA,
NA, NA, NA, NA, NA, NA, NA, NA), Replication3 = c("Timestamp",
"44094.918553240743", "44094.960196759261", "44094.960393518515",
"44095.006030092591", "44095.259652777779", "44095.275034722225",
"44095.31045138889", "44095.323263888888", "44095.386574074073",
"44095.425659722219", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), ...12 = c("a", "1", "1", "1",
"1", "2", "2", "2", "2", "2", "2", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...13 = c("b", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ...14 = c("c",
"37", "37", "37", "37", "37", "37", "37", "37", "37", "37",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), ...15 = c("d", "20", "20", "20", "20", "20", "20", "20",
"20", "20", "20", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA)), row.names = c(NA, -27L), class = c("tbl_df",
"tbl", "data.frame"))
Upvotes: 1
Views: 128
Reputation: 39585
Try this brute force approach with a loop. We can detect the position of key variables to determine a start/end path and then use the loop to format the data. Finally, we bind the results using do.call()
and rbind()
:
#Code
index <- which(grepl('Replication',names(Mydata)))
index2 <- index[-1]
index2 <- index2-1
index2 <- c(index2,dim(Mydata)[2])
#List
List <- list()
#Loop
for(i in 1:length(index))
{
df <- Mydata[,(index[i]:index2[i])]
val <- names(df)[1]
colnames(df) <- df[1,]
df <- df[-1,]
df$Replicate <- val
List[[i]] <- df
}
#Bind
mdf <- do.call(rbind,List)
Output:
mdf
# A tibble: 78 x 6
Timestamp a b c d Replicate
<chr> <chr> <chr> <chr> <chr> <chr>
1 44046 0 0 0 0 Replication1
2 44046.02884259259 0 0 0 0 Replication1
3 44046.065949074073 0 0 0 0 Replication1
4 44046.088472222225 0 0 0 0 Replication1
5 44046.0934837963 0 0 0 0 Replication1
6 44046.105208333334 0 0 0 0 Replication1
7 44046.115613425929 0 0 0 0 Replication1
8 44046.35355324074 0 0 0 0 Replication1
9 44046.419537037036 0 0 0 0 Replication1
10 44046 0 0 0 0 Replication1
# ... with 68 more rows
Upvotes: 1
Reputation: 920
Without a sample of the data (using dput not posting an image) I give some directions:
save in txt file many files as many table replications you have
load in R each dataframe with the read.csv statement. For each step apply the replication value by using the cbind
statement
rep1_df <- read.csv('replication1.txt') rep1_df <- cbind(rep1_df, 'replication1')
create a resulting dataset with the rbind
statement:
compound_df <- rbind(rep1_df, rep2_df, etc...)
Below a working example.
Here's a couple of simplified input data file:
rep1.txt
Timestamp;a;b;c;d
03/08/2020 00:00;0;0;0;0
04/08/2020 01:00;0;2;0;0
rep2.txt
Timestamp;a;b;c;d
03/09/2020 07:00;1;0;0;0
05/09/2020 14:00;1;7;40;0
And here the code to use:
rep1_df <- read.csv(file='rep1.txt', sep = ';')
rep2_df <- read.csv(file='rep1.txt', sep = ';')
rep1_df <- cbind(rep1_df, Replication = 'Replication1')
rep2_df <- cbind(rep2_df, Replication = 'Replication2')
compound_df <- rbind(rep1_df, rep2_df)
Timestamp a b c d Replication
1 03/08/2020 00:00 0 0 0 0 Replication1
2 04/08/2020 01:00 0 2 0 0 Replication1
3 03/08/2020 00:00 0 0 0 0 Replication2
4 04/08/2020 01:00 0 2 0 0 Replication2
Upvotes: 3