Reputation: 21
I am trying to convert a long dataframe of dose-response intensities, with multiple readouts and replicates, into a wide format for fitting.
The long data looks like this:
# Example data in long format: the three vectors below are parallel, with one
# entry per measurement (96 measurements in total).

# Readout index of each measurement (takes the values 1 or 2).
ionidx <- c(1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
  2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
  2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
  1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
  2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
  2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
  1L)

# Measured intensity of each measurement.
intensities <- c(28396L, 11328L, 10496L, 32674L, 9832L, 28931L, 10580L, 30144L,
  11031L, 29720L, 30292L, 9970L, 10140L, 32286L, 31459L, 8709L,
  8332L, 31830L, 9305L, 29685L, 28933L, 11339L, 12282L, 31594L,
  12763L, 31110L, 10498L, 31405L, 32290L, 11869L, 29295L, 10743L,
  9760L, 29191L, 30020L, 8818L, 32139L, 9605L, 10668L, 31812L,
  9685L, 30365L, 8908L, 30555L, 30919L, 7640L, 8818L, 32864L, 8174L,
  30384L, 29411L, 7902L, 29947L, 8922L, 6936L, 30039L, 7798L, 8520L,
  31842L, 7008L, 28703L, 8745L, 31581L, 7600L, 28301L, 7408L, 31146L,
  12223L, 28793L, 33500L, 13359L, 31791L, 10325L, 11264L, 30491L,
  11396L, 31127L, 10655L, 32256L, 8517L, 31297L, 9000L, 30800L,
  30774L, 8845L, 7903L, 29414L, 8245L, 30337L, 8530L, 30812L, 31559L,
  8280L, 27926L, 8263L, 32300L)

# Dose at which each measurement was taken.
dose <- c(5, 5, 1.6667, 1.6667, 1.6667, 1.6667, 0.5556, 0.5556, 0.5556,
  0.5556, 0.1852, 0.1852, 0.1852, 0.1852, 0.0617, 0.0617, 0.0617,
  0.0617, 0.0617, 0.0617, 5, 5, 5, 5, 5, 5, 1.6667, 1.6667, 1.6667,
  1.6667, 0.5556, 0.5556, 0.5556, 0.5556, 0.5556, 0.5556, 0.1852,
  0.1852, 0.1852, 0.1852, 0.1852, 0.1852, 0.0617, 0.0617, 0.0617,
  0.0617, 0.0617, 0.0617, 0.0206, 0.0206, 0.0206, 0.0206, 0.0206,
  0.0206, 0.0023, 0.0069, 0.0069, 0.0069, 0.0069, 0.0023, 0.0023,
  0.0023, 0.0023, 0.0023, 0.0023, 0.0023, 0.0023, 5, 5, 5, 5, 1.6667,
  1.6667, 1.6667, 1.6667, 0.5556, 0.5556, 0.1852, 0.1852, 0.0206,
  0.0206, 0.0206, 0.0206, 0.0206, 0.0206, 0.0069, 0.0069, 0.0069,
  0.0069, 0.0069, 0.0069, 0.0069, 0.0069, 0.0023, 0.0023, 0.0023
)

# Combine the parallel vectors into one long-format data frame.
resp <- data.frame(
  ionidx = ionidx,
  intensities = intensities,
  dose = dose
)
The desired format would have ionidx as the first column,
and the subsequent columns named [dose]_[replicate_num].
I tried using dcast:
library(data.table)
# Convert resp to a data.table by reference, then cast to wide format with
# one column per combination of dose and rowid(ionidx).
setDT(resp)
dcast(resp, ionidx ~ dose + rowid(ionidx), value.var = "intensities")
but that does not look right. Any help would be appreciated!
Upvotes: 0
Views: 166
Reputation: 6234
It appears that the replicate number is defined as a sequence along the repeated doses within each dose-ionidx combination. This is not what happens in your `dcast` call, where `rowid(ionidx)` creates two sequences, one along each level of `ionidx`. To reproduce the output shown in the image, try:
library(data.table)
setDT(resp)
# Number the replicates within each dose/ionidx pair, in order of appearance.
# The id column is added to resp by reference.
resp[, id := rowid(dose, ionidx)]
# Cast to wide format: one row per ionidx, one column per "<dose>_<id>".
dcast(resp, ionidx ~ dose + id, value.var = "intensities")
#> ionidx 0.0023_1 0.0023_2 0.0023_3 0.0023_4 0.0023_5 0.0023_6 0.0069_1
#> 1: 1 28703 31581 28301 31146 27926 32300 30039
#> 2: 2 6936 7008 8745 7600 7408 8263 7798
#> 0.0069_2 0.0069_3 0.0069_4 0.0069_5 0.0069_6 0.0206_1 0.0206_2 0.0206_3
#> 1: 31842 29414 30337 30812 31559 30384 29411 29947
#> 2: 8520 7903 8245 8530 8280 8174 7902 8922
#> 0.0206_4 0.0206_5 0.0206_6 0.0617_1 0.0617_2 0.0617_3 0.0617_4 0.0617_5
#> 1: 31297 30800 30774 31459 31830 29685 30555 30919
#> 2: 8517 9000 8845 8709 8332 9305 8908 7640
#> 0.0617_6 0.1852_1 0.1852_2 0.1852_3 0.1852_4 0.1852_5 0.1852_6 0.5556_1
#> 1: 32864 30292 32286 32139 31812 30365 32256 30144
#> 2: 8818 9970 10140 9605 10668 9685 10655 10580
#> 0.5556_2 0.5556_3 0.5556_4 0.5556_5 0.5556_6 1.6667_1 1.6667_2 1.6667_3
#> 1: 29720 29295 29191 30020 31127 32674 28931 31405
#> 2: 11031 10743 9760 8818 11396 10496 9832 10498
#> 1.6667_4 1.6667_5 1.6667_6 5_1 5_2 5_3 5_4 5_5 5_6
#> 1: 32290 31791 30491 28396 28933 31594 31110 28793 33500
#> 2: 11869 10325 11264 11328 11339 12282 12763 12223 13359
Upvotes: 1