Reputation: 2085
I've got a data like below:
ex <- structure(list(timestamp = structure(c(1502480763.554, 1502480763.554,
1502480764.968, 1502480765.554, 1502480768.554, 1502480770.554,
1502480773.519, 1502480775.72, 1502480777.43, 1502480778.278,
1502480778.288, 1502480778.759, 1502480780.472, 1502480782.815,
1502480785.521, 1502480785.531, 1502480785.707, 1502480787.639,
1502480789.1, 1502480790.682, 1502480791.554, 1502480793.322,
1502480794.363, 1502480795.923, 1502480799.239, 1502480800.27,
1502480800.554, 1502480802.554, 1502480805.63, 1502480805.959,
1502480807.327, 1502480809.554, 1502480809.564, 1502480810.554,
1502480812.8, 1502480813.838, 1502480813.848, 1502480816.24,
1502480816.24, 1502480835.56, 1502480838.576, 1502480848.384,
1502480851.859, 1502480853.554, 1502480856.375, 1502480857.688,
1502480905.554, 1502480910.554, 1502480910.945, 1502480911.816
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), order = c(NA,
NA, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, 2L, 2L, 2L, 2L, NA, NA, NA,
3L, NA, 4L, 4L, 4L, 4L, 4L, NA, 5L, 5L, 5L, 6L, 6L, 6L, NA, NA,
NA, NA, NA, 7L, NA, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 10L,
10L), cat = c(0, 0, 1, 1, 1, 1, 1, 1, 1, 99, 99, 1, 1, 1, 99,
99, 21, 1, 1, 1, 94, 1, 1, 1, 1, 1, 1, 1, 94, 1, 1, 99, 99, 1,
61, 10, 3, 4, 4, 1, 1, 1, 1, 1, 1, 16, 1, 1, 13, 94), var1 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L), var2 = c(NA, NA, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, NA, NA, 0.9,
0.9, 0.9, 0.9, NA, NA, NA, NA, NA, 5.3, 5.3, 5.3, 5.3, 5.3, NA,
8.6, 8.6, 8.6, 14.5, 14.5, 14.5, NA, NA, NA, NA, NA, 7.4, NA,
7.4, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 4.6, 4.6, -4.1, -4.1),
var3 = c(NA, NA, 35.8, 59.3, 51.3, 57.3, 77.5, 82.4, 41.6,
NA, NA, 66.8, 53, 77.1, NA, NA, 55.8, 81.4, 45.8, 37.9, NA,
38.5, 32, 72, 46.9, 76.4, 76.9, 88, NA, 11.7, 49.4, NA, NA,
64.1, NA, NA, NA, NA, NA, 72.5, 77.7, 83.3, 96.4, 83.3, 95.3,
NA, 69.8, 78.9, NA, NA), var4 = c(NA, NA, 26.6, 24, 9.7,
12.7, 21, 12.7, 9.7, NA, NA, 14, 20.3, 25.6, NA, NA, 18.6,
25.3, 15.7, 10.7, NA, 12.8, 8, 41.9, 12.8, 8.5, 10.2, 14.3,
NA, 19.3, 40, NA, NA, 1.2, NA, NA, NA, NA, NA, 10, 21.9,
19, 42, 11.8, 18.4, NA, 33.5, 3.7, NA, NA), var5 = c(NA,
NA, 2.8, 5.2, 2.3, 4.4, -0.9, 0.3, -0.8, NA, NA, 1.3, 1.5,
5.2, NA, NA, -0.7, -0.9, -0.3, 2.8, NA, 0.3, 1.8, 5.3, -0.9,
4.9, 0.9, 4.8, NA, 1.6, -0.8, NA, NA, -0.7, NA, NA, NA, NA,
NA, 0.4, 0.4, 2.2, 4.2, 1.5, -0.1, NA, 0.3, 1.8, NA, NA),
var6 = c(NA, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA,
TRUE, TRUE, NA, NA, NA, NA, NA, TRUE, TRUE, NA, NA, NA, NA,
TRUE, TRUE, NA, NA, NA, NA, NA, NA, NA, TRUE, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -50L))
Within the same values of column order
I need to create (for each row) two nested sub-dataframes - one with data before and one with data in this row and after. So let's take for example a block of data where order == 1
:
ex %>% filter(order == 1) %>% print()
# A tibble: 6 x 9
timestamp order cat var1 var2 var3 var4 var5 var6
<dttm> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <lgl>
1 2017-08-11 19:46:04 1 1 1 2.5 35.8 26.6 2.8 NA
2 2017-08-11 19:46:05 1 1 1 2.5 59.3 24 5.20 NA
3 2017-08-11 19:46:08 1 1 1 2.5 51.3 9.7 2.3 NA
4 2017-08-11 19:46:10 1 1 1 2.5 57.3 12.7 4.40 TRUE
5 2017-08-11 19:46:13 1 1 1 2.5 77.5 21 -0.9 NA
6 2017-08-11 19:46:15 1 1 0 2.5 82.4 12.7 0.300 NA
I need two additional columns with nested data frames: data_before
and data_after
. For first row data_before
would be empty and data_after
would contain all the rows. For second row, data_before
would contain only first row and data_after
would contain rows from 2 to 6. For third row, data_before
would contain first two rows and data_after
would contains rows from 3 to 6 and so on... Such an operation need to be performed for every value of order
in original data frame. How it can be accomplished?
Expected output for one block of data (with order == 1
) would be:
structure(list(order = c(1, 1, 1, 1, 1, 1), data_before = list(
structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame"),
structure(list(timestamp = structure(1502480764.968, class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = 1, var1 = 1L, var2 = 2.5,
var3 = 35.8, var4 = 26.6, var5 = 2.8, var6 = NA), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -1L)), structure(list(
timestamp = structure(c(1502480764.968, 1502480765.554
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), cat = c(1,
1), var1 = c(1L, 1L), var2 = c(2.5, 2.5), var3 = c(35.8,
59.3), var4 = c(26.6, 24), var5 = c(2.8, 5.2), var6 = c(NA,
NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-2L)), structure(list(timestamp = structure(c(1502480764.968,
1502480765.554, 1502480768.554), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), cat = c(1, 1, 1), var1 = c(1L, 1L, 1L),
var2 = c(2.5, 2.5, 2.5), var3 = c(35.8, 59.3, 51.3),
var4 = c(26.6, 24, 9.7), var5 = c(2.8, 5.2, 2.3), var6 = c(NA,
NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-3L)), structure(list(timestamp = structure(c(1502480764.968,
1502480765.554, 1502480768.554, 1502480770.554), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = c(1, 1, 1, 1), var1 = c(1L,
1L, 1L, 1L), var2 = c(2.5, 2.5, 2.5, 2.5), var3 = c(35.8,
59.3, 51.3, 57.3), var4 = c(26.6, 24, 9.7, 12.7), var5 = c(2.8,
5.2, 2.3, 4.4), var6 = c(NA, NA, NA, TRUE)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L)), structure(list(
timestamp = structure(c(1502480764.968, 1502480765.554,
1502480768.554, 1502480770.554, 1502480773.519), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = c(1, 1, 1, 1, 1), var1 = c(1L,
1L, 1L, 1L, 1L), var2 = c(2.5, 2.5, 2.5, 2.5, 2.5), var3 = c(35.8,
59.3, 51.3, 57.3, 77.5), var4 = c(26.6, 24, 9.7, 12.7,
21), var5 = c(2.8, 5.2, 2.3, 4.4, -0.9), var6 = c(NA,
NA, NA, TRUE, NA)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L))), data_after = list(structure(list(
timestamp = structure(c(1502480764.968, 1502480765.554, 1502480768.554,
1502480770.554, 1502480773.519, 1502480775.72), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = c(1, 1, 1, 1, 1, 1), var1 = c(1L,
1L, 1L, 1L, 1L, 0L), var2 = c(2.5, 2.5, 2.5, 2.5, 2.5, 2.5
), var3 = c(35.8, 59.3, 51.3, 57.3, 77.5, 82.4), var4 = c(26.6,
24, 9.7, 12.7, 21, 12.7), var5 = c(2.8, 5.2, 2.3, 4.4, -0.9,
0.3), var6 = c(NA, NA, NA, TRUE, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L)), structure(list(
timestamp = structure(c(1502480765.554, 1502480768.554, 1502480770.554,
1502480773.519, 1502480775.72), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), cat = c(1, 1, 1, 1, 1), var1 = c(1L, 1L,
1L, 1L, 0L), var2 = c(2.5, 2.5, 2.5, 2.5, 2.5), var3 = c(59.3,
51.3, 57.3, 77.5, 82.4), var4 = c(24, 9.7, 12.7, 21, 12.7
), var5 = c(5.2, 2.3, 4.4, -0.9, 0.3), var6 = c(NA, NA, TRUE,
NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-5L)), structure(list(timestamp = structure(c(1502480768.554,
1502480770.554, 1502480773.519, 1502480775.72), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = c(1, 1, 1, 1), var1 = c(1L,
1L, 1L, 0L), var2 = c(2.5, 2.5, 2.5, 2.5), var3 = c(51.3, 57.3,
77.5, 82.4), var4 = c(9.7, 12.7, 21, 12.7), var5 = c(2.3, 4.4,
-0.9, 0.3), var6 = c(NA, TRUE, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L)), structure(list(
timestamp = structure(c(1502480770.554, 1502480773.519, 1502480775.72
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), cat = c(1,
1, 1), var1 = c(1L, 1L, 0L), var2 = c(2.5, 2.5, 2.5), var3 = c(57.3,
77.5, 82.4), var4 = c(12.7, 21, 12.7), var5 = c(4.4, -0.9,
0.3), var6 = c(TRUE, NA, NA)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L)), structure(list(timestamp = structure(c(1502480773.519,
1502480775.72), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
cat = c(1, 1), var1 = 1:0, var2 = c(2.5, 2.5), var3 = c(77.5,
82.4), var4 = c(21, 12.7), var5 = c(-0.9, 0.3), var6 = c(NA,
NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-2L)), structure(list(timestamp = structure(1502480775.72, class = c("POSIXct",
"POSIXt"), tzone = "UTC"), cat = 1, var1 = 0L, var2 = 2.5, var3 = 82.4,
var4 = 12.7, var5 = 0.3, var6 = NA), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -1L)))), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L))
Upvotes: 4
Views: 239
Reputation: 389047
Using tidyverse
we can split on order
and for each dataframe create two new columns data_before
and data_after
which would contain a list of dataframes based on the conditions.
library(tidyverse)
ex %>%
group_split(order) %>%
map_dfr(. %>%
mutate(data_before = map(seq_len(nrow(.)), function(y) .[seq_len(y - 1), ]),
data_after = map(seq_len(nrow(.)), function(y)
if (y == nrow(.)) .[0,] else .[(y + 1):nrow(.), ]))) %>%
select(order, data_before, data_after)
# A tibble: 50 x 3
# order data_before data_after
# <int> <list> <list>
# 1 1 <tibble [0 × 9]> <tibble [5 × 9]>
# 2 1 <tibble [1 × 9]> <tibble [4 × 9]>
# 3 1 <tibble [2 × 9]> <tibble [3 × 9]>
# 4 1 <tibble [3 × 9]> <tibble [2 × 9]>
# 5 1 <tibble [4 × 9]> <tibble [1 × 9]>
# 6 1 <tibble [5 × 9]> <tibble [0 × 9]>
# 7 2 <tibble [0 × 9]> <tibble [3 × 9]>
# 8 2 <tibble [1 × 9]> <tibble [2 × 9]>
# 9 2 <tibble [2 × 9]> <tibble [1 × 9]>
#10 2 <tibble [3 × 9]> <tibble [0 × 9]>
# … with 40 more rows
This can also be translated in base R in the following way
do.call(rbind, lapply(split(ex, ex$order), function(x) {
x$data_before <- lapply(seq_len(nrow(x)), function(y) x[seq_len(y - 1), ])
x$data_after <- lapply(seq_len(nrow(x)), function(y)
if (y == nrow(x)) x[0,] else x[(y + 1):nrow(x), ])
x
}))
Upvotes: 0
Reputation: 602
Or this:
ex.list <- lapply(split(ex, ex$order), function(x){
ex.x <- as.data.frame(do.call(rbind,
lapply(1:nrow(x), function(i){
c(x$order[i], ifelse(i==1, list(data.frame()), list(x[1:(i-1), ])), list(x[i:nrow(x), ]))
})
))
names(ex.x) <- c('order', 'data_before', 'data_after')
ex.x
})
Edit: Trying to give some more explanation to the code posted before:
# lapply() applies a function (input arg 2) to each element of a list (input arg 1)
# and returns a list of return values of the function applied on each input element
ex.list <- lapply(
# the split() function returns a list of data.frames, subsets of ex
# splitted by ex$order; these will be the input for the 1. lapply() call
split(ex, ex$order),
# the following function will be applied to each of these data.farmes
# to create the return values
function(x){ # 'x' will be a data.frame, subset ox 'ex' with one single value of ex$order
list.of.rows <- lapply(# we now loop over each row in the data.frame
# containing data with one single value of ex$order,
# 'i' is the row number
1:nrow(x),
# the functions will create 1 row for the resulting data.frame
function(i){
c(# the row is 1 vector containing the following 3 values
# the first column of the putput data.frame is the value of ex$order
x$order[i],
# the value for row i of data_before
ifelse(i==1, list(data.frame()), list(x[1:(i-1), ])), # for the first row we return an empty list, else the data.frame with previous (1:(i-1)) rows
# the values for row i of data_after
list(x[i:nrow(x), ]) # subset of rows as off row i
)
})
# now that we have a list (list.of.rows) that contains one row for the output data.frame
# we rbind these into one data.frame
ex.x <- as.data.frame(do.call(rbind, # do.call(rbind, ...) cobines elements of ... using rbind()
list.of.rows
))
names(ex.x) <- c('order', 'data_before', 'data_after') # give column names to the output data.frame
ex.x # define the return value of the function of the 1. lapply() call
})
Upvotes: 1
Reputation: 1054
Check this:
library(tidyverse)
slice_dataframe <- function(r, ord = 1) {
tibble("order" = ord,
"data_before" = list(slice(ex, row_number() <= (r - ord))),
"data_after" = list(slice(ex, row_number() >= (r + ord))))
}
map_df(1:nrow(ex), slice_dataframe)
Upvotes: 1