Nick Knauer
Nick Knauer

Reputation: 4243

Dataframes within a column of dataframe

I have dataframe_A and within that there is a column where every row contains a dataframe.

Here is the dput of the dataframe I am talking about:

structure(list(spend = c("840.99", "1145.39", "1043.88", "1169.15", 
"1249.75", "1197.57"), actions = list(structure(list(action_type = c("like", 

"link_click", "post", "post_reaction", "page_engagement", "post_engagement"
), value = c("18", "205", "2", "28", "253", "235")), .Names = 
c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("26", "268", 
"3", "33", "330", "304")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("16", "214", 
"2", "28", "260", "244")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
action_type = c("comment", "like", "link_click", "post", 
"post_reaction", "page_engagement", "post_engagement"), value = c("2", 
"14", "255", "2", "44", "317", "303")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
action_type = c("comment", "like", "link_click", "post", 
"post_reaction", "page_engagement", "post_engagement"), value = c("1", 
"15", "240", "1", "23", "280", "265")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
action_type = c("like", "link_click", "post", "post_reaction", 
"page_engagement", "post_engagement"), value = c("16", "252", 
"3", "32", "303", "287")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L))), date_start = c("2017-10-31", 
"2017-11-01", "2017-11-02", "2017-11-03", "2017-11-04", "2017-11-05"
)), .Names = c("spend", "actions", "date_start"), class = c("data.table", 
"data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 
0x0000000000310788>) 

The output looks like this:

     spend      actions date_start
1:  840.99 <data.frame> 2017-10-31
2: 1145.39 <data.frame> 2017-11-01
3: 1043.88 <data.frame> 2017-11-02
4: 1169.15 <data.frame> 2017-11-03
5: 1249.75 <data.frame> 2017-11-04
6: 1197.57 <data.frame> 2017-11-05

If I look at just the actions column, it looks like this:

df$actions
[[1]]
      action_type value
1            like    18
2      link_click   205
3            post     2
4   post_reaction    28
5 page_engagement   253
6 post_engagement   235

[[2]]
      action_type value
1            like    26
2      link_click   268
3            post     3
4   post_reaction    33
5 page_engagement   330
6 post_engagement   304

[[3]]
      action_type value
1            like    16
2      link_click   214
3            post     2
4   post_reaction    28
5 page_engagement   260
6 post_engagement   244

[[4]]
      action_type value
1         comment     2
2            like    14
3      link_click   255
4            post     2
5   post_reaction    44
6 page_engagement   317
7 post_engagement   303

[[5]]
      action_type value
1         comment     1
2            like    15
3      link_click   240
4            post     1
5   post_reaction    23
6 page_engagement   280
7 post_engagement   265

[[6]]
      action_type value
1            like    16
2      link_click   252
3            post     3
4   post_reaction    32
5 page_engagement   303
6 post_engagement   287

You can see that each dataframe corresponds to a row in the original dataframe. How do I extract these dataframes and make the row names new columns that will attach to the original dataframe. I want my final result to be this (the formatting was weird when I was making this so I couldn't write the last 3 columns but those should be populated too):

     spend      actions date_start    comment    like   link_click    post    post_reaction    page_engagement     post_engagement   
1:  840.99 <data.frame> 2017-10-31          0      18          205       2 
2: 1145.39 <data.frame> 2017-11-01          0      26          268       3              
3: 1043.88 <data.frame> 2017-11-02          0      16          214       2                  
4: 1169.15 <data.frame> 2017-11-03          2      14          255       2                          
5: 1249.75 <data.frame> 2017-11-04          1      15          240       1                             
6: 1197.57 <data.frame> 2017-11-05          0      16          252       3  

Not sure if this is the right route but I was thinking something along these lines and rbinding the results and cbinding it to the original dataframe:

t(df$actions[[1]])

Any help would be great, thanks!

Upvotes: 3

Views: 2915

Answers (1)

missuse
missuse

Reputation: 19756

perhaps this approach:

library(tidyverse) 

   z1 %>%
      as.tibble() %>%
      unnest() %>%
      spread(action_type, value) #convert to wide format
#output
  spend   date_start comment like  link_click page_engagement post  post_engagement post_reaction
* <chr>   <chr>      <chr>   <chr> <chr>      <chr>           <chr> <chr>           <chr>        
1 1043.88 2017-11-02 NA      16    214        260             2     244             28           
2 1145.39 2017-11-01 NA      26    268        330             3     304             33           
3 1169.15 2017-11-03 2       14    255        317             2     303             44           
4 1197.57 2017-11-05 NA      16    252        303             3     287             32           
5 1249.75 2017-11-04 1       15    240        280             1     265             23           
6 840.99  2017-10-31 NA      18    205        253             2     235             28   

data:

> dput(z1)
structure(list(spend = c("840.99", "1145.39", "1043.88", "1169.15", 
"1249.75", "1197.57"), actions = list(structure(list(action_type = c("like", 
"link_click", "post", "post_reaction", "page_engagement", "post_engagement"
), value = c("18", "205", "2", "28", "253", "235")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("26", "268", 
    "3", "33", "330", "304")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("16", "214", 
    "2", "28", "260", "244")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L)), structure(list(
    action_type = c("comment", "like", "link_click", "post", 
    "post_reaction", "page_engagement", "post_engagement"), value = c("2", 
    "14", "255", "2", "44", "317", "303")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
    action_type = c("comment", "like", "link_click", "post", 
    "post_reaction", "page_engagement", "post_engagement"), value = c("1", 
    "15", "240", "1", "23", "280", "265")), .Names = c("action_type", 
"value"), class = "data.frame", row.names = c(NA, 7L)), structure(list(
    action_type = c("like", "link_click", "post", "post_reaction", 
    "page_engagement", "post_engagement"), value = c("16", "252", 
    "3", "32", "303", "287")), .Names = c("action_type", "value"
), class = "data.frame", row.names = c(NA, 6L))), date_start = c("2017-10-31", 
"2017-11-01", "2017-11-02", "2017-11-03", "2017-11-04", "2017-11-05"
)), .Names = c("spend", "actions", "date_start"), class = c("data.table", 
"data.frame"), row.names = c(NA, -6L))

Upvotes: 3

Related Questions