Ahdee
Ahdee

Reputation: 4949

Insert dummy multiple X rows between groups with dplyr?

HI suppose I have the following dataframe

df <- data.frame(group = rep(c("group_1","group_2","group_3", "group_4", "group_5", "group_6"), each=3),
                 X = paste(letters[1:18]),
                 Y = c(1:18))

 df
     group X  Y
1  group_1 a  1
2  group_1 b  2
3  group_1 c  3
4  group_2 d  4
5  group_2 e  5
6  group_2 f  6
7  group_3 g  7
8  group_3 h  8
9  group_3 i  9
10 group_4 j 10
11 group_4 k 11
12 group_4 l 12
13 group_5 m 13
14 group_5 n 14
15 group_5 o 15
16 group_6 p 16
17 group_6 q 17
18 group_6 r 18

What I want is to insert 5 rows with values of -10 in the Y columns but retain the group id

I tried doing this based on a similar cross post but cannot seem to retain group id and this only seem to work to insert 1 row.

> df %>%
+   group_split(group) %>% 
+   map_dfr(~ .x %>% 
+             add_row(Y = -10, .after = 0))
# A tibble: 24 x 3
   group   X         Y
   <chr>   <chr> <dbl>
 1 NA      NA      -10
 2 group_1 a         1
 3 group_1 b         2
 4 group_1 c         3
 5 NA      NA      -10
 6 group_2 d         4
 7 group_2 e         5
 8 group_2 f         6
 9 NA      NA      -10

the example above fails because it only inserts 1 row moreover the group id is loss, ideally for example, there should be 5 rows with group_1 inserted with the Y values of -10.

is this possible? thanks in advance!

Upvotes: 2

Views: 473

Answers (5)

Ronak Shah
Ronak Shah

Reputation: 388982

For each group you can add 5 dummy rows and bind to the original df.

Base R option :

result <- rbind(df, data.frame(group = rep(unique(df$group), each = 5), X = NA, Y = -10))
result <- result[order(result$group), ]
rownames(result) <- NULL
result

#     group    X   Y
#1  group_1    a   1
#2  group_1    b   2
#3  group_1    c   3
#4  group_1 <NA> -10
#5  group_1 <NA> -10
#6  group_1 <NA> -10
#7  group_1 <NA> -10
#8  group_1 <NA> -10
#9  group_2    d   4
#10 group_2    e   5
#11 group_2    f   6
#12 group_2 <NA> -10
#13 group_2 <NA> -10
#...

Upvotes: 1

MrFlick
MrFlick

Reputation: 206207

If you want to keep the group ID, I suggest group_modify. First, a helper function to add a blank row to a group

add_blank <- function(x, n=5) {
  tibble::add_row(x, X=rep(NA, n), Y=rep(-10, n))
}

And then you can do

df %>% 
  group_by(group) %>% 
  group_modify(~add_blank(., 5))

Upvotes: 2

TarJae
TarJae

Reputation: 78927

Do you look for this solution?

df1 <- df %>% 
  group_by(group) %>% 
  do({ df <- . 
  last_row           <- df %>% slice(n())
  last_row$Y  <- -10
  df                 <- bind_rows(df, last_row)
  })

Output:

   group   X         Y
   <chr>   <chr> <dbl>
 1 group_1 a         1
 2 group_1 b         2
 3 group_1 c         3
 4 group_1 c       -10
 5 group_2 d         4
 6 group_2 e         5
 7 group_2 f         6
 8 group_2 f       -10
 9 group_3 g         7
10 group_3 h         8
11 group_3 i         9
12 group_3 i       -10
13 group_4 j        10
14 group_4 k        11
15 group_4 l        12
16 group_4 l       -10
17 group_5 m        13
18 group_5 n        14
19 group_5 o        15
20 group_5 o       -10
21 group_6 p        16
22 group_6 q        17
23 group_6 r        18
24 group_6 r       -10

Upvotes: 1

akrun
akrun

Reputation: 887108

We could add uncount after the add_row to replicate the first row 5 times

library(dplyr)
library(tidyr)
library(purrr)
library(tibble)
df %>% 
   group_split(group) %>% 
   map_dfr(~ .x %>% 
                add_row(group = first(.x$group), Y = -10, .after = 0) %>%
                uncount(rep(c(5, 1), c(1, n()-1))))

-output

# A tibble: 48 x 3
#   group   X         Y
#   <chr>   <chr> <dbl>
# 1 group_1 <NA>    -10
# 2 group_1 <NA>    -10
# 3 group_1 <NA>    -10
# 4 group_1 <NA>    -10
# 5 group_1 <NA>    -10
# 6 group_1 a         1
# 7 group_1 b         2
# 8 group_1 c         3
# 9 group_2 <NA>    -10
#10 group_2 <NA>    -10
# … with 38 more rows

Or as the number of columns are only 3, we could also do this in dplyr alone with summarise after grouping by the 'group'. In the newer versions of dplyr, summarise doesn't have the constraint to return a single row per group

df %>%
    group_by(group) %>%
    summarise(X = c(rep(NA_character_, 5), X), 
              Y = c(rep(-10, 5), Y), .groups = 'drop')
# A tibble: 48 x 3
#   group   X         Y
#   <chr>   <chr> <dbl>
# 1 group_1 <NA>    -10
# 2 group_1 <NA>    -10
# 3 group_1 <NA>    -10
# 4 group_1 <NA>    -10
# 5 group_1 <NA>    -10
# 6 group_1 a         1
# 7 group_1 b         2
# 8 group_1 c         3
# 9 group_2 <NA>    -10
#10 group_2 <NA>    -10
# … with 38 more rows

Upvotes: 2

ThomasIsCoding
ThomasIsCoding

Reputation: 101335

A base R option using split + rbind

do.call(
  rbind,
  c(
    make.row.names = FALSE,
    lapply(
      split(df, df$group),
      function(x) {
        rbind(setNames(data.frame(NA, NA, -10), names(x)), x)
      }
    )
  )
)

gives

     group    X   Y
1     <NA> <NA> -10
2  group_1    a   1
3  group_1    b   2
4  group_1    c   3
5     <NA> <NA> -10
6  group_2    d   4
7  group_2    e   5
8  group_2    f   6
9     <NA> <NA> -10
10 group_3    g   7
11 group_3    h   8
12 group_3    i   9
13    <NA> <NA> -10
14 group_4    j  10
15 group_4    k  11
16 group_4    l  12
17    <NA> <NA> -10
18 group_5    m  13
19 group_5    n  14
20 group_5    o  15
21    <NA> <NA> -10
22 group_6    p  16
23 group_6    q  17
24 group_6    r  18

Upvotes: 1

Related Questions