Thomas
Thomas

Reputation: 1302

Combine rows in data frame without grouping

tibble::tibble(
  col1 = c("A","","C","",""),
  col2 = c("string1 part 1","string1 part 2",
           "string2 part 1", "string2 part 2",
           "string3"),
  col3 = c(1, "", 2, "", 3)
)

I'd like to merge the rows in col2, obtaining a tibble without empty strings like this

tibble::tibble(
  col1 = c("A","C",NA),
  col2 = c("string1 part 1 string1 part 2", "string2 part 1 string2 part 2", "string3"),
  col3 = c(1,2,3)
)

# A tibble: 3 x 3
  col1  col2                           col3
  <chr> <chr>                         <dbl>
1 A     string1 part 1 string1 part 2     1
2 C     string2 part 1 string2 part 2     2
3 NA    string3                           3

Grouping, as I found in other answers, does not seems to be an option while I don't have any reference column

Upvotes: 4

Views: 322

Answers (4)

M--
M--

Reputation: 28825

A more conventional dplyr solution:

require(dplyr)

df1 %>% 
  rowwise %>% mutate(grp = strsplit(col2, " ")[[1]][1]) %>% 
  ungroup %>% group_by(grp) %>% 
  summarise_all(list(~trimws(as.character(paste0(., collapse = " "))))) %>% 
  select(-grp) %>% 
  mutate_all(na_if,"")

#> # A tibble: 3 x 3
#>   col1  col2                          col3 
#>   <chr> <chr>                         <chr>
#> 1 A     string1 part 1 string1 part 2 1    
#> 2 C     string2 part 1 string2 part 2 2    
#> 3 <NA>  string3                       3

Upvotes: 1

IceCreamToucan
IceCreamToucan

Reputation: 28685

Ignoring the "without grouping" part and grouping by the cumsum of an indicator for col1 not being blank + splitting out the first two rows from each group identified by the previous cumsum, you can paste the non-empty elements in the group together and return NA if the result is empty.

library(data.table)
setDT(df)

df[, cs := cumsum(col1 != '')]
df[, lapply(.SD, function(x){ 
        out <- paste(x[x != ''], collapse = ' ')
        if(out == '') NA_character_
        else out })
   , by  = .(cs, r = rowid(cs) <= 2)
  ][, -c('cs', 'r')]

#    col1                          col2 col3
# 1:    A string1 part 1 string1 part 2    1
# 2:    C string2 part 1 string2 part 2    2
# 3: <NA>                       string3    3

Upvotes: 0

ThomasIsCoding
ThomasIsCoding

Reputation: 101343

Here is a solution with base R, where findInterval() and split() are used:

# split df according to col3 values
dfs <- split(df,findInterval(1:nrow(df), which(nchar(as.vector(df$col3))>0)))

# merge rows and form new data frame
dfout <- Reduce(rbind,lapply(dfs, function(v) data.frame(lapply(v, function(x) trimws(paste(x,collapse = " "))))))

such that

> dfout
  col1                          col2 col3
1    A string1 part 1 string1 part 2    1
2    C string2 part 1 string2 part 2    2
3                            string3    3

DATA

df <- structure(list(col1 = structure(c(2L, 1L, 3L, 1L, 1L), .Label = c("", 
"A", "C"), class = "factor"), col2 = structure(1:5, .Label = c("string1 part 1", 
"string1 part 2", "string2 part 1", "string2 part 2", "string3"
), class = "factor"), col3 = structure(c(2L, 1L, 3L, 1L, 4L), .Label = c("", 
"1", "2", "3"), class = "factor")), class = "data.frame", row.names = c(NA, 
-5L))

Upvotes: 0

Jet
Jet

Reputation: 690

This works with dplyr

tibble::tibble(
  col1 = c("A","","C","",""),
  col2 = c("string1 part 1","string1 part 2",
           "string2 part 1", "string2 part 2",
           "string3"),
  col3 = c(1, "", 2, "", 3)
) %>% 
# fill empty values on col3
mutate(col3 = case_when(
    col3!="" ~ 1,
    T ~ 0
)) %>%
mutate(col3 = cumsum(col3)) %>%
# fill empty values on col1
group_by(col3) %>%
mutate(col1 = first(col1)) %>%
# group & summarise
group_by(col1, col3) %>%
summarise(col2 = paste(col2, collapse=' ')) %>%
# replace empty string by NA & arrange by col3
ungroup() %>%
mutate(col1 = case_when(
    col1=="" ~ as.character(NA),
    T ~ col1
)) %>%
arrange(col3) %>%
select(col1, col2, col3)

Output :

# A tibble: 3 x 3
#  col1  col2                           col3
#  <chr> <chr>                         <dbl>
#1 A     string1 part 1 string1 part 2     1
#2 C     string2 part 1 string2 part 2     2
#3 <NA>  string3                           3

Upvotes: 1

Related Questions