Alex
Alex

Reputation: 2778

Partial gather of 4 columns to make 2 columns dplyr

This is a toy dataframe.

>library(tidyverse)
>a

id  e0  e1  e2  ee
1   0    1   2   3
1   0    1   2   3  
1   0    1   2   3
2   6    7   8   9
2   6    7   8   9
2   6    7   8   9

I would like to group the first three columns as start and the last three columns as end in such a way that I get the following

id  start  end
1   0       1
1   1       2
1   2       3 
2   6       7    
2   7       8
2   8       9

Upvotes: 1

Views: 87

Answers (2)

Sotos
Sotos

Reputation: 51592

A base R option would be,

data.frame(ID = a$id, start = unique(c(t(a[2:4]))), end = unique(c(t(a[3:5]))))
#  ID start end
#1  1     0   1
#2  1     1   2
#3  1     2   3
#4  2     6   7
#5  2     7   8
#6  2     8   9

Upvotes: 1

r2evans
r2evans

Reputation: 160872

txt <- "id  e0  e1  e2  ee
1   0    1   2   3
1   0    1   2   3  
1   0    1   2   3
2   6    7   8   9
2   6    7   8   9
2   6    7   8   9"
a <- read.table(text = txt, header = TRUE)

With dplyr:

library(dplyr)
a2 <- distinct(a)
bind_rows(
  select(a2, id, start = e0, end = e1),
  select(a2, id, start = e1, end = e2),
  select(a2, id, start = e2, end = ee)
)
#   id start end
# 1  1     0   1
# 2  2     6   7
# 3  1     1   2
# 4  2     7   8
# 5  1     2   3
# 6  2     8   9

With base R:

do.call("rbind.data.frame",
        list(
          setNames(a2[,c("id","e0","e1")], c("id", "start", "end")),
          setNames(a2[,c("id","e1","e2")], c("id", "start", "end")),
          setNames(a2[,c("id","e2","ee")], c("id", "start", "end"))
        ))

Edit: based on comments, if it is safe to assume that each id will have exactly as many rows as the number of e-columns minus one, then you can do this:

nc <- 3
a %>%
  group_by(id) %>%
  mutate(
    n = (row_number() - 1) %% nc + 1,
    start = recode(n, e0, e1, e2),
    end = recode(n, e1, e2, ee)
  ) %>%
  ungroup() %>%
  select(id, start, end)
# # A tibble: 6 × 3
#      id start   end
#   <int> <int> <int>
# 1     1     0     1
# 2     1     1     2
# 3     1     2     3
# 4     2     6     7
# 5     2     7     8
# 6     2     8     9

Actually, this works even if there aren't the correct number of rows, though it may be an incorrect result if you don't.

Upvotes: 2

Related Questions