Reputation: 1389
I need to pivot longer grouped by column string prefix. the toy example below has two groups "A" and "B" but I need a general tidyverse solution for any number of groups by prefix.
#toy df
set.seed(1)
df <- data.table(
date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each=6),
k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
v = runif(30,0,50)
) %>%
pivot_wider(names_from = k, values_from = v)
df %>% head
date A.mean A.median A.min B.mean B.median B.min
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2020-01-01 13.3 18.6 28.6 45.4 10.1 44.9
2 2020-01-02 47.2 33.0 31.5 3.09 10.3 8.83
3 2020-01-03 34.4 19.2 38.5 24.9 35.9 49.6
4 2020-01-04 19.0 38.9 46.7 10.6 32.6 6.28
5 2020-01-05 13.4 19.3 0.670 19.1 43.5 17.0
#pivot longer by group prefix
df %>%
select(date,matches("A\\.")) %>%
rename_with(~str_replace(.x,"A\\.","")) %>%
mutate( k = "A") %>%
bind_rows(
df %>%
select(date,matches("B\\.")) %>%
rename_with(~str_replace(.x,"B\\.","")) %>%
mutate( k = "B")
)
date mean median min k
<date> <dbl> <dbl> <dbl> <chr>
1 2020-01-01 13.3 18.6 28.6 A
2 2020-01-02 47.2 33.0 31.5 A
3 2020-01-03 34.4 19.2 38.5 A
4 2020-01-04 19.0 38.9 46.7 A
5 2020-01-05 13.4 19.3 0.670 A
6 2020-01-01 45.4 10.1 44.9 B
7 2020-01-02 3.09 10.3 8.83 B
8 2020-01-03 24.9 35.9 49.6 B
9 2020-01-04 10.6 32.6 6.28 B
10 2020-01-05 19.1 43.5 17.0 B
Upvotes: 6
Views: 1755
Reputation: 24079
Here is a two step process (shown in two lines for demonstration purposes). First pivot longer to create columns for k, stat name and value, then pivot wider to create the desired result.
The edited code below, one can obtain the answer is one step, by using the ".value" wild card option in the "names_to" specifier.
library(tidyr)
set.seed(1)
df <- data.frame(
date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each=6),
k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
v = runif(30,0,50)
) %>%
pivot_wider(names_from = k, values_from = v)
#temp <- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", "stat"))
#answer <- pivot_wider(temp, id_cols = c("date", "k"), names_from= "stat", values_from="value")
#updated answer simplified down to just the pivot longer function
answer <- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", ".value"))
print(head(answer))
# A tibble: 6 x 5
date k mean median min
<date> <chr> <dbl> <dbl> <dbl>
1 2020-01-01 A 13.3 18.6 28.6
2 2020-01-01 B 45.4 10.1 44.9
3 2020-01-02 A 47.2 33.0 31.5
4 2020-01-02 B 3.09 10.3 8.83
5 2020-01-03 A 34.4 19.2 38.5
6 2020-01-03 B 24.9 35.9 49.6
Upvotes: 4
Reputation: 2944
Hopefully this works:
df %>% pivot_longer(cols = contains(".")) %>%
mutate(k = substr(name,1,1), name = substr(name,3,nchar(name))) %>%
pivot_wider(names_from = name, values_from = value) %>%
arrange(k)
As an example:
df
# A tibble: 5 x 7
# date A.mean A.median A.min B.mean B.median B.min
# <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2020-01-01 17.9 40.2 12.6 32.7 17.9 14.3
#2 2020-01-02 49.5 29.8 50.0 36.5 0.788 49.7
#3 2020-01-03 0.375 48.2 20.7 14.9 33.0 12.1
#4 2020-01-04 5.42 10.1 16.8 35.5 49.4 10.7
#5 2020-01-05 17.9 28.2 5.64 25.8 31.3 10.8
df %>% pivot_longer(cols = contains(".")) %>%
mutate(k = substr(name,1,1), name = substr(name,3,nchar(name))) %>%
pivot_wider(names_from = name, values_from = value) %>%
arrange(k)
# A tibble: 10 x 5
# date k mean median min
<date> <chr> <dbl> <dbl> <dbl>
# 1 2020-01-01 A 17.9 40.2 12.6
# 2 2020-01-02 A 49.5 29.8 50.0
# 3 2020-01-03 A 0.375 48.2 20.7
# 4 2020-01-04 A 5.42 10.1 16.8
# 5 2020-01-05 A 17.9 28.2 5.64
# 6 2020-01-01 B 32.7 17.9 14.3
# 7 2020-01-02 B 36.5 0.788 49.7
# 8 2020-01-03 B 14.9 33.0 12.1
# 9 2020-01-04 B 35.5 49.4 10.7
#10 2020-01-05 B 25.8 31.3 10.8
Upvotes: 2