Greg
Greg

Reputation: 3660

Using pivot_longer with existing names_to column

Take an example dataframe like so (the real dataframe has more columns):

df <- data.frame(A = seq(1, 3, 1),
                 B = seq(4, 6, 1))

I can use pivot_longer to collect my columns of interest (A and B) like so:

library(dplyr)
library(tidyr)
df <- df %>% 
  pivot_longer(cols = c("A", "B"), names_to = "Letter", values_to = "Number")
df
  Letter Number
  <chr>   <dbl>
1 A           1
2 B           4
3 A           2
4 B           5
5 A           3
6 B           6

Now let's say I have another column C in my dataframe, making it no longer tidy

C <- seq(7, 12, 1)

df_2 <- data.frame(df, C)
df_2
  Letter Number  C
1      A      1  7
2      B      4  8
3      A      2  9
4      B      5 10
5      A      3 11
6      B      6 12

I want to use pivot_longer again to make df_2 tidy and get this output:

data.frame(Letter = c(rep("A", 3), rep("B", 3), rep("C", 3)),
                        Number = seq(1, 12, 1))
   Letter Number
1       A      1
2       A      2
3       A      3
4       B      4
5       B      5
6       B      6
7       C      7
8       C      8
9       C      9
10      C     10
11      C     11
12      C     12

Using the same strategy creates an error though:

df_2 %>% 
  pivot_longer(cols = "C", names_to = "Letter", values_to = "Number")
Error: Failed to create output due to bad names.
* Choose another strategy with `names_repair`

Setting names_repair to minimal runs but doesn't produce the output I want.

Upvotes: 7

Views: 3740

Answers (3)

AnilGoyal
AnilGoyal

Reputation: 26218

Follow it like this

library(tidyverse)
df <- data.frame(A = seq(1, 3, 1),
                 B = seq(4, 6, 1))
df <- df %>% 
  pivot_longer(cols = c("A", "B"), names_to = "Letter", values_to = "Number")

C <- seq(7, 12, 1)
df_2 <- data.frame(C)
df_2 <- df_2 %>% pivot_longer(cols = C, names_to = "Letter", values_to = "Number")

df_result <- rbind(df, df_2)

Output

> df_result
# A tibble: 12 x 2
   Letter Number
   <chr>   <dbl>
 1 A           1
 2 B           4
 3 A           2
 4 B           5
 5 A           3
 6 B           6
 7 C           7
 8 C           8
 9 C           9
10 C          10
11 C          11
12 C          12

Upvotes: 5

akrun
akrun

Reputation: 887251

We could do this easily with stack

library(dplyr)
stack(df_2)[2:1] %>% 
     distinct %>%
     set_names(c("Letter", "Number"))

-output

#    Letter Number
#1       A      1
#2       A      2
#3       A      3
#4       B      4
#5       B      5
#6       B      6
#7       C      7
#8       C      8
#9       C      9
#10      C     10
#11      C     11
#12      C     12

Or an option with unnest/enframe

library(tidyr)
library(tibble)
unclass(df_2) %>% 
     enframe(name = "Letter", value = "Number") %>% 
     unnest(c(Number)) %>% 
     distinct

Or using melt

library(reshape2)
melt(df_2) %>% 
      distinct()

Or in a single line in base R

unique(stack(df_2)[2:1])

Upvotes: 1

Duck
Duck

Reputation: 39605

Maybe try this if it is helpful:

library(tidyverse)
#Code
df_2 %>% pivot_longer(everything()) %>%
  arrange(name) %>% group_by(name) %>%
  filter(!duplicated(value))

Output:

# A tibble: 12 x 2
# Groups:   name [3]
   name  value
   <chr> <dbl>
 1 A         1
 2 A         2
 3 A         3
 4 B         4
 5 B         5
 6 B         6
 7 C         7
 8 C         8
 9 C         9
10 C        10
11 C        11
12 C        12

Upvotes: 2

Related Questions