Reputation: 663
Let's say I have a three-column data frame
df <- structure(list(extra.sd.3.sq. = structure(c(38L, 40L, 39L, 49L,
47L, 36L), .Label = c("0_62433_1", "0_62433_49", "0_62433_54",
"0_62534_1", "0_62534_17", "0_62771_56", "10R_R22_4", "10R_R22_5",
"10R_R22_9", "11_14_14.211", "11_14_14.222", "11_14_14.223",
"17_14_1", "19_L0022_L0022A", "19_L0031_L0031A", "19_L0031_L0031D",
"21_ND121_3", "21_ND165_1", "21_ND165_3", "25_8_151", "26_BCR_BCR18846",
"26_EGE_EGE63828", "27_104_62571", "27_104_84750", "27_176_85817",
"4_557_10", "4_557_21", "4_557_99", "4_637_99", "4_650_5", "8_64021_11",
"8_64034_15", "A-LOAD-LD003528", "A-LOAD-LD006880", "A-LOAD-LD006898",
"A-LOAD-LD006901", "A-LOAD-LD006905", "A-LOAD-LD007218", "A-LOAD-LD007230",
"A-LOAD-LD007250", "A-MIA-UM000661", "A-MIA-UM000663", "A-MIA-UM001974",
"A-MIA-UM001975", "A-NCRD-NC002972", "A-NCRD-NC003203", "A-UPN-UP000022",
"A-UPN-UP000131", "A-UPN-UP000175", "MAP_86633"), class = "factor"),
extra.sd.4.sq. = structure(c(9L, 10L, 7L, 6L, 8L, 2L), .Label = c("10R_R22_5",
"19_L0031_L0031D", "26_LDD_LDD83801", "27_176_85817", "4_557_99",
"4H_328_3", "4H_328_4", "4H_328_99.WGS", "A-LOAD-LD006898",
"A-LOAD-LD006905"), class = "factor"), extra.sd.5.sq. = structure(c(2L,
1L, NA, NA, NA, NA), .Label = c("4_557_99", "4H_328_4"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
I wanted to melt all the columns into long format and get a table like this below:
Samples SD
A-LOAD-LD007218 extra.sd.3.sq.
A-LOAD-LD007250 extra.sd.3.sq.
A-LOAD-LD007230 extra.sd.3.sq.
A-UPN-UP000175 extra.sd.3.sq.
A-UPN-UP000022 extra.sd.3.sq.
A-LOAD-LD006901 extra.sd.3.sq.
A-LOAD-LD006898 extra.sd.4.sq.
A-LOAD-LD006905 extra.sd.4.sq.
4H_328_4 extra.sd.4.sq.
4H_328_3 extra.sd.4.sq.
4H_328_99.WGS extra.sd.4.sq.
19_L0031_L0031D extra.sd.4.sq.
4H_328_4 extra.sd.5.sq.
4_557_99 extra.sd.5.sq.
I was thinking something like this, but I do not have "ID" or "var".
melt(setDT(df), id.vars = "ID", variable.name = "var")
Upvotes: 1
Views: 255
Reputation: 160687
In cases like these, it sometimes helps to add a column to be discarded later, so that melt
has an "id" column.
Up front, let's convert the factor
s to character
, since otherwise they are unlikely to work well together.
df[] <- lapply(df, as.character)
df$rn <- seq(nrow(df))
transform(
reshape2::melt(df, id.vars = "rn",
variable.name = "SD", value.name = "Samples",
na.rm = TRUE),
rn = NULL)
# SD Samples
# 1 extra.sd.3.sq. A-LOAD-LD007218
# 2 extra.sd.3.sq. A-LOAD-LD007250
# 3 extra.sd.3.sq. A-LOAD-LD007230
# 4 extra.sd.3.sq. A-UPN-UP000175
# 5 extra.sd.3.sq. A-UPN-UP000022
# 6 extra.sd.3.sq. A-LOAD-LD006901
# 7 extra.sd.4.sq. A-LOAD-LD006898
# 8 extra.sd.4.sq. A-LOAD-LD006905
# 9 extra.sd.4.sq. 4H_328_4
# 10 extra.sd.4.sq. 4H_328_3
# 11 extra.sd.4.sq. 4H_328_99.WGS
# 12 extra.sd.4.sq. 19_L0031_L0031D
# 13 extra.sd.5.sq. 4H_328_4
# 14 extra.sd.5.sq. 4_557_99
Upvotes: 1
Reputation: 79184
We could use pivot_longer
from tidyr
package:
library(tidyr)
library(dplyr)
df %>%
pivot_longer(
everything(),
names_to = "SD",
values_to = "Samples"
) %>%
na.omit() %>%
arrange(Samples)
SD Samples
<chr> <fct>
1 extra.sd.4.sq. 19_L0031_L0031D
2 extra.sd.5.sq. 4_557_99
3 extra.sd.4.sq. A-LOAD-LD006898
4 extra.sd.3.sq. A-LOAD-LD006901
5 extra.sd.4.sq. A-LOAD-LD006905
6 extra.sd.3.sq. A-LOAD-LD007218
7 extra.sd.3.sq. A-LOAD-LD007230
8 extra.sd.3.sq. A-LOAD-LD007250
9 extra.sd.3.sq. A-UPN-UP000022
10 extra.sd.3.sq. A-UPN-UP000175
11 extra.sd.4.sq. 4H_328_3
12 extra.sd.5.sq. 4H_328_4
13 extra.sd.4.sq. 4H_328_4
14 extra.sd.4.sq. 4H_328_99.WGS
Upvotes: 4