Reputation: 111
I have a data frame in R this is a portion of it:
Kif21a PTHR24115 ENSMUSG00000022629
Acss3 PTHR24115 ENSMUSG00000035948
Nr1h4 PTHR24082 ENSMUSG00000047638
Rarg PTHR24082 ENSMUSG00000001288
Vdr PTHR24082 ENSMUSG00000022479
Pamr1 PTHR24254 ENSMUSG00000027188
What I'd like to achieve is ordering the data frame by the most common element (more occurrences) in the second column, this is the desired result:
Nr1h4 PTHR24082 ENSMUSG00000047638
Rarg PTHR24082 ENSMUSG00000001288
Vdr PTHR24082 ENSMUSG00000022479
Kif21a PTHR24115 ENSMUSG00000022629
Acss3 PTHR24115 ENSMUSG00000035948
Pamr1 PTHR24254 ENSMUSG00000027188
Thanks a lot!
Upvotes: 0
Views: 90
Reputation: 887118
An option would be
library(dplyr)
df1 %>%
group_by(col2) %>%
mutate(n = n()) %>%
ungroup %>%
arrange(desc(n))
Another option is add_count
df1 %>%
add_count(col2) %>%
arrange(desc(n))
# A tibble: 6 x 4
# col1 col2 col3 n
# <chr> <chr> <chr> <int>
#1 Nr1h4 PTHR24082 ENSMUSG00000047638 3
#2 Rarg PTHR24082 ENSMUSG00000001288 3
#3 Vdr PTHR24082 ENSMUSG00000022479 3
#4 Kif21a PTHR24115 ENSMUSG00000022629 2
#5 Acss3 PTHR24115 ENSMUSG00000035948 2
#6 Pamr1 PTHR24254 ENSMUSG00000027188 1
Or using base R
with ave
df1[with(df1, order(-ave(seq_along(col2), col2, FUN = length))),]
df1 <- structure(list(col1 = c("Kif21a", "Acss3", "Nr1h4", "Rarg", "Vdr",
"Pamr1"), col2 = c("PTHR24115", "PTHR24115", "PTHR24082", "PTHR24082",
"PTHR24082", "PTHR24254"), col3 = c("ENSMUSG00000022629", "ENSMUSG00000035948",
"ENSMUSG00000047638", "ENSMUSG00000001288", "ENSMUSG00000022479",
"ENSMUSG00000027188")), class = "data.frame", row.names = c(NA,
-6L))
Upvotes: 2
Reputation: 388982
A base R way is to count the occurrence of V2
using table
, sort
them in decreasing order convert it into a dataframe using stack
and merge
with original dataframe
merge(df, stack(sort(table(df$V2), decreasing = TRUE)), by.x = "V2", by.y = "ind")
# V2 V1 V3 values
#1 PTHR24082 Nr1h4 ENSMUSG00000047638 3
#2 PTHR24082 Rarg ENSMUSG00000001288 3
#3 PTHR24082 Vdr ENSMUSG00000022479 3
#4 PTHR24115 Kif21a ENSMUSG00000022629 2
#5 PTHR24115 Acss3 ENSMUSG00000035948 2
#6 PTHR24254 Pamr1 ENSMUSG00000027188 1
You can remove the values
column which is the count of frequency for each V2
if not needed.
In dplyr
, we can do this with inner_join
library(dplyr)
df %>%
count(V2) %>%
arrange(desc(n)) %>%
inner_join(df) %>%
select(-n)
Upvotes: 1
Reputation: 28685
If your columns are named A, B, C you can use the code below. This will add an N
column to df
, so if you don't want that, either add df <-
at the begeninning to have this output overwrite df
, or replace df
with copy(df)
library(data.table)
setDT(df)
df[, N := .N, B][order(-N)][, -'N']
# A B C
# 1: Nr1h4 PTHR24082 ENSMUSG00000047638
# 2: Rarg PTHR24082 ENSMUSG00000001288
# 3: Vdr PTHR24082 ENSMUSG00000022479
# 4: Kif21a PTHR24115 ENSMUSG00000022629
# 5: Acss3 PTHR24115 ENSMUSG00000035948
# 6: Pamr1 PTHR24254 ENSMUSG00000027188
Upvotes: 1
Reputation: 1253
Using base:
df <-as.data.frame(matrix(c("Kif21a", "PTHR24115", "ENSMUSG00000022629",
"Acss3", "PTHR24115", "ENSMUSG00000035948",
"Nr1h4", "PTHR24082", "ENSMUSG00000047638",
"Rarg", "PTHR24082", "ENSMUSG00000001288",
"Vdr", "PTHR24082", "ENSMUSG00000022479",
"Pamr1", "PTHR24254", "ENSMUSG00000027188"),ncol =3, byrow = T))
V1 V2 V3
1 Kif21a PTHR24115 ENSMUSG00000022629
2 Acss3 PTHR24115 ENSMUSG00000035948
3 Nr1h4 PTHR24082 ENSMUSG00000047638
4 Rarg PTHR24082 ENSMUSG00000001288
5 Vdr PTHR24082 ENSMUSG00000022479
6 Pamr1 PTHR24254 ENSMUSG00000027188
tmp <- table(df$V2)
df[order(tmp[levels(df$V2)[df$V2]], decreasing = T),]
V1 V2 V3
3 Nr1h4 PTHR24082 ENSMUSG00000047638
4 Rarg PTHR24082 ENSMUSG00000001288
5 Vdr PTHR24082 ENSMUSG00000022479
1 Kif21a PTHR24115 ENSMUSG00000022629
2 Acss3 PTHR24115 ENSMUSG00000035948
6 Pamr1 PTHR24254 ENSMUSG00000027188
Upvotes: 1