Reputation: 2940
How can I return the column name and value of row-wise maximum and second-maximum of a data.frame?
Example data:
example_df <- data.frame(
mycol1 =c(101,-108,140,140,-150),
mycol2 = c(111,-105,128,-220,-150),
mycol3 = c(-103,-102,123,-210,-180),
mycol4 =c(106,-102,140,-200,-180),
mycol5 =c(-105,-106,120,-150,-180)
)
Desired data:
desired_df <- data.frame(
firstmax=c("mycol2 111","mycol3 -102","mycol1 140","mycol1 140", "mycol1 -150"),
secondmax=c("mycol4 106", "mycol4 -102", "mycol4 140","mycol5 -150","mycol2 -150"),
stringsAsFactors = F
)
Upvotes: 2
Views: 105
Reputation: 971
This is a perfect use case for using topn
from the kit
package and calling it twice will still be faster than sorting for bigger data.frame
library(data.table)
library(kit)
dt <- data.table(mycol1=c(101,-108,140,140,-150), mycol2 = c(111,-105,128,-220,-150), mycol3 = c(-103,-102,123,-210,-180), mycol4 =c(106,-102,140,-200,-180), mycol5 =c(-105,-106,120,-150,-180))
res <- as.data.table(t(apply(dt, 1, function(x) paste(colnames(dt)[topn(x, 2L)], topn(x, 2L, index=FALSE)))))
res
#> V1 V2
#> 1: mycol2 111 mycol4 106
#> 2: mycol3 -102 mycol4 -102
#> 3: mycol1 140 mycol4 140
#> 4: mycol1 140 mycol5 -150
#> 5: mycol1 -150 mycol2 -150
Upvotes: 0
Reputation: 24732
Here is a data.table approach:
library(data.table)
df_long = melt(setDT(example_df)[, i:=.I],"i")[
order(-value),.SD[1:2,.(paste(variable,value),c("firstmax", "secondmax"))],i]
dcast(df_long, i~V2, value.var="V1")[,c(2,3)]
Output:
firstmax secondmax
1: mycol2 111 mycol4 106
2: mycol3 -102 mycol4 -102
3: mycol1 140 mycol4 140
4: mycol1 140 mycol5 -150
5: mycol1 -150 mycol2 -150
Upvotes: 2
Reputation: 177
Here is my take on the query using loops
example_list<-list() #stores col name and value of highest and second highest
for (i in 1:nrow(example_df)){
example_list[[i]]<-sort(example_df[i,],decreasing = TRUE)[1:2]}
example_list2<-list() #list with 1st sublevel containing highest colname and value and 2nd sublevel containing 2nd highest colname and value for each level of the list
for (i in 1:length(example_list)){
jnk<-list()
for (j in 1:length(example_list[[i]])){
jnk[[j]]<-c(colnames(example_list[[i]][j]),as.numeric(example_list[[i]][j]))
example_list2[[i]]<-jnk}}
first_max<-list() #list with colname and value of highest
for (i in 1:length(example_list2)){
first_max[[i]]<-example_list2[[i]][[1]]}
second_max<-list() #list with colname and value of 2nd highest
for (i in 1:length(example_list2)){
second_max[[i]]<-example_list2[[i]][[2]]}
first_max2<-as.character() #colname and value of highest pasted together
for (i in 1:length(first_max)){
first_max2[i]<-paste0(first_max[[i]],sep=" ",collapse = "")}
second_max2<-as.character() #colname and value of 2nd highest pasted together
for (i in 1:length(second_max)){
second_max2[i]<-paste0(second_max[[i]],sep=" ",collapse = "")}
final_df<-data.frame(firstmax=first_max2,secondmax=second_max2) #desired df
Upvotes: 0
Reputation: 887251
Here is one way with max.col
- get the max column index per row, use that index to replace
the values to -Inf
, extract the second index and create a data.frame after paste
ing with the column names extracted
first <- max.col(example_df, 'first')
rn <- seq_len(nrow(example_df))
second <- max.col(replace(example_df, cbind(rn, first), -Inf), "first")
data.frame(firstmax = paste(names(example_df)[first],
example_df[cbind(rn, first)]),
secondmax = paste(names(example_df)[second], example_df[cbind(rn, second)]))
-output
firstmax secondmax
1 mycol2 111 mycol4 106
2 mycol3 -102 mycol4 -102
3 mycol1 140 mycol4 140
4 mycol1 140 mycol5 -150
5 mycol1 -150 mycol2 -150
Upvotes: 1
Reputation: 51592
This is probably not the most efficient way to go about it but it gets the job done,
vals <- t(apply(example_df, 1, \(i)sort(i, decreasing = TRUE))[1:2,])
nms <- t(apply(example_df, 1, \(i)names(i)[order(i, decreasing = TRUE)])[1:2,])
mapply(\(x, y) paste(x, y, sep = ' '), data.frame(nms), data.frame(vals))
X1 X2
[1,] "mycol2 111" "mycol4 106"
[2,] "mycol3 -102" "mycol4 -102"
[3,] "mycol1 140" "mycol4 140"
[4,] "mycol1 140" "mycol5 -150"
[5,] "mycol1 -150" "mycol2 -150"
You can tidy up the output the way you want. You can also play around with max.col()
to somewhat vectorize it
Upvotes: 4