Prometheus
Prometheus

Reputation: 2017

Rank values column-wise

I want to rank values column-wise.

I have the following dataframe:

dput(test)
structure(list(Name = c("A", "B", "C", "D"), Margin = c(744, 
3196.4722, 0, 394), T1 = c(420, 200, 2150, 70), T2 = c(630, 285, 
2365, 84), T3 = c(630, 335, 2580, 105), T4 = c(666, 410, 2795, 
128), T5 = c(2244, 2961.7931, 3010, 142), T6 = c(2244, 3652.472, 
3440, 151), T7 = c(2244, 3722.472, 3870, 168), T8 = c(2244, 3887.472, 
5160, 187), T9 = c(2244, 4112.472, 6450, 225), T10 = c(2244, 
4337.472, 6450, 225), T11 = c(798, 3567.472, 4300, 112), T12 = c(630, 
3582.472, 4300, 111), T13 = c(702, 3582.472, 4300, 112), T14 = c(3600, 
4637.472, 3440, 78), T15 = c(744, 3067.306, 2580, 274), T16 = c(744, 
2770.5666, 2580, 197), T17 = c(744, 3138.806, 2580, 80), T18 = c(2244, 
3920.0836, 3870, 401), T19 = c(2244, 2789.1117, 1290, 127)), .Names = c("Name", 
"Margin", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", 
"T10", "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", 
"T19"), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"
))

Each row has unique ID in name, and I want to rank the columns to determine which column is equal or least small to the value in the margin column.

The ideal output would be:

Name    Margin    Closest_Column
 A      744.000        T15

Break ties could be random.

My attempt:

nm1 <- paste("rank", names(test)[3:21], sep="_")
test[nm1] <-  mutate_all(test[3:21],funs(rank(., ties.method="first")))

Upvotes: 1

Views: 401

Answers (3)

David Arenburg
David Arenburg

Reputation: 92282

I would go for the long format

library(tidyr)
library(dplyr)

test %>%
  gather(Variable, Value, -(Name:Margin)) %>%
  group_by(Name, Margin) %>%
  summarise(Closest = Variable[which.min(abs(Value - Margin))])

# A tibble: 4 x 3
# Groups:   Name [?]
#    Name   Margin Closest
#   <chr>    <dbl>   <chr>
# 1     A  744.000     T15
# 2     B 3196.472     T17
# 3     C    0.000     T19
# 4     D  394.000     T18

Or using data.table

library(data.table)
melt(setDT(test), 1:2
     )[, .(Closest = variable[which.min(abs(value - Margin))]),
         by = .(Name, Margin)]
#    Name   Margin Closest
# 1:    A  744.000     T15
# 2:    B 3196.472     T17
# 3:    C    0.000     T19
# 4:    D  394.000     T18

Upvotes: 3

akrun
akrun

Reputation: 886938

If we need to use tidyverse, one approach is rowwise and then find the index of the minimum difference between the 'Margin' and other columns to get the column names

test %>% 
      rowwise() %>% 
      do(data.frame(.[1:2], Closest_column = names(.)[3:21][which.min(abs(.[[2]]-
                        unlist(.[3:21])))]))
# A tibble: 4 x 3
#    Name   Margin Closest_column
#* <chr>    <dbl>          <chr>
#1     A  744.000            T15
#2     B 3196.472            T17
#3     C    0.000            T19
#4     D  394.000            T18

Or another option is

library(tidyverse)
gather(test, Closest_column, val, T1:T19) %>%
        group_by(Name) %>% 
        slice(which.min(abs(Margin - val))) %>%
        select(-val)
# A tibble: 4 x 3
# Groups:   Name [4]
#    Name   Margin Closest_column
#  <chr>    <dbl>          <chr>
#1     A  744.000            T15
#2     B 3196.472            T17
#3     C    0.000            T19
#4     D  394.000            T18

With base R an efficient option would be max.col

cbind(test[1:2], 
    Closest_column = names(test)[3:21][max.col(-abs(test[3:21]-test[[2]]), 'first')])
#    Name   Margin Closest_column
#1    A  744.000            T15
#2    B 3196.472            T17
#3    C    0.000            T19
#4    D  394.000            T18

Upvotes: 1

IRTFM
IRTFM

Reputation: 263301

Use cbind.data.frame to align the first two columns to a column constructed from the name chosen as the one with the minimum of absolute value of column minus Margin:

cbind( test[1:2], Closest_Column =
    apply(test[-1], 1, function(x) names(x[-1])[which.min( abs(x[-1]-x[1]))] ) )
  Name   Margin Closest_Column
1    A  744.000            T15
2    B 3196.472            T17
3    C    0.000            T19
4    D  394.000            T18

Upvotes: 2

Related Questions