luser
luser

Reputation: 355

Ordering multiple columns using cut-off values in R

Given n columns, I'd like the first column to be in descending order, until a cutoff is reached and then the second column would be ordered, and so on. It's a little tricky to explain, so here's an example for the most simple case, with two columns:

df <- structure(list(x = c(0.92, 0.9, 0.9, 0.88, 0.86, 0.52, 0.35, 0.28, 0.02, -0.03, 
-0.02, -0.06, -0.06, -0.01, -0.01, 0.01, -0.03, -0.03, 0.01, 0.05, -0.01), 
y = c(0.03, -0.01, -0.03, 0, 0.02, 0.01, -0.08, 0.04, 0.71, 0.71, 0.69, 0.57,
 0.55, 0.52, 0.4, -0.37, 0.37, 0.36, -0.34, -0.33, -0.32)), 
.Names = c("x", "y"), row.names = c(NA, 21L), class = "data.frame")

After ordering, df looks like this:

df[with(df, order (-abs(df$x), -abs(df$y))),]
       x     y
1   0.92  0.03
3   0.90 -0.03
2   0.90 -0.01
4   0.88  0.00
5   0.86  0.02
6   0.52  0.01
7   0.35 -0.08
8   0.28  0.04
12 -0.06  0.57
13 -0.06  0.55
20  0.05 -0.33
10 -0.03  0.71
17 -0.03  0.37
18 -0.03  0.36
9   0.02  0.71
11 -0.02  0.69
14 -0.01  0.52
15 -0.01  0.40
16  0.01 -0.37
19  0.01 -0.34
21 -0.01 -0.32

Only, column x is ordered, as expected. However, if I wanted to institute a cutoff of .32, where variables in the first column are arranged in descending order until the value < .32, whereupon it switches to ordering the second column, how might I do this?

I can do it by assigning NA to values < .32, but I need to keep all the values, just order them in a similar way!

df.na<-df
df.na[abs(df.na)<.32]<-NA
df.na[with(df.na, order (-abs(df.na$x), -abs(df.na$y))),]
      x     y
1  0.92    NA
2  0.90    NA
3  0.90    NA
4  0.88    NA
5  0.86    NA
6  0.52    NA
7  0.35    NA
9    NA  0.71
10   NA  0.71
11   NA  0.69
12   NA  0.57
13   NA  0.55
14   NA  0.52
15   NA  0.40
16   NA -0.37
17   NA  0.37
18   NA  0.36
19   NA -0.34
20   NA -0.33
21   NA -0.32
8    NA    NA

The solution will end up being used on a df with 10+ columns, so scalability is an issue!

Upvotes: 1

Views: 1329

Answers (2)

Sam Dickson
Sam Dickson

Reputation: 5239

Create two more variables: one to indicate whether or not the x is greater than 0.32 and the other containing the values of x if it is greater than 0.32 and y otherwise:

ord1 <- apply(as.matrix(df),1,function(x) min(which(abs(x)>=0.32),ncol(df)))
ord2 <- df[cbind(1:nrow(df),ord1)]

df[order(ord1,-abs(ord2)),]
#        x     y
# 1   0.92  0.03
# 2   0.90 -0.01
# 3   0.90 -0.03
# 4   0.88  0.00
# 5   0.86  0.02
# 9   0.02  0.71
# 10 -0.03  0.71
# 11 -0.02  0.69
# 12 -0.06  0.57
# 13 -0.06  0.55
# 14 -0.01  0.52
# 15 -0.01  0.40
# 16  0.01 -0.37
# 17 -0.03  0.37
# 18 -0.03  0.36
# 19  0.01 -0.34
# 20  0.05 -0.33
# 21 -0.01 -0.32
# 7   0.35 -0.08
# 8   0.28  0.04
# 6   0.52  0.01

This can be generalized to multiple columns as follows:

set.seed(123)
df <- data.frame(x1=runif(21,-1,1),
                 x2=runif(21,-1,1),
                 x3=runif(21,-1,1),
                 x4=runif(21,-1,1),
                 x5=runif(21,-1,1))

ord1 <- apply(as.matrix(df),1,function(x) min(which(abs(x)>=0.7),ncol(df)))
ord2 <- df[cbind(1:nrow(df),ord1)]

df[order(ord1,-abs(ord2)),]
#             x1          x2         x3          x4          x5
# 18 -0.91588093 -0.36363798 -0.2510744 -0.51276105 -0.33435292
# 11  0.91366669  0.80459809  0.5978497 -0.99875045 -0.35925352
# 20  0.90900730 -0.71439996 -0.8103187 -0.16470644  0.90894765
# 6  -0.90888700  0.08813205 -0.0680751  0.58868464 -0.64989470
# 5   0.88093457  0.41706094 -0.5339318  0.62477902  0.77293812
# 16  0.79964994  0.51691908  0.5066157 -0.29640418  0.02301092
# 15 -0.79415063 -0.04440806 -0.7449367  0.22554201 -0.06644192
# 8   0.78483809 -0.42168053  0.7156554  0.50895032  0.30620385
# 21  0.77907863 -0.17090733 -0.2320607  0.57639167 -0.03419521
# 4   0.76603481  0.31141160 -0.7223879  0.62012871  0.78610223
# 3  -0.18204616  0.98853955 -0.6951105 -0.10296732  0.96991396
# 14  0.14526680 -0.95077263 -0.5869372 -0.24036692 -0.81281003
# 10 -0.08677053  0.92604847 -0.1155999  0.42036480  0.31351626
# 9   0.10287003 -0.70577271 -0.9083377  0.25844226 -0.31296706
# 17 -0.50782453 -0.56718413  0.7900907 -0.77772915  0.19997792
# 12 -0.09333169  0.38141056 -0.7562015 -0.04936685 -0.62461776
# 1  -0.42484496  0.38560681 -0.1725513 -0.45123271 -0.79427071
# 7   0.05621098  0.18828404 -0.4680547 -0.12033662 -0.73860862
# 13  0.35514127  0.59093484  0.1218960 -0.55976223  0.56458860
# 2   0.57661027  0.28101363 -0.2623091  0.62928008 -0.13021452
# 19 -0.34415856 -0.53674843  0.3302304  0.33611117 -0.02277393

Upvotes: 1

bshelt141
bshelt141

Reputation: 1223

I would just create a "cutoff" variable > split the data frame in two at the cutoff variable > rearrange the independent data frames > rbind them back together.

library(dplyr)

df <- structure(list(x = c(0.92, 0.9, 0.9, 0.88, 0.86, 0.52, 0.35, 0.28, 0.02, -0.03, 
                       -0.02, -0.06, -0.06, -0.01, -0.01, 0.01, -0.03, -0.03, 0.01, 0.05, -0.01), 
                 y = c(0.03, -0.01, -0.03, 0, 0.02, 0.01, -0.08, 0.04, 0.71, 0.71, 0.69, 0.57,
                       0.55, 0.52, 0.4, -0.37, 0.37, 0.36, -0.34, -0.33, -0.32)), 
            .Names = c("x", "y"), row.names = c(NA, 21L), class = "data.frame")

cutoff <- abs(0.32)

new_df <- rbind(df %>% filter(x >= cutoff ) %>% arrange(desc(abs(x))), 
                df %>% filter(x < cutoff) %>% arrange(desc(abs(y))))

Upvotes: 1

Related Questions