Anshul S
Anshul S

Reputation: 281

Sort dataframe based on column criteria

I need to sort the below dataframe1 based on another dataframe2 columns:

Dataframe1:

LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245

Dataframe 2:

      Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA

Now for each Item in Dataframe1 it should sort the LC based on Dataframe2 columns in the order such that first column LC1 elements then LC2 and after that LC3.

Note: For each Item, in LC1 or LC2 or LC3 there are 2 LCs in any column then there order doesn't matter

Output Dataframe:

LC       Item     Rolledfcst
RM11    1358576     3955
MW92    1358576     2125
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
RM11    1267890     890
SW92    1267890     670
WK14    1267890     245

Upvotes: 4

Views: 225

Answers (3)

thothal
thothal

Reputation: 20399

A tidyverse solution:

df1 <- structure(list(LC         = c("MW92", "RM11", "WK14", "SW92", "MW92", 
                                     "SW92","RM11", "WK14"), 
                      Item       = c(1358576L, 1358576L, 1358576L, 1358576L, 
                                     1267890L, 1267890L, 1267890L, 1267890L), 
                      Rolledfcst = c(2125L, 3955L, 307L, 190L, 200L, 670L, 
                                     890L, 245L)), 
                      class      = "data.frame", 
                      row.names  = c(NA, -8L))

df2 <- structure(list(Item      = c(1358576L, 1358576L, 1267890L, 1267890L), 
                      LC1       = c("RM11", "RM11", "MW92", "RM11"), 
                      LC2       = c("MW92", "WK14", "SW92", "WK14"), 
                      LC3       = c("SW92", NA, NA, NA)), 
                      class     = "data.frame",  
                      row.names = c(NA, -4L))

First determine an order via df2 in long format:

library(tidyverse)
(ord <- df2 %>%
    gather(LC, value, -Item) %>% 
    arrange(Item, LC) %>% 
    group_by(Item) %>% 
    mutate(order = as.numeric(factor(value, unique(value)))) %>% 
    filter(!is.na(order)) %>% unique())

# # A tibble: 8 x 4
# # Groups:   Item [2]
#      Item LC    value order
#     <int> <chr> <chr> <dbl>
# 1 1267890 LC1   MW92      1
# 2 1267890 LC1   RM11      2
# 3 1267890 LC2   SW92      3
# 4 1267890 LC2   WK14      4
# 5 1358576 LC1   RM11      1
# 6 1358576 LC2   MW92      2
# 7 1358576 LC2   WK14      3
# 8 1358576 LC3   SW92      4

Now, join df1 and ord and sort to item and order:

left_join(df1, 
          ord %>% select(-LC), 
          by = c("Item", LC = "value")) %>% 
   arrange(desc(Item), order) %>% select(-order)

#     LC    Item Rolledfcst
# 1 RM11 1358576       3955
# 2 MW92 1358576       2125
# 3 WK14 1358576        307
# 4 SW92 1358576        190
# 5 MW92 1267890        200
# 6 RM11 1267890        890
# 7 SW92 1267890        670
# 8 WK14 1267890        245

As per the comments here a pure base R solution:

## transform data to long format
ord <- reshape(df2, varying=names(df2)[-1], direction = "long", v.names = "LC")
## sort according to item and time
ord <- ord[order(-ord$Item, ord$time), ]
## remove NAs
ord <- ord[!is.na(ord$LC),]
## remove duplicates
ord <- ord[!duplicated(ord[, c(1, 3)]), ]
## add the order
split(ord$id, ord$Item) <- lapply(split(ord$id, ord$Item), seq_along)
## merge the data
df.res <- merge(df1, ord[, -2], by = c("Item", "LC"))
## sort according to order
df.res[order(-df.res$Item, df.res$id), -4]
#      Item   LC Rolledfcst
# 6 1358576 RM11       3955
# 5 1358576 MW92       2125
# 8 1358576 WK14        307
# 7 1358576 SW92        190
# 1 1267890 MW92        200
# 2 1267890 RM11        890
# 3 1267890 SW92        670
# 4 1267890 WK14        245

Upvotes: 3

Ronak Shah
Ronak Shah

Reputation: 389235

A base R approach using split and mapply. We convert Item to factor based on it's occurrence in df1 then split row numbers of df1 based on Item column and match those LC values with unlisted values of df2 to get original row numbers in sorted manner to subset.

df1$Item <- factor(df1$Item, levels = unique(df1$Item))
df2$Item <- factor(df2$Item, levels = unique(df1$Item))

df1[c(mapply(function(x, y) y[match(df1$LC[y], unique(unlist(x)))], 
           split(df2[-1], df2$Item), split(seq_along(df1$LC), df1$Item))), ]


#    LC    Item Rolledfcst
#2 RM11 1358576       3955
#1 MW92 1358576       2125
#3 WK14 1358576        307
#4 SW92 1358576        190
#5 MW92 1267890        200
#7 RM11 1267890        890
#6 SW92 1267890        670
#8 WK14 1267890        245

Upvotes: 2

Chris
Chris

Reputation: 3996

Read in Data:

df1 <- read.table(text = 'LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245', header = T)


df2 <- read.table(text = ' Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA', header = T)

Gather LC columns into single column and create an ID column containing the sort order

library(tidyr)
library(dplyr)

df2 <- df2 %>%
  gather(LC, value,2:4) %>%
  mutate(sort_id = paste0(Item,value))

#     Item  LC value     sort_id
#1  1358576 LC1  RM11 1358576RM11
#2  1358576 LC1  RM11 1358576RM11
#3  1267890 LC1  MW92 1267890MW92
#4  1267890 LC1  RM11 1267890RM11
#5  1358576 LC2  MW92 1358576MW92
#6  1358576 LC2  WK14 1358576WK14
#7  1267890 LC2  SW92 1267890SW92
#8  1267890 LC2  WK14 1267890WK14
#9  1358576 LC3  SW92 1358576SW92
#10 1358576 LC3  <NA>   1358576NA
#11 1267890 LC3  <NA>   1267890NA
#12 1267890 LC3  <NA>   1267890NA

Create a sort_id column in the same way for df1 and apply the factor levels from df2. The factor levels will govern the sort order when using arrange.

df1 %>%
  mutate(sort_id = factor(paste0(Item,LC), levels = unique(df2$sort_id)),
         Item = factor(Item, levels = unique(df2$Item))) %>%
  group_by(Item) %>%
  arrange(sort_id, .by_group = T) %>%
  select(-sort_id)

#LC    Item    Rolledfcst
#  <fct> <fct>        <int>
#1 RM11  1358576       3955
#2 MW92  1358576       2125
#3 WK14  1358576        307
#4 SW92  1358576        190
#5 MW92  1267890        200
#6 RM11  1267890        890
#7 SW92  1267890        670
#8 WK14  1267890        245

Upvotes: 1

Related Questions