Reputation: 281
I need to sort the below dataframe1 based on another dataframe2 columns:
Dataframe1:
LC Item Rolledfcst
MW92 1358576 2125
RM11 1358576 3955
WK14 1358576 307
SW92 1358576 190
MW92 1267890 200
SW92 1267890 670
RM11 1267890 890
WK14 1267890 245
Dataframe 2:
Item LC1 LC2 LC3
1358576 RM11 MW92 SW92
1358576 RM11 WK14 NA
1267890 MW92 SW92 NA
1267890 RM11 WK14 NA
Now for each Item in Dataframe1 it should sort the LC based on Dataframe2 columns in the order such that first column LC1 elements then LC2 and after that LC3.
Note: For each Item, in LC1 or LC2 or LC3 there are 2 LCs in any column then there order doesn't matter
Output Dataframe:
LC Item Rolledfcst
RM11 1358576 3955
MW92 1358576 2125
WK14 1358576 307
SW92 1358576 190
MW92 1267890 200
RM11 1267890 890
SW92 1267890 670
WK14 1267890 245
Upvotes: 4
Views: 225
Reputation: 20399
A tidyverse
solution:
df1 <- structure(list(LC = c("MW92", "RM11", "WK14", "SW92", "MW92",
"SW92","RM11", "WK14"),
Item = c(1358576L, 1358576L, 1358576L, 1358576L,
1267890L, 1267890L, 1267890L, 1267890L),
Rolledfcst = c(2125L, 3955L, 307L, 190L, 200L, 670L,
890L, 245L)),
class = "data.frame",
row.names = c(NA, -8L))
df2 <- structure(list(Item = c(1358576L, 1358576L, 1267890L, 1267890L),
LC1 = c("RM11", "RM11", "MW92", "RM11"),
LC2 = c("MW92", "WK14", "SW92", "WK14"),
LC3 = c("SW92", NA, NA, NA)),
class = "data.frame",
row.names = c(NA, -4L))
First determine an order via df2
in long format:
library(tidyverse)
(ord <- df2 %>%
gather(LC, value, -Item) %>%
arrange(Item, LC) %>%
group_by(Item) %>%
mutate(order = as.numeric(factor(value, unique(value)))) %>%
filter(!is.na(order)) %>% unique())
# # A tibble: 8 x 4
# # Groups: Item [2]
# Item LC value order
# <int> <chr> <chr> <dbl>
# 1 1267890 LC1 MW92 1
# 2 1267890 LC1 RM11 2
# 3 1267890 LC2 SW92 3
# 4 1267890 LC2 WK14 4
# 5 1358576 LC1 RM11 1
# 6 1358576 LC2 MW92 2
# 7 1358576 LC2 WK14 3
# 8 1358576 LC3 SW92 4
Now, join df1
and ord
and sort to item
and order
:
left_join(df1,
ord %>% select(-LC),
by = c("Item", LC = "value")) %>%
arrange(desc(Item), order) %>% select(-order)
# LC Item Rolledfcst
# 1 RM11 1358576 3955
# 2 MW92 1358576 2125
# 3 WK14 1358576 307
# 4 SW92 1358576 190
# 5 MW92 1267890 200
# 6 RM11 1267890 890
# 7 SW92 1267890 670
# 8 WK14 1267890 245
As per the comments here a pure base R
solution:
## transform data to long format
ord <- reshape(df2, varying=names(df2)[-1], direction = "long", v.names = "LC")
## sort according to item and time
ord <- ord[order(-ord$Item, ord$time), ]
## remove NAs
ord <- ord[!is.na(ord$LC),]
## remove duplicates
ord <- ord[!duplicated(ord[, c(1, 3)]), ]
## add the order
split(ord$id, ord$Item) <- lapply(split(ord$id, ord$Item), seq_along)
## merge the data
df.res <- merge(df1, ord[, -2], by = c("Item", "LC"))
## sort according to order
df.res[order(-df.res$Item, df.res$id), -4]
# Item LC Rolledfcst
# 6 1358576 RM11 3955
# 5 1358576 MW92 2125
# 8 1358576 WK14 307
# 7 1358576 SW92 190
# 1 1267890 MW92 200
# 2 1267890 RM11 890
# 3 1267890 SW92 670
# 4 1267890 WK14 245
Upvotes: 3
Reputation: 389235
A base R approach using split
and mapply
. We convert Item
to factor
based on it's occurrence in df1
then split
row numbers of df1
based on Item
column and match
those LC
values with unlisted values of df2
to get original row numbers in sorted manner to subset.
df1$Item <- factor(df1$Item, levels = unique(df1$Item))
df2$Item <- factor(df2$Item, levels = unique(df1$Item))
df1[c(mapply(function(x, y) y[match(df1$LC[y], unique(unlist(x)))],
split(df2[-1], df2$Item), split(seq_along(df1$LC), df1$Item))), ]
# LC Item Rolledfcst
#2 RM11 1358576 3955
#1 MW92 1358576 2125
#3 WK14 1358576 307
#4 SW92 1358576 190
#5 MW92 1267890 200
#7 RM11 1267890 890
#6 SW92 1267890 670
#8 WK14 1267890 245
Upvotes: 2
Reputation: 3996
Read in Data:
df1 <- read.table(text = 'LC Item Rolledfcst
MW92 1358576 2125
RM11 1358576 3955
WK14 1358576 307
SW92 1358576 190
MW92 1267890 200
SW92 1267890 670
RM11 1267890 890
WK14 1267890 245', header = T)
df2 <- read.table(text = ' Item LC1 LC2 LC3
1358576 RM11 MW92 SW92
1358576 RM11 WK14 NA
1267890 MW92 SW92 NA
1267890 RM11 WK14 NA', header = T)
Gather LC columns into single column and create an ID column containing the sort order
library(tidyr)
library(dplyr)
df2 <- df2 %>%
gather(LC, value,2:4) %>%
mutate(sort_id = paste0(Item,value))
# Item LC value sort_id
#1 1358576 LC1 RM11 1358576RM11
#2 1358576 LC1 RM11 1358576RM11
#3 1267890 LC1 MW92 1267890MW92
#4 1267890 LC1 RM11 1267890RM11
#5 1358576 LC2 MW92 1358576MW92
#6 1358576 LC2 WK14 1358576WK14
#7 1267890 LC2 SW92 1267890SW92
#8 1267890 LC2 WK14 1267890WK14
#9 1358576 LC3 SW92 1358576SW92
#10 1358576 LC3 <NA> 1358576NA
#11 1267890 LC3 <NA> 1267890NA
#12 1267890 LC3 <NA> 1267890NA
Create a sort_id column in the same way for df1
and apply the factor levels from df2
. The factor levels will govern the sort order when using arrange
.
df1 %>%
mutate(sort_id = factor(paste0(Item,LC), levels = unique(df2$sort_id)),
Item = factor(Item, levels = unique(df2$Item))) %>%
group_by(Item) %>%
arrange(sort_id, .by_group = T) %>%
select(-sort_id)
#LC Item Rolledfcst
# <fct> <fct> <int>
#1 RM11 1358576 3955
#2 MW92 1358576 2125
#3 WK14 1358576 307
#4 SW92 1358576 190
#5 MW92 1267890 200
#6 RM11 1267890 890
#7 SW92 1267890 670
#8 WK14 1267890 245
Upvotes: 1