Reputation: 449
I have a simple dataframe as shown below:
structure(list(DAILY_INJ_DATE = c("2018-01", "2018-02", "2018-03",
"2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
"2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-03",
"2019-04", "2019-05", "2019-06", "2019-07", "2019-08", "2019-09",
"2019-10", "2019-11", "2019-12", "2020-01", "2020-02", "2020-03",
"2020-04", "2020-05", "2020-06", "2020-07", "2020-08", "2020-09",
"2020-10", "2020-11", "2020-12", "2018-01", "2018-02", "2018-03",
"2018-04", "2018-05", "2018-06", "2018-07", "2018-08", "2018-09",
"2018-10", "2018-11", "2018-12", "2019-01", "2019-02", "2019-06",
"2019-07", "2019-08", "2019-09", "2019-10", "2019-11", "2019-12",
"2020-01", "2020-02", "2020-03", "2020-04", "2020-05", "2020-06",
"2020-07", "2020-08", "2020-09", "2020-10", "2020-11", "2020-12"
), PID = c("42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135311180000", "42135311180000", "42135311180000",
"42135311180000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000", "42135335900000", "42135335900000",
"42135335900000", "42135335900000"), InjIndex = c(3.1488349310755e-05,
7.16470821042452e-05, 3.08198068625437e-05, 0.00365977544989287,
0.000102146739534363, 6.97288098968181e-05, 6.67030385322113e-05,
0.000101198808641258, 6.96471158898905e-05, 0.000100457907956119,
0.002770103468248, 0.000141272149337637, 3.71747211895911e-05,
0, 0, 0, NA, NA, 0.00261196063305948, 0.0020329847793613, 0.0268256888287629,
0.0190615086256689, 0.00165037617202441, 0.00823890291192408,
0.0149562009694358, 3.82198063529811e-05, 0.00703837718531629,
0.0460765131610604, 0.0571638755572333, 0.0600559821857274, 0.0636357826177028,
0.0643659884529977, 0.0577969845601966, 0.0588167585535698, 0.0593479205060031,
0.0478238114640216, 0.0579565073781893, 0.0439869629670818, 0.056714771440236,
0.122274207049878, 0.136105301010138, 0.133225772135695, 0.126920643583703,
0.128496063591315, 0.14043302451169, 0.113191351198699, 0.125443452699286,
0.146339474772728, 0.0191599802822513, NA, 0.133221262910392,
0.216814720357711, 0.606926958546271, NA, NA, 0.131402308568841,
NA, 0.355567523506574, NA, 0.0234750006884004, 0.0416741137140514,
NA, NA, 0.0585083175072382, NA, 0.0852075310970539, 0.0691143041976479,
NA, NA)), row.names = c(NA, 69L), class = "data.frame")
I want to sort this table first by the 'PID' column in increasing order and then by the 'DAILY_INJ_DATE' column in descending order.
I used the following code:
# Attempt: sort by PID ascending, then by DAILY_INJ_DATE descending by negating its numeric conversion.
df1 <- df[order(df$PID, -as.numeric(df$DAILY_INJ_DATE)),]
But the result I get is incorrect. For example, the last date for PID = '42135311180000' is '2020-12', but after performing the ordering, the resulting data frame still does not list that date first.
Upvotes: 2
Views: 336
Reputation: 616
You are trying to convert a character column (DAILY_INJ_DATE) to numeric, which yields only NA values:
as.numeric(df$DAILY_INJ_DATE)
# [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
# [28] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
# [55] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
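However, these date strings sort correctly as character values, without converting them to numeric or Date: because they are zero-padded "YYYY-MM" strings, alphabetical order is the same as chronological order.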
# Sort by PID ascending, then DAILY_INJ_DATE descending. The date strings are
# compared as plain text; method = "radix" lets `decreasing` take one flag
# per sort key.
df[with(df, order(
  PID,
  DAILY_INJ_DATE,
  decreasing = c(FALSE, TRUE),
  method = "radix"
)), ]
The 'radix' sort method allows you to pass a vector of values to the decreasing= argument, one logical value for each sort key.
Output:
# DAILY_INJ_DATE PID InjIndex
# 36 2020-12 42135311180000 4.782381e-02
# 35 2020-11 42135311180000 5.934792e-02
# 34 2020-10 42135311180000 5.881676e-02
# 33 2020-09 42135311180000 5.779698e-02
# 32 2020-08 42135311180000 6.436599e-02
# 31 2020-07 42135311180000 6.363578e-02
# 30 2020-06 42135311180000 6.005598e-02
# 29 2020-05 42135311180000 5.716388e-02
# 28 2020-04 42135311180000 4.607651e-02
# 27 2020-03 42135311180000 7.038377e-03
# 26 2020-02 42135311180000 3.821981e-05
# 25 2020-01 42135311180000 1.495620e-02
# 24 2019-12 42135311180000 8.238903e-03
# 23 2019-11 42135311180000 1.650376e-03
# 22 2019-10 42135311180000 1.906151e-02
# 21 2019-09 42135311180000 2.682569e-02
# 20 2019-08 42135311180000 2.032985e-03
# 19 2019-07 42135311180000 2.611961e-03
# 18 2019-06 42135311180000 NA
# 17 2019-05 42135311180000 NA
# 16 2019-04 42135311180000 0.000000e+00
# 15 2019-03 42135311180000 0.000000e+00
# 14 2019-02 42135311180000 0.000000e+00
# 13 2019-01 42135311180000 3.717472e-05
# 12 2018-12 42135311180000 1.412721e-04
# 11 2018-11 42135311180000 2.770103e-03
# 10 2018-10 42135311180000 1.004579e-04
# 9 2018-09 42135311180000 6.964712e-05
# 8 2018-08 42135311180000 1.011988e-04
# 7 2018-07 42135311180000 6.670304e-05
# 6 2018-06 42135311180000 6.972881e-05
# 5 2018-05 42135311180000 1.021467e-04
# 4 2018-04 42135311180000 3.659775e-03
# 3 2018-03 42135311180000 3.081981e-05
# 2 2018-02 42135311180000 7.164708e-05
# 1 2018-01 42135311180000 3.148835e-05
# 69 2020-12 42135335900000 NA
# 68 2020-11 42135335900000 NA
# 67 2020-10 42135335900000 6.911430e-02
# 66 2020-09 42135335900000 8.520753e-02
# 65 2020-08 42135335900000 NA
# 64 2020-07 42135335900000 5.850832e-02
# 63 2020-06 42135335900000 NA
# 62 2020-05 42135335900000 NA
# 61 2020-04 42135335900000 4.167411e-02
# 60 2020-03 42135335900000 2.347500e-02
# 59 2020-02 42135335900000 NA
# 58 2020-01 42135335900000 3.555675e-01
# 57 2019-12 42135335900000 NA
# 56 2019-11 42135335900000 1.314023e-01
# 55 2019-10 42135335900000 NA
# 54 2019-09 42135335900000 NA
# 53 2019-08 42135335900000 6.069270e-01
# 52 2019-07 42135335900000 2.168147e-01
# 51 2019-06 42135335900000 1.332213e-01
# 50 2019-02 42135335900000 NA
# 49 2019-01 42135335900000 1.915998e-02
# 48 2018-12 42135335900000 1.463395e-01
# 47 2018-11 42135335900000 1.254435e-01
# 46 2018-10 42135335900000 1.131914e-01
# 45 2018-09 42135335900000 1.404330e-01
# 44 2018-08 42135335900000 1.284961e-01
# 43 2018-07 42135335900000 1.269206e-01
# 42 2018-06 42135335900000 1.332258e-01
# 41 2018-05 42135335900000 1.361053e-01
# 40 2018-04 42135335900000 1.222742e-01
# 39 2018-03 42135335900000 5.671477e-02
# 38 2018-02 42135335900000 4.398696e-02
# 37 2018-01 42135335900000 5.795651e-02
Upvotes: 1
Reputation: 887138
The date column is not of class Date. It can be converted by pasting a day onto each value (paste0) and parsing the result with as.Date; then coerce it to numeric, negate it, and pass it to order:
# "YYYY-MM" is not a complete date, so append the day "-01" before parsing
# with as.Date(). Negating the numeric value of the Date puts the newest
# month first within each PID.
df1 <- df[with(df, order(
  PID,
  -as.numeric(as.Date(paste0(DAILY_INJ_DATE, "-01")))
)), ]
Checking the output:
subset(df1, substr(DAILY_INJ_DATE, 1, 4) == '2018')
# DAILY_INJ_DATE PID InjIndex
#12 2018-12 42135311180000 1.412721e-04
#11 2018-11 42135311180000 2.770103e-03
#10 2018-10 42135311180000 1.004579e-04
#9 2018-09 42135311180000 6.964712e-05
#8 2018-08 42135311180000 1.011988e-04
#7 2018-07 42135311180000 6.670304e-05
#6 2018-06 42135311180000 6.972881e-05
#5 2018-05 42135311180000 1.021467e-04
#4 2018-04 42135311180000 3.659775e-03
#3 2018-03 42135311180000 3.081981e-05
#2 2018-02 42135311180000 7.164708e-05
#1 2018-01 42135311180000 3.148835e-05
#48 2018-12 42135335900000 1.463395e-01
#47 2018-11 42135335900000 1.254435e-01
#46 2018-10 42135335900000 1.131914e-01
#45 2018-09 42135335900000 1.404330e-01
#44 2018-08 42135335900000 1.284961e-01
#43 2018-07 42135335900000 1.269206e-01
#42 2018-06 42135335900000 1.332258e-01
#41 2018-05 42135335900000 1.361053e-01
#40 2018-04 42135335900000 1.222742e-01
#39 2018-03 42135335900000 5.671477e-02
#38 2018-02 42135335900000 4.398696e-02
#37 2018-01 42135335900000 5.795651e-02
Or, using the tidyverse, we can sort directly on the column after converting it to Date class:
library(dplyr)
library(lubridate)
# ymd(..., truncated = 2) accepts the day-less "YYYY-MM" strings and parses
# them as Dates, so desc() can sort them newest first within each PID.
df1 <- arrange(df, PID, desc(ymd(DAILY_INJ_DATE, truncated = 2)))
Upvotes: 1
Reputation: 388982
It may help to separate the year and month into their own columns so that you can use arrange/order easily.
library(dplyr)
library(tidyr)
# Split "YYYY-MM" into Year and Month columns (convert = TRUE turns them into
# integers), then sort by PID ascending with the newest period first.
df %>%
  separate(
    col = DAILY_INJ_DATE,
    into = c("Year", "Month"),
    sep = "-",
    convert = TRUE
  ) %>%
  arrange(PID, desc(Year), desc(Month))
# Year Month PID InjIndex
#1 2020 12 42135311180000 4.7824e-02
#2 2020 11 42135311180000 5.9348e-02
#3 2020 10 42135311180000 5.8817e-02
#4 2020 9 42135311180000 5.7797e-02
#5 2020 8 42135311180000 6.4366e-02
#6 2020 7 42135311180000 6.3636e-02
#7 2020 6 42135311180000 6.0056e-02
#8 2020 5 42135311180000 5.7164e-02
#9 2020 4 42135311180000 4.6077e-02
#...
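If you want to combine the columns again, you can add unite to the end of the pipeline above. Note that because convert = TRUE turned Month into an integer, the recombined value loses its zero padding (e.g. "2020-9" instead of "2020-09"); if you need the original format, build the column with sprintf('%d-%02d', Year, Month) instead.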
%>% unite(DAILY_INJ_DATE, Year, Month, sep = '-')
Upvotes: 1