user4381526
user4381526

Reputation:

Using filter() to subset dataframe

This is my dataframe:

structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 
1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 1976L, 1976L), Month = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L), A = c(0.419727177, 
0.411522634, 0.415627598, 0.425350915, 0.431778929, 0.455892409, 
0.464252553, 0.473933649, 0.48947626, 0.481231954, 0.495049505, 
0.49419323, 0.4927322, 0.493778392), log.S.grb = c(-0.86815035734372, 
-0.887891256732457, -0.877965616764487, -0.854840768392932, -0.839841560146749, 
-0.785498442482632, -0.767326579572197, -0.746687948097975, -0.714419316655485, 
-0.731405892265036, -0.703097511313113, -0.704828684428783, -0.707789457396815, 
-0.705668461635217), D = c(10.2641, 9.7704, 9.3694, 9.2403, 9.4459, 
9.4826, 10.4272, 10.3805, 10.4835, 11.4103, 10.988, 10.7708, 
9.2987, 8.6161), E = c(5.68, 5.4, 5.53, 5.5, 5.2, 5.86, 6.25, 
6.36, 6.58, 5.51, 5.54, 5.2, 4.73, 5), J = c(1.15663289, 1.05923536, 
0.938740721, 0.890710069, 1.012043355, 0.843618397, 0.850583558, 
0.957856493, 0.888553262, 1.391339534, 1.309574432, 1.322714922, 
1.247746749, 0.894350421), st3_st.grb = c(NA, NA, NA, 1.33095889507878, 
4.80496965857083, 9.24671742818556, 8.75141888207357, 9.31536120487736, 
7.10791258271466, 3.59206873071611, 4.35904367848617, 0.959063222670167, 
2.36164348682203, -0.257095032210358), idiff_3m.grb = c(1.08206512614859, 
1.03439401057093, 0.909556730440464, 0.886569121738501, 1.00676358283349, 
0.856864819463321, 0.983497311932147, 0.946390153374566, 0.917819573656709, 
1.38813518035337, 1.28343598984861, 1.31491300418167, 1.08566149047631, 
0.860507535403032), EXCESS = c("NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")), row.names = c(NA, 
14L), class = "data.frame")

Im using this command to filter data between 03/1975 to 01/1976 but its not working.

library(dplyr)

datawork %>%  filter(Year>=1975 & Month>=5 & Year <=1976 & Month <=1 ) 

What am I doing wrong?

Upvotes: 0

Views: 110

Answers (1)

Dean
Dean

Reputation: 499

Given Year and Month are just columns of integers the logical predicate relating to Month will return an empty data frame. One solution would be to convert to date format and use dplyr::between in the filter line. (see reproducible example below).

As commented by @TCZhang, without date conversion, you can think of Year = 1976 Month = 3 as Year = 1976 + 3/12 Years. So by multiplying the fractions you can get (1976 * 12) + 3 which you can use directly in dplyr::filter

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

 datawork <- structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 
1975L, 1975L, 1975L, 1975L, 1975L, 1975L, 1976L, 1976L), Month = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L), A = c(0.419727177, 
0.411522634, 0.415627598, 0.425350915, 0.431778929, 0.455892409, 
0.464252553, 0.473933649, 0.48947626, 0.481231954, 0.495049505, 
0.49419323, 0.4927322, 0.493778392), log.S.grb = c(-0.86815035734372, 
-0.887891256732457, -0.877965616764487, -0.854840768392932, -0.839841560146749, 
-0.785498442482632, -0.767326579572197, -0.746687948097975, -0.714419316655485, 
-0.731405892265036, -0.703097511313113, -0.704828684428783, -0.707789457396815, 
-0.705668461635217), D = c(10.2641, 9.7704, 9.3694, 9.2403, 9.4459, 
9.4826, 10.4272, 10.3805, 10.4835, 11.4103, 10.988, 10.7708, 
9.2987, 8.6161), E = c(5.68, 5.4, 5.53, 5.5, 5.2, 5.86, 6.25, 
6.36, 6.58, 5.51, 5.54, 5.2, 4.73, 5), J = c(1.15663289, 1.05923536, 
0.938740721, 0.890710069, 1.012043355, 0.843618397, 0.850583558, 
0.957856493, 0.888553262, 1.391339534, 1.309574432, 1.322714922, 
1.247746749, 0.894350421), st3_st.grb = c(NA, NA, NA, 1.33095889507878, 
4.80496965857083, 9.24671742818556, 8.75141888207357, 9.31536120487736, 
7.10791258271466, 3.59206873071611, 4.35904367848617, 0.959063222670167, 
2.36164348682203, -0.257095032210358), idiff_3m.grb = c(1.08206512614859, 
1.03439401057093, 0.909556730440464, 0.886569121738501, 1.00676358283349, 
0.856864819463321, 0.983497311932147, 0.946390153374566, 0.917819573656709, 
1.38813518035337, 1.28343598984861, 1.31491300418167, 1.08566149047631, 
0.860507535403032), EXCESS = c("NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")), row.names = c(NA, 
14L), class = "data.frame")

datawork %>% 
  mutate(date = make_date(year = Year, month = Month)) %>% 
  filter(between(date, as.Date("1975-03-01"), as.Date("1976-01-01")))
#>    Year Month         A  log.S.grb       D    E         J st3_st.grb
#> 1  1975     3 0.4156276 -0.8779656  9.3694 5.53 0.9387407         NA
#> 2  1975     4 0.4253509 -0.8548408  9.2403 5.50 0.8907101  1.3309589
#> 3  1975     5 0.4317789 -0.8398416  9.4459 5.20 1.0120434  4.8049697
#> 4  1975     6 0.4558924 -0.7854984  9.4826 5.86 0.8436184  9.2467174
#> 5  1975     7 0.4642526 -0.7673266 10.4272 6.25 0.8505836  8.7514189
#> 6  1975     8 0.4739336 -0.7466879 10.3805 6.36 0.9578565  9.3153612
#> 7  1975     9 0.4894763 -0.7144193 10.4835 6.58 0.8885533  7.1079126
#> 8  1975    10 0.4812320 -0.7314059 11.4103 5.51 1.3913395  3.5920687
#> 9  1975    11 0.4950495 -0.7030975 10.9880 5.54 1.3095744  4.3590437
#> 10 1975    12 0.4941932 -0.7048287 10.7708 5.20 1.3227149  0.9590632
#> 11 1976     1 0.4927322 -0.7077895  9.2987 4.73 1.2477467  2.3616435
#>    idiff_3m.grb EXCESS       date
#> 1     0.9095567     NA 1975-03-01
#> 2     0.8865691     NA 1975-04-01
#> 3     1.0067636     NA 1975-05-01
#> 4     0.8568648     NA 1975-06-01
#> 5     0.9834973     NA 1975-07-01
#> 6     0.9463902     NA 1975-08-01
#> 7     0.9178196     NA 1975-09-01
#> 8     1.3881352     NA 1975-10-01
#> 9     1.2834360     NA 1975-11-01
#> 10    1.3149130     NA 1975-12-01
#> 11    1.0856615     NA 1976-01-01

Created on 2018-08-31 by the reprex package (v0.2.0).

Upvotes: 2

Related Questions