NBE
NBE

Reputation: 651

Filtering dates for time series plot using dplyr

I'm trying to filter my dataset so that it only has rows from the years 1996-2015 and only the columns orgid, stdate, locid, and charnam. The entire dataset covers the years 1988 to 2015 and has a great many other columns. I only recently learned about the dplyr package and figured it would be the way to go. However, I keep getting the following error and I don't understand why: Error in is_character(x) : object 'rlang_is_character' not found

This is my code so far:

    ########## download necessary packages to make script run #########################################################################
# Install pacman when require() reports that it could not be loaded.
# NOTE(review): require() returns FALSE rather than erroring when the package
# is missing, which is why it is usable as a condition here.
if (!require(pacman)) {
  install.packages('pacman')

}

# Load the tidyverse via pacman.
pacman::p_load("tidyverse")
#### Read in the necessary data ######
# Tab-separated file with a header row; character columns are kept as strings
# (stringsAsFactors = F).
roadsalt_data<-read.table("QADportaldata_1988-2015.tsv",header=T,sep="\t",fill=T,stringsAsFactors = F)
# Convert the date column from character class to Date class so ggplot can
# display it as a continuous variable.
roadsalt_data$stdate <- as.Date(roadsalt_data$stdate)
## Filter dataset to only contain years 1996-2015 ########
# Keeps only orgid, stdate, locid and charnam, then filters rows by date.
# NOTE(review): between() is passed the whole data frame `roadsalt_data`
# rather than the `stdate` column -- presumably `stdate` was intended; confirm.
# NOTE(review): the upper bound is 2015-07-01, not the end of 2015 -- confirm
# this matches the "years 1996-2015" intent.
roadsalt_data_sub<-roadsalt_data %>%
                     select(orgid,stdate,locid,charnam) %>%
                     filter(between(roadsalt_data, as.Date("1996-01-01"), as.Date("2015-07-01")))

Here's a preview of the dataset:

structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"
), actid = c("nwisnj.01.01300274", "nwisnj.01.01300273", "nwisnj.01.01300247", 
"nwisnj.01.01300242", "nwisnj.01.01300238", "nwisnj.01.01300237", 
"nwisnj.01.01300189", "nwisnj.01.01300189", "nwisnj.01.01300189", 
"nwisnj.01.01300190"), actyp = c("Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine"
), stdate = structure(c(15755, 15755, 15748, 15748, 15748, 15748, 
15740, 15740, 15740, 15740), class = "Date"), sttime = c("09:30:00", 
"11:00:00", "10:30:00", "12:00:00", "11:00:00", "11:30:00", "09:25:00", 
"09:25:00", "09:25:00", "09:30:00"), actdep = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), actdepun = c("", "", "", "", "", "", "", "", "", ""
), locid = c("USGS-01407760", "USGS-01445030", "USGS-01380075", 
"USGS-01368820", "USGS-01409815", "USGS-01411400", "USGS-01458570", 
"USGS-01458570", "USGS-01458570", "USGS-01445160"), actcom = c("A-0520044 TPCN = 64mL filtered", 
"A-0520046 TPCN = 124mL filtered", "A-0460036 TPCN = 124mL filtered L-0460036 Received February 14, 2013", 
"A-0460025 TPCN = 125mL filtered L-0460025 Received February 14, 2013", 
"A-0460027 TPCN = 64mL filtered.  ATTN: H.Ardourel, LL ANC and LL pH L-0460027 Received February 14, 2013", 
"A-0460028 TPCN = 125mL filtered. L-0460028 Received February 14, 2013", 
"A-0370012 TPCN = 125mL filtered", "A-0370012 TPCN = 125mL filtered", 
"A-0370012 TPCN = 125mL filtered", "A-0370011 TPCN = 125mL filtered"
), hydcond = c("Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, normal stage", "Stable, high stage", 
"Falling stage", "Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, high stage"), hydev = c("Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample"), metcont = c("USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398"), metnam = c("Multiple verticals", 
"Multiple verticals", "Multiple verticals", "Multiple verticals", 
"Single vertical", "Multiple verticals", "Grab sample  (dip)", 
"Grab sample  (dip)", "Grab sample  (dip)", "Multiple verticals"
), detcond = c("", "", "", "", "Not Detected", "", "", "", "", 
""), charnam = c("Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Inorganic nitrogen (nitrate and nitrite)", "Phosphorus", "Phosphorus", 
"Kjeldahl nitrogen"), samfrac = c("Dissolved", "Dissolved", "Dissolved", 
"Dissolved", "Dissolved", "Dissolved", "Dissolved", "Total", 
"Dissolved", "Dissolved"), val = c("0.84", "1.2", "0.46", "0.28", 
"", "0.66", "3.10", "0.032", "0.028", "0.21"), valunit = c("mg/l", 
"mg/l", "mg/l", "mg/l", "", "mg/l", "mg/l as N", "mg/l as P", 
"mg/l as P", "mg/l as N"), valqual = c("", "", "", "", "", "", 
"", "", "", ""), valstat = c("Accepted", "Accepted", "Accepted", 
"Accepted", "Accepted", "Accepted", "Accepted", "Accepted", "Accepted", 
"Accepted"), statcode = c("", "", "", "", "", "", "", "", "", 
""), valtype = c("Actual", "Actual", "Actual", "Actual", "Actual", 
"Actual", "Actual", "Actual", "Actual", "Actual"), precval = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), valcom = c("", "", "", "", "", "", "Report level code updated Oct., Nov. 2015. Reference: NWQL Rapi-Note 2011-21 (RLC: IRL => LT-MDL)", 
"", "", ""), valdep = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    valdepun = c("", "", "", "", "", "", "", "", "", ""), valmetnam = c("Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "NO3+NO2, wf, FCC,NaR, DA", 
    "P, wu, WCA, persulfate CF color", "P, wf, FCA, persulfate CF color", 
    "NH3+org-N, wf, FCA, Kjeldahl, CF"), metdesc = c("NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "", "", "", 
    "USGS OF 00-170"), labcom = c("", "", "", "", "", "", "", 
    "", "", ""), detlimnam = c("", "", "", "", "Historical Lower Reporting Limit", 
    "", "Long Term Method Detection Level", "Long Term Method Detection Level", 
    "Long Term Method Detection Level", "Long Term Method Detection Level"
    ), detlimval = c("", "", "", "", "0.23", "", "0.04", "0.004", 
    "0.0040", "0.07"), detlimun = c("", "", "", "", "mg/l", "", 
    "mg/l as N", "mg/l as P", "mg/l as P", "mg/l as N"), V63 = c("NWIS", 
    "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", 
    "NWIS")), .Names = c("orgid", "actid", "actyp", "stdate", 
"sttime", "actdep", "actdepun", "locid", "actcom", "hydcond", 
"hydev", "metcont", "metnam", "detcond", "charnam", "samfrac", 
"val", "valunit", "valqual", "valstat", "statcode", "valtype", 
"precval", "valcom", "valdep", "valdepun", "valmetnam", "metdesc", 
"labcom", "detlimnam", "detlimval", "detlimun", "V63"), row.names = c(NA, 
10L), class = "data.frame")

Any help would be greatly appreciated! Thanks in advance!

Upvotes: 1

Views: 2900

Answers (2)

knapply
knapply

Reputation: 667

If I remember correctly, between() didn't work with Dates at one point, even when the left and right arguments were converted with as.Date().

Here are some alternatives. Since all of your sample data fall within the specified years, the examples below instead filter for dates between 2013-02-04 and 2013-02-12. Adjust the bounds accordingly.

library(dplyr)

# Converting to a tibble is optional; it just gives more convenient console output.
roadsalt_data <- as_tibble(roadsalt_data)

# Rows with stdate from 2013-02-04 through 2013-02-12 (both ends included),
# reduced to the four columns of interest.
roadsalt_data %>%
  filter(stdate >= "2013-02-04" & stdate <= "2013-02-12") %>%
  select(orgid, stdate, locid, charnam)

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# Same date window, expressed with between() and explicit Date bounds.
roadsalt_data %>%
  filter(
    between(stdate, as.Date("2013-02-04"), as.Date("2013-02-12"))
  ) %>%
  select(orgid, stdate, locid, charnam)

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# How I would've done it
library(lubridate)

# The sample data is matched on day-of-month (4 through 12). To filter on
# years instead of days, use:
#   filter(between(year(stdate), 1996, 2015))
roadsalt_data %>%
  filter(between(day(stdate), 4, 12)) %>%
  select(orgid, stdate, locid, charnam)

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# If {lubridate} isn't installed, base R can do what year() and day() do.
#
# get_day(date): day of the month (1-31) for each element of `date`.
#   date  a Date or date-time vector (anything as.POSIXlt() accepts).
#
# No `tz =` argument is passed: as.POSIXlt() already honours the time zone
# carried by its input, and the previous `tz = tz(date)` relied on
# lubridate::tz(), which defeats the purpose of a lubridate-free helper.
get_day <- function(date) as.POSIXlt(date)$mday
# get_year <- function(date) as.POSIXlt(date)$year + 1900 # for years instead of days

# Same day-of-month filter (4 through 12), using the get_day() helper.
# To filter on years instead of days, use:
#   filter(between(get_year(stdate), 1996, 2015))
roadsalt_data %>%
  filter(between(get_day(stdate), 4, 12)) %>%
  select(orgid, stdate, locid, charnam)

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# Base R
# Reduce the data to the four columns of interest (this overwrites
# roadsalt_data), then keep the rows whose stdate lies in the window,
# both ends included.
roadsalt_data <- roadsalt_data[c("orgid", "stdate", "locid", "charnam")]
roadsalt_data[
  with(
    roadsalt_data,
    stdate >= as.Date("2013-02-04") & stdate <= as.Date("2013-02-12")
  ),
]

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

Created on 2018-05-23 by the reprex package (v0.2.0).

==============================================================

If none of these work, there's something else entirely going on.

Upvotes: 2

Matt Summersgill
Matt Summersgill

Reputation: 4242

If you get tired of battling tidyverse dependency hell, you could try data.table instead:

library(data.table)

## Turn roadsalt_data into a data.table in place (by reference)
setDT(roadsalt_data)

## i: rows inside the date window; j: the columns to return
roadsalt_data[
  between(stdate, as.Date("1996-01-01"), as.Date("2015-07-01")),
  list(orgid, stdate, locid, charnam)
]

#       orgid     stdate         locid                                                      charnam
#  1: USGS-NJ 2013-02-19 USGS-01407760 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  2: USGS-NJ 2013-02-19 USGS-01445030 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  3: USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  4: USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  5: USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  6: USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
#  7: USGS-NJ 2013-02-04 USGS-01458570                     Inorganic nitrogen (nitrate and nitrite)
#  8: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
#  9: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
# 10: USGS-NJ 2013-02-04 USGS-01445160                                            Kjeldahl nitrogen

Upvotes: 1

Related Questions