Reputation: 97
I have a datavectorthat looks like this:
And a dataframe with a columns of dates, that looks like this:
Where the number of rows in this dataframe is equal to the length of new_first_dates.
I want to subtract new_first_dates, from each row of the dataframe. I tried using sweep but it won't work.
Does anyone have any ideas?
Cheers
Attached is the data:
structure(list(`>50 gatherings` = structure(c(18346, 18342, 18339, 18341, 18346, 18339, 18339, 18355, 18340, 18346, 18339, 18338, 18346, 18349, 18343, 18347, 18345, 18340, 18337, 18351, 18338, 18345, 18338, 21915, 18347, 18343, 18340, 18346, 18339), class = "Date"), `>500 gatherings` = structure(c(18346, 18335, 18339, 18341, 18335, 18339, 18339, 18355, 18340, 18335, 18339, 18335, 18346, 18349, 18343, 18347, 18334, 18340, 18337, 18351, 18338, 18345, 18338, 21915, 18347, 18336, 18334, 18346, 18339), class = "Date"), `public schools` = structure(c(18341, 18338, 18338, 18341, 18345, 18338, 18340, 18355, 18339, 18341, 18340, 18338, 18338, 18340, 18341, 18342, 18338, 18338, 18339, 18339, 18338, 18338, 18338, 18338, 18342, 18345, 18338, 18338, 18340), class = "Date"), `restaurant dine-in` = structure(c(18339, 18341, 18341, 18337, 18342, 18342, 18342, 18339, 18340, 18339, 18346, 18338, 18338, 18339, 18343, 18347, 18339, 18340, 18337, 18354, 18339, 18339, 18345, 18345, 18345, 18341, 18340, 18345, 18339), class = "Date"), `entertainment/gym` = structure(c(18339, 18350, 18341, 18337, 18339, 18342, 18342, 18339, 18340, 18339, 18346, 18338, 18338, 18339, 18359, 18347, 18345, 18340, 18344, 18354, 18339, 18339, 18345, 21915, 18345, 18341, 18340, 18338, 18339), class = "Date"), `federal guidelines` = structure(c(18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338, 18338), class = "Date"), `foreign travel ban` = structure(c(18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333, 18333), class = "Date"), `stay at home rollback` = structure(c(18377, 18383, 18398, 18392, 18379, 18432, 18383, 21915, 18413, 18387, 18398, 18415, 18380, 18398, 18387, 18379, 18403, 18391, 18424, 18419, 18392, 18387, 18409, 21915, 18398, 18419, 21915, 18396, 18396), class = "Date"), `>50 gatherings rollback` = structure(c(18391, 21915, 18415, 21915, 21915, 21915, 21915, 21915, 18426, 18405, 18398, 21915, 21915, 18387, 18429, 18415, 21915, 21915, 18404, 21915, 21915, 21915, 21915, 18384, 21915, 21915, 21915, 21915, 21915), class = "Date"), `>500 gatherings rollback` = structure(c(18405, 21915, 18415, 21915, 21915, 21915, 21915, 21915, 18426, 18405, 18398, 21915, 21915, 18387, 18429, 18415, 21915, 21915, 21915, 21915, 21915, 21915, 21915, 18384, 21915, 21915, 21915, 21915, 21915), class = "Date"), `restaurant dine-in rollback` = structure(c(18405, 18394, 21915, 18392, 18391, 18432, 18380, 18394, 18391, 21915, 18384, 18390, 18380, 18424, 18429, 18384, 18387, 18411, 21915, 18391, 21915, 21915, 18409, 21915, 18387, 18398, 18384, 18394, 21915 ), class = "Date"), `entertainment/gym rollback` = structure(c(18405, 18394, 21915, 18392, 18391, 18432, 18377, 18394, 18391, 21915, 18384, 18390, 18380, 18424, 18429, 18384, 21915, 18415, 21915, 18391, 21915, 21915, 18409, 21915, 18387, 18398, 18384, 18394, 21915), class = "Date"), State = c("AK", "AL", "AZ", "CA", "CO", "FL", "GA", "IA", "IL", "IN", "KS", "MD", "MI", "MN", "MO", "MS", "NC", "NM", "OH", "OK", "OR", "PA", "RI", "SD", "TN", "TX", "UT", "VA", "WI"), cluster = c(3, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 2, 3, 1, 3, 1, 1, 1, 1, 3, 1, 2, 2, 3, 1, 1, 1, 1, 1)), row.names = c(NA, -29L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(State = c("AK", "AL", "AZ", "CA", "CO", "FL", "GA", "IA", "IL", "IN", "KS", "MD", "MI", "MN", "MO", "MS", "NC", "NM", "OH", "OK", "OR", "PA", "RI", "SD", "TN", "TX", "UT", "VA", "WI"), .rows = list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L)), row.names = c(NA, -29L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
structure(c(18338, 18345, 18340, 18325, 18332, 18334, 18333, 18333, 18339, 18339, 18340, 18337, 18335, 18337, 18339, 18340, 18340, 18336, 18337, 18340, 18335, 18334, 18339, 18346, 18335, 18334, 18338, 18335, 18337), class = "Date")
Upvotes: 1
Views: 154
Reputation: 78
If your dataset is large, you may want to look into using the data.table
package for the modify-in-place semantics.
library( data.table )
dates_dataset = data.table( dates_dataset )
# Find the columns that are date class or otherwise select the columns you want to subtract
date_columns = names(dates_dataset)[ sapply( dates_dataset, class ) == "Date" ]
# Use the := in-place assignment operator to update the columns
dates_dataset[ , eval(date_columns) := {
lapply( date_columns, function(x){
get( x ) - new_first_dates
} )
} ]
You have the option to change eval(date_columns)
to something else if you want to create new columns.
The output is:
get_vars = c(">50 gatherings", ">500 gatherings")
head( dates_dataset[,mget(get_vars) ] )
>50 gatherings >500 gatherings
1: 8 days 8 days
2: -3 days -10 days
3: -1 days -1 days
4: 16 days 16 days
5: 14 days 3 days
6: 5 days 5 days
Upvotes: 1
Reputation: 887541
Some of the columns are not Date
class. So, we need to exclude those before doing the subtraction. Also, based on the structure of OP's post, there are some attributes in the dataset, which can be removed with ungroup
ing
library(dplyr) # version >= 1.0.0
df1 %>%
ungroup %>%
mutate(across(-c(State, cluster), ~ . - as.Date(v1)))
-output
# A tibble: 29 x 14
# `>50 gatherings` `>500 gathering… `public schools` `restaurant din… `entertainment/… `federal guidel… `foreign travel… `stay at home r…
# <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
# 1 8 days 8 days 3 days 1 days 1 days 0 days -5 days 39 days
# 2 -3 days -10 days -7 days -4 days 5 days -7 days -12 days 38 days
# 3 -1 days -1 days -2 days 1 days 1 days -2 days -7 days 58 days
# 4 16 days 16 days 16 days 12 days 12 days 13 days 8 days 67 days
# 5 14 days 3 days 13 days 10 days 7 days 6 days 1 days 47 days
# 6 5 days 5 days 4 days 8 days 8 days 4 days -1 days 98 days
# 7 6 days 6 days 7 days 9 days 9 days 5 days 0 days 50 days
# 8 22 days 22 days 22 days 6 days 6 days 5 days 0 days 3582 days
# 9 1 days 1 days 0 days 1 days 1 days -1 days -6 days 74 days
#10 7 days -4 days 2 days 0 days 0 days -1 days -6 days 48 days
# … with 19 more rows, and 6 more variables: `>50 gatherings rollback` <drtn>, `>500 gatherings rollback` <drtn>, `restaurant dine-in
# rollback` <drtn>, `entertainment/gym rollback` <drtn>, State <chr>, cluster <dbl>
Or if the OP prefer sweep
df1[1:12] <- sweep(as.data.frame(df1[1:12]), 1, as.Date(v1), '-')
Or using lapply
from base R
df1[1:12] <- lapply(df1[1:12], `-`, as.Date(v1))
Upvotes: 1