Akira
Akira

Reputation: 2870

User-defined function to generate new columns and new dataframes

I'm trying to define a function applied on dataframe disease_df to achieve two things:

My code is

# Import data and packages

library(outbreaks)
library(lubridate)
library(dplyr)

disease_df <- rename(measles_hagelloch_1861, date = date_of_prodrome)[, 3, drop = FALSE]
disease_df$date = sort(disease_df$date)
disease_df$disease <- 1

# Define functions to construct tables of number of disease occurence per day and per week respectively

function1 <- function(df)
      {
      df <- get(df)

      # First date
      first_date <- min(df$date)

      # Last date
      last_date <- max(df$date)

      # Commulative number of days
      df$cum_days <- as.numeric(difftime(df$date, first_date, units = 'days'))

      # Week of the date
      df$week <- ifelse(df$cum_days / 7 <= 1, 1, ceiling(df$cum_days / 7))

      # Construct a table of number of disease occurence per day
      occurence_per_day <- aggregate(df$disease, by = list(Category = df$date), FUN = sum)

      # Construct a table of number of disease occurence per week
      occurence_per_week <- aggregate(df$disease, by = list(Category = df$week), FUN = sum)

      occurence_per_day
      }

function1(disease_df)

When I run it, the error Error in get(df) : invalid first argument appears.

Could you please help me achieve my goals? Thank you so much!

Upvotes: 1

Views: 30

Answers (1)

akrun
akrun

Reputation: 887851

Here, we don't need the get (required, when the object names are passed as string. Here, it is unquoted)

function1 <- function(df) {


      # First date
      first_date <- min(df[["date"]])

      # Last date
      last_date <- max(df[["date"]])

      # Commulative number of days
      df$cum_days <- as.numeric(difftime(df[["date"]], first_date, units = 'days'))

      # Week of the date
      df$week <- ifelse(df[["cum_days"]] / 7 <= 1, 1, ceiling(df[["cum_days"]] / 7))

      # Construct a table of number of disease occurence per day
      occurence_per_day <- aggregate(df["disease"], by = list(Category = df[["date"]]), FUN = sum)

      # Construct a table of number of disease occurence per week
      occurence_per_week <- aggregate(df["disease"], 
                by = list(Category = df[["week"]]), FUN = sum)

      occurence_per_day
      }

-testing

function1(disease_df)
#     Category disease
#1  1861-10-30       1
#2  1861-11-01       1
#3  1861-11-07       2
#4  1861-11-08       1
#5  1861-11-11       2
#6  1861-11-12       1
#7  1861-11-13       1
#8  1861-11-15       2
#9  1861-11-17       1
#10 1861-11-18       1
#11 1861-11-19       1
#12 1861-11-20       4
#13 1861-11-21      14
#14 1861-11-22      12
# ..

Upvotes: 1

Related Questions