Reputation: 43
I am new to R, and I am trying to write a function for some longitudinal data. I have a dataset in which each participant completed a survey every week for 25 weeks. I want to create a column that states which week's data each row contains. Keep in mind that each participant started and finished on a different week. For example,
participant 111 would have 25 rows, and I want the weeknum
column to state 'week 2' for the week 2 row, etc. I was able to write some code that does this for one participant, but I want a function that does it for the whole data set. Here is the code I have so far.
# Label each of participant 111's weekly survey rows with its week number.
# NOTE: read_csv(), filter() and view() are not base R; the library() calls
# that provide them are not shown in this snippet (presumably the tidyverse).
df <- read_csv("cppdatacombined2.csv", col_names = TRUE)
view(df)

# Keep only participant 111's rows. Inside filter() the column is referred to
# by its bare name; prefixing it with `df$` is unnecessary.
p111 <- filter(df, RecipientLastName == 111)
view(p111)

# First intake timestamp recorded for this participant.
intake_time <- p111$RecordedDate.x[1]

# weekly dates
weekly <- p111$RecordedDate.y

# Week 1 is anchored on the earliest weekly response, so at least one dated
# row is required.
stopifnot(nrow(p111) > 0, any(!is.na(weekly)))

# na.rm = TRUE: with na.rm = FALSE a single missing RecordedDate.y turns the
# minimum, and every date derived from it, into NA.
week1date <- as.Date(min(weekly, na.rm = TRUE))

# One expected survey date per row: week 1 is the earliest response and each
# later week is 7 days after the previous one. Using the row count instead of
# a hard-coded 25 also covers participants with fewer or more rows.
desired_length <- nrow(p111)
weeknum <- seq(week1date, by = "week", length.out = desired_length)
p111$weeknum <- weeknum

# "Week 1", "Week 2", ... assigned by row position.
# NOTE(review): assumes the rows are already in chronological order -- confirm.
p111$list <- paste("Week", seq_len(desired_length))
view(p111)
This did what I wanted for participant 111, and below is my first attempt at turning it into a function. My main issue is that I don't know how to make R create a variable for each unique value in RecipientLastName
(the participant ID) that can be matched back to that participant's rows in the dataset.
# Distinct participant IDs, in order of first appearance.
# NOTE(review): this variable has the same name as the base function it
# calls. Calls to unique() still resolve to the function, but the name is
# easy to misread.
unique <- unique(df$RecipientLastName)
#' Add a per-participant week label to a longitudinal survey data frame.
#'
#' Rows are numbered 1, 2, 3, ... separately within each value of
#' `RecipientLastName`, in the order in which they appear in `df`, and the
#' result is stored as "Week 1", "Week 2", ... in a new `Weeknum` column.
#' No per-participant variables or explicit loop are needed: the grouping is
#' done by `ave()`.
#'
#' @param df A data frame (or tibble) with a `RecipientLastName` column
#'   holding the participant ID. Rows are assumed to already be in
#'   chronological order within each participant.
#' @return `df` with an added character column `Weeknum`. Rows whose
#'   participant ID is missing get `NA`.
make_vars <- function(df) {
  stopifnot(is.data.frame(df), "RecipientLastName" %in% names(df))

  ids <- df[["RecipientLastName"]]

  # Running row count within each participant. The counter is built from
  # integer positions (not from the ID column) so the result is always
  # integer, whatever type the IDs have.
  week_index <- ave(seq_along(ids), ids, FUN = seq_along)

  # ave() leaves rows with a missing ID untouched, so label those NA
  # explicitly instead of exposing a meaningless counter.
  week_labels <- rep(NA_character_, length(ids))
  has_id <- !is.na(ids)
  week_labels[has_id] <- paste("Week", week_index[has_id])

  df$Weeknum <- week_labels
  df
}
# Run make_vars() on the full data set. The value it returns is not assigned
# to anything here.
make_vars(df)
# Print the first 10 rows of df as R code, for sharing a reproducible sample.
dput(head(df, 10))
My Desired output for df
RecipientLastName ...othercols.... Recorded Date Weeknum
1 *week 1 data* July 7 Week1
1 *week 2 data* July 14 Week2
.
(weeks 3-24)
.
1 *week 25 data* Dec 29 Week25
2 *week 1 data* Aug 14 Week1
2 *week 2 data* Aug 21 Week2
.
(weeks 3- 24)
2 *week 25 data* Feb 5 Week25
3 *week 1 data* Jan 3 Week1
3 *week 2 data* Jan 10 Week2
etc.
My current output (shortened) for one participant (it is a massive data set, so I can't post all of it):
"[email protected]", "[email protected]"), Q112 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), Q98 = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1), Q99 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), name = c("Joyee",
"Shane", "Shane", "Shane", "Shane", "Shane", "Shane", "Shane",
"Shane", "Shane"), date_one_days = c(20, 18, 18, 18, 18, 18,
18, 18, 18, 18), date_one_weeks = c(3, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5), date_one_months = c(1, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5), `Need for Cognition_1` = c(4, 7, 7,
7, 7, 7, 7, 7, 7, 7), `Need for Cognition_2` = c(4, 1, 1, 1,
1, 1, 1, 1, 1, 1), `Need for Cognition_3` = c(4, 2, 2, 2, 2,
2, 2, 2, 2, 2), `Need for Cognition_4` = c(4, 1, 1, 1, 1, 1,
1, 1, 1, 1), `Need for Cognition_5` = c(4, 7, 7, 7, 7, 7, 7,
7, 7, 7), `Need for Cognition_6` = c(4, 1, 1, 1, 1, 1, 1, 1,
1, 1), `Need for Cognition_7` = c(4, 7, 7, 7, 7, 7, 7, 7, 7,
7), `Need for Cognition_8` = c(4, 7, 7, 7, 7, 7, 7, 7, 7, 7),
`Need for Cognition_9` = c(4, 2, 2, 2, 2, 2, 2, 2, 2, 2),
`Need for Cognition_10` = c(4, 2, 2, 2, 2, 2, 2, 2, 2, 2), RecordedDate.y = structure(c(NA,
1544891009, 1544891009, 1544891009, 1544891009, 1544891009,
1544891009, 1544891009, 1544891009, 1544891009), tzone = "UTC", class = c("POSIXct",
"POSIXt")),
Upvotes: 1
Views: 92
Reputation: 388817
Try the following:
library(dplyr)

# mutate() adds Weeknum while keeping every existing row and column;
# summarise() would return only the grouping column and Weeknum.
# row_number() counts rows within each participant in their current order,
# and ungroup() stops the grouping from leaking into later steps.
df <- df %>%
  group_by(RecipientLastName) %>%
  mutate(Weeknum = paste0("Week", row_number())) %>%
  ungroup()
This can also be done with base R and data.table.
# Base R
# ave() returns a vector of the same type as its first argument, so the
# per-participant counter is built from integer row positions rather than
# from the ID column itself (which would produce NAs for factor IDs).
df$Weeknum <- paste0(
  "Week",
  ave(seq_along(df$RecipientLastName), df$RecipientLastName, FUN = seq_along)
)
# data.table
library(data.table)
# Convert df to a data.table by reference, then add Weeknum in place:
# within each RecipientLastName group the rows are numbered 1..N in their
# current order.
setDT(df)
df[, Weeknum := paste0("Week", seq_len(.N)), by = RecipientLastName]
Upvotes: 1