Reputation: 95
A filtered data set is provided here:
ID Date Location Method Lines Session_Number Start_Session End_Session
1 1257 2017-02-02 FSZ5 Trolling 2 1 07:11 <NA>
2 1258 2017-02-02 FSZ5 Trolling 2 1 07:11 <NA>
3 1259 2017-02-02 FSZ5 Trolling 2 1 07:11 07:49
4 1260 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
5 1261 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
6 1262 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
7 1263 2017-02-02 FSZ6 Bottom 5 2 08:05 07:49
93 1349 2017-03-26 FSZ1 Bottom 3 3 18:28 18:23
94 1350 2017-03-26 FSZ1 Bottom 3 3 18:28 18:23
95 1351 2017-03-26 FSZ1 Bottom 3 3 18:28 18:45
Session_Length Species
1 NA Aprion virescens
2 NA Euthynnus affinis
3 NA <NA>
4 NA Epinephelus multinotatus
5 NA Caranx melampygus
6 NA Caranx melampygus
7 NA Lutjanus bohar
93 NA Epinephelus multinotatus
94 NA Lethrinus olivaceus
95 NA <NA>
stack.example <- structure(list(ID = c(1257L, 1258L, 1259L, 1260L, 1261L, 1262L,1263L, 1349L, 1350L, 1351L), Date = structure(c(17199, 17199,17199, 17199, 17199, 17199, 17199, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 1L, 1L,1L), .Label = c("FSZ1", "FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6","Other location"), class = "factor"), Method = structure(c(2L,2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"), Lines = c("2", "2", "2","5", "5", "5", "5", "3", "3", "3"), Session_Number = c("1","1", "1", "2", "2", "2", "2", "3", "3", "3"), Start_Session = c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "18:28","18:28", "18:28"), End_Session = c(NA, NA, "07:49", "07:49","07:49", "07:49", "07:49", "18:23", "18:23", "18:45"), Session_Length = c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA), Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 10L, 19L, NA), .Label = c("Acanthocybium solandri","Aethaloperca rogaa", "Aprion virescens", "Caranx ignobilis","Caranx melampygus", "Cephalopholis sonnerati", "Elagatis bipinnulata","Epinephelus fasciatus", "Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), row.names = c(1L, 2L, 3L, 4L, 5L,6L, 7L, 93L, 94L, 95L), class = "data.frame")
I am trying to achieve a column i.e. Session_Length
where minutes are calculated
using the columns Session_Start
and Session_End
.
The length in minutes should correspond to a specific Date, Location, Method, and Session_Number
. Sestraightforwardward.
Where I am struggling is that my dataset has an incorrect start and end session times for each fish caught, with the exception of a summarised row produced at the bottom of each fishing session that provides correct start and end times of a fishing session (raw data exported from CyberTracker).
So, I am trying to:
Example
On 2017-02-02
2
fish were caught Trolling
A row is produced that gives the start 07:11 and end 07:49
time for Session_Number
1
Therefore Session_Length for Session_Number 1
would have row values of 48
for the 2 fish caught during the fishing session.
I could go about it manually but I would take some time with 1000
's of observations. Can anyone provide a means of doing this?
I'm thinking about dplyr
- group_by
function but having trouble piecing it all together.
To remove the summarised row after calculating and applying the Session_Length
value would be a cherry on the top.
Session_Start
and Session_End
columns are character classes with NA
values present.
EDIT NEW SAMPLE DATA
stack.example2 <- structure(list(ID = structure(1257:1351, class = "integer"),Date = structure(c(17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199, 17199,17199, 17199, 17199, 17199, 17199, 17199, 17199, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226,17226, 17226, 17226, 17226, 17226, 17226, 17226, 17226, 17232,17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232, 17232,17232, 17232, 17250, 17250, 17250, 17250, 17250, 17250, 17250,17250, 17250, 17250, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251,17251, 17251, 17251, 17251, 17251, 17251, 17251, 17251), class = "Date"),Location = structure(c(5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("FSZ1","FSZ2", "FSZ3", "FSZ4", "FSZ5", "FSZ6", "Other location"), class = "factor"),Method = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Bottom","Trolling"), class = "factor"),
Lines = structure(c("2","2", "2", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5","5", "5", "5", "5", "5", "5", "5", "5", "2", "2", "2", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "4", "4", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "4", "4", "4","4", "4", "7", "7", "7", "7", "7", "7", "2", "2", "2", "3","3", "3", "3", "2", "2", "2", "2", "2", "3", "3", "3", "3","3", "3", "3", "3", "3", "3", "3", "3", "3", "3"), class = "integer"),Session_Number = structure(c("1", "1", "1", "2", "2", "2","2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2","2", "2", "2", "3", "1", "1", "2", "2", "2", "3", "4", "4","4", "4", "5", "5", "5", "6", "6", "6", "6", "6", "7", "8","8", "8", "8", "8", "9", "9", "10", "10", "1", "1", "1","1", "2", "2", "3", "3", "4", "4", "4", "5", "1", "2", "2","2", "2", "2", "2", "3", "3", "4", "1", "1", "1", "1", "2","2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3","3", "3", "3", "3", "3", "3"), class = "integer"),
Start_Session = structure(c("07:11","07:11", "07:11", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "08:05","08:05", "08:05", "08:05", "08:05", "08:05", "08:05", "10:31","07:19", "07:19", "07:29", "07:29", "07:29", "07:57", "08:08","08:08", "08:08", "08:08", "08:23", "08:23", "08:23", "08:36","08:36", "08:36", "08:36", "08:36", "08:52", "09:06", "09:06","09:06", "09:06", "09:06", "09:27", "09:27", "09:46", "09:46","10:47", "10:47", "10:47", "10:47", "11:03", "11:03", "11:51","11:51", "12:31", "12:31", "12:31", "12:48", "16:54", "17:08","17:08", "17:08", "17:08", "17:08", "17:08", "17:51", "17:51","18:13", "18:18", "18:18", "18:18", "18:18", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28", "18:28", "18:28", "18:28", "18:28","18:28", "18:28", "18:28"), class = "character"),
End_Session = structure(c(NA,NA, "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "07:49", "07:49","07:49", "07:49", "07:49", "07:49", "07:49", "10:30", "10:41",NA, "07:28", "07:28", "07:28", "07:47", "08:08", "08:08","08:08", "08:08", "08:17", "08:17", "08:17", "08:34", "08:34","08:34", "08:34", "08:34", "08:51", "09:03", "09:03", "09:03","09:03", "09:03", "09:26", "09:26", "09:38", "09:38", "10:34","10:34", "10:34", "10:34", "11:03", "11:03", "11:51", "11:51","12:09", "12:09", "12:09", "12:47", "13:03", "17:03", "17:03","17:03", "17:03", "17:03", "17:03", "17:44", "17:44", "18:12","18:27", "18:27", "18:27", "18:27", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:23", "18:23", "18:23", "18:23", "18:23","18:23", "18:23", "18:45"), class = "character"), Session_Length = structure(c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA), class = "character"),
Species = structure(c(3L,13L, NA, 10L, 5L, 5L, 24L, 8L, 11L, 3L, 10L, 3L, 25L, 9L,24L, 10L, 2L, 11L, 31L, 12L, NA, NA, 19L, NA, 18L, 18L, NA,NA, 18L, 10L, 18L, NA, 6L, 19L, NA, 3L, 6L, 3L, 18L, NA,NA, 6L, 18L, 18L, 18L, NA, 6L, NA, 7L, NA, 4L, 4L, 29L, NA,1L, NA, 4L, NA, 10L, 10L, NA, NA, NA, 10L, 24L, 12L, 12L,24L, NA, 4L, NA, NA, 24L, 24L, 31L, NA, 14L, 1L, 1L, 1L,30L, 20L, 20L, 20L, 24L, 24L, 24L, 31L, 31L, 31L, 10L, 10L,10L, 19L, NA), .Label = c("Acanthocybium solandri", "Aethaloperca rogaa","Aprion virescens", "Caranx ignobilis", "Caranx melampygus","Cephalopholis sonnerati", "Elagatis bipinnulata", "Epinephelus fasciatus","Epinephelus fuscoguttatus", "Epinephelus multinotatus","Epinephelus polyphekadion", "Epinephelus tukula", "Euthynnus affinis","Gymnosarda unicolor", "Lethrinus conchyliatus", "Lethrinus lentjan","Lethrinus microdon", "Lethrinus nebulosus", "Lethrinus olivaceus","Lethrinus rubrioperculatus", "Lethrinus variegatus", "Lutjanidae spp.","Lutjanus bengalensis", "Lutjanus bohar", "Lutjanus rivulatus","Momotaxis gradoculis", "Other species", "Plectropomus pessuliferus","Sphyraena barracuda", "Thunnus albacares", "Variola louti"), class = "factor")), .Names = c("ID", "Date", "Location","Method", "Lines", "Session_Number", "Start_Session", "End_Session","Session_Length", "Species"), class = "data.frame", row.names = c(NA,-95L))
Upvotes: 0
Views: 67
Reputation: 19746
Here is a possible approach:
library(tidyverse)
library(lubridate)
stack.example %>%
mutate(End_Session = ymd_hm(paste(Date, End_Session, sep ="/")),
Start_Session = ymd_hm(paste(Date, Start_Session, sep ="/"))) %>%
group_by(Location, Date, Method, Session_Number) %>%
mutate(End_Session = tail(End_Session, 1),
Start_Session = tail(Start_Session, 1)) %>%
mutate(Session_Length = End_Session - Start_Session) %>%
filter(row_number()!=n())
#output
# A tibble: 7 x 10
# Groups: Location, Date, Method, Session_Number [3]
ID Date Location Method Lines Session_Number Start_Session End_Session Session_Length Species
<int> <date> <fct> <fct> <chr> <chr> <dttm> <dttm> <time> <fct>
1 1257 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 38 Aprion virescens
2 1258 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 38 Euthynnus affinis
3 1260 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Epinephelus multinotatus
4 1261 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Caranx melampygus
5 1262 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 07:49:00 -16 Caranx melampygus
6 1349 2017-03-26 FSZ1 Bottom 3 3 2017-03-26 18:28:00 2017-03-26 18:45:00 17 Epinephelus multinotatus
7 1350 2017-03-26 FSZ1 Bottom 3 3 2017-03-26 18:28:00 2017-03-26 18:45:00 17 Lethrinus olivaceus
First convert the time to POSIXct
, then make the groups. Within each group replace Start
and End
values with the last value for that group. Calculate the length
and remove the last row for each group.
The result don't have much sense, but I trust this is due to unfortunate row sample provided in the question.
UPDATE: with the new data. I added an additional line to convert the Session_Length
from seconds to H:M:S using seconds.to.hms
from kimisc
. The warning massages are just to inform you there were NA values.
library(kimisc)
stack.example2 %>%
mutate(End_Session = ymd_hm(paste(Date, End_Session, sep ="/")),
Start_Session = ymd_hm(paste(Date, Start_Session, sep ="/"))) %>%
group_by(Location, Date, Method, Session_Number) %>%
mutate(End_Session = tail(End_Session, 1),
Start_Session = tail(Start_Session, 1)) %>%
mutate(Session_Length = End_Session - Start_Session,
Session_Length = seconds.to.hms(Session_Length)) %>%
filter(row_number()!=n())
#output
# A tibble: 70 x 10
# Groups: Location, Date, Method, Session_Number [19]
ID Date Location Method Lines Session_Number Start_Session End_Session Session_Length Species
<int> <date> <fct> <fct> <chr> <chr> <dttm> <dttm> <chr> <fct>
1 1257 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00 Aprion ~
2 1258 2017-02-02 FSZ5 Trolling 2 1 2017-02-02 07:11:00 2017-02-02 07:49:00 00:38:00 Euthynn~
3 1260 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
4 1261 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Caranx ~
5 1262 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Caranx ~
6 1263 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Lutjanu~
7 1264 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
8 1265 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
9 1266 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Aprion ~
10 1267 2017-02-02 FSZ6 Bottom 5 2 2017-02-02 08:05:00 2017-02-02 10:30:00 02:25:00 Epineph~
If you would like to avoid the warnings you can do:
stack.example2 %>%
group_by(Location, Date, Method, Session_Number) %>%
mutate(End_Session = tail(End_Session, 1),
Start_Session = tail(Start_Session, 1),
End_Session = ymd_hm(paste(Date, End_Session, sep ="/")),
Start_Session = ymd_hm(paste(Date, Start_Session, sep ="/")),
Session_Length = End_Session - Start_Session,
Session_Length = seconds.to.hms(Session_Length)) %>%
filter(row_number()!=n())
EDIT:
stack.example2 %>%
group_by(Location, Date, Method, Session_Number) %>%
mutate(End_Session = tail(End_Session, 1),
Start_Session = tail(Start_Session, 1),
End_Session = ymd_hm(paste(Date, End_Session, sep ="/")),
Start_Session = ymd_hm(paste(Date, Start_Session, sep ="/")),
Session_Length = End_Session - Start_Session,
Session_Length = seconds.to.hms(Session_Length),
n = n()) %>%
filter(n == 1 | row_number() != n ) %>%
select(-n)
Upvotes: 1