Reputation: 47
I have a dataset of fish tag signals, and I want to calculate the duration of different behaviours based on swimming speed (e.g. static, cruise, burst) so that I can calculate behaviour state frequencies. I have done this using a `for` loop, but it's very slow on my large datasets. I'm sure this can be done using one of R's `apply` functions, but I can't figure out how to do it.
This is what my data looks like:
Period PEN SEC BLSEC BS BScount CountTF BSdur
380 7045 7 7 0.204 cruise 2 FALSE NA
381 7045 7 7 0.694 cruise 3 FALSE NA
382 7045 7 7 0.325 cruise 4 TRUE 21
383 7045 7 7 0.000 static 1 TRUE 7
384 7045 7 7 0.197 cruise 1 FALSE NA
385 7045 7 7 0.312 cruise 2 FALSE NA
386 7045 7 7 0.242 cruise 3 TRUE 21
387 7045 7 7 0.096 static 1 TRUE 7
388 7045 7 7 0.274 cruise 1 FALSE NA
389 7045 7 7 0.268 cruise 2 FALSE NA
390 7045 7 7 0.312 cruise 3 FALSE NA
391 7045 7 7 0.694 cruise 4 FALSE NA
392 7045 7 7 0.268 cruise 5 FALSE NA
SEC is the number of seconds between tag pings (it's not always 7!), and BLSEC is body lengths per second (i.e. the normalised distance swum by the fish between tag pings). I've calculated BS, BScount and CountTF by doing:
# Speed thresholds in body lengths per second: at or below `static` is
# "static", above `static` up to and including `cruise` is "cruise", and
# anything faster is "burst".
static <- 0.1
cruise <- 1

# Classify every ping. The outer test already claims speeds <= static, so the
# inner test only has to check the upper bound.
bsffile$BS <- ifelse(
  bsffile$BLSEC <= static,
  "static",
  ifelse(bsffile$BLSEC <= cruise, "cruise", "burst")
)

# Position of each row within its run of consecutive identical states.
bsffile$BScount <- sequence(rle(bsffile$BS)$lengths)

# TRUE where the counter does not increase on the next row, i.e. on the last
# row of a run; the final row of the data is always FALSE.
bsffile$CountTF <- c(diff(bsffile$BScount) < 1, FALSE)
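BSdur is the sum of SEC over each run of consecutive identical behaviour states, recorded on the last row of the run (where CountTF is TRUE) and NA elsewhere. I calculated it using: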
# Running total of SEC for the behaviour run currently being scanned.
bssum <- 0
# seq_len() gives an empty sequence for a zero-row data frame, whereas
# 1:nrow() would iterate over c(1, 0) and fail.
for (i in seq_len(nrow(bsffile))) {
  bssum <- bssum + bsffile[i, "SEC"]
  # Record the total on the last row of a run (CountTF is TRUE) when SEC is
  # known. isTRUE() and && keep the condition a single TRUE/FALSE even when
  # CountTF is NA.
  if (isTRUE(bsffile[i, "CountTF"]) && !is.na(bsffile[i, "SEC"])) {
    bsffile[i, "BSdur"] <- bssum
    bssum <- 0
  } else {
    # NA_real_ keeps BSdur numeric even if no row ever receives a total.
    bsffile[i, "BSdur"] <- NA_real_
  }
}
It takes about five minutes to run on my dataset. Any suggestions for how I can make this faster, e.g. using one of the `apply` functions?
Here is some `dput` output to play with:
# dput() output for a 21-row sample (row names 380 to 400); every row has
# Period 7045 and PEN level "7". BSdur is NA except on rows where CountTF is
# TRUE, where it holds the summed SEC for that run of behaviour states.
structure(list(Period = c(7045, 7045, 7045, 7045, 7045, 7045,
7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045, 7045,
7045, 7045, 7045, 7045), PEN = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("7", "8"), class = "factor"), SEC = c(7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 7), BLSEC = c(0.204,
0.694, 0.325, 0, 0.197, 0.312, 0.242, 0.096, 0.274, 0.268, 0.312,
0.694, 0.268, 0.541, 0.796, 0.306, 0.089, 0.93, 0.389, 0.452,
0.917), BS = c("cruise", "cruise", "cruise", "static", "cruise",
"cruise", "cruise", "static", "cruise", "cruise", "cruise", "cruise",
"cruise", "cruise", "cruise", "cruise", "static", "cruise", "cruise",
"cruise", "cruise"), BScount = c(2L, 3L, 4L, 1L, 1L, 2L, 3L,
1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 1L, 2L, 3L, 4L), CountTF = c(FALSE,
FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
TRUE), BSdur = c(NA, NA, 21, 7, NA, NA, 21, 7, NA, NA, NA, NA,
NA, NA, NA, 57, 7, NA, NA, NA, 28)), row.names = 380:400, .Names = c("Period",
"PEN", "SEC", "BLSEC", "BS", "BScount", "CountTF", "BSdur"
), class = "data.frame")
Upvotes: 0
Views: 39
Reputation: 887153
We can do this with ave
from base R
# Sum SEC within each run of consecutive identical BS values. The run id is a
# cumulative count of the rows where BS differs from the previous row.
# The total is kept only on rows where CountTF is TRUE; all other rows are NA
# (not 0), so they cannot be mistaken for zero-length durations.
df1$BSdur <- with(
  df1,
  ifelse(
    CountTF,
    ave(SEC, cumsum(c(TRUE, BS[-1] != BS[-nrow(df1)])), FUN = sum),
    NA
  )
)
df1$BSdur
#[1] NA NA 21  7 NA NA 21  7 NA NA NA NA NA NA NA 57  7 NA NA NA 28
Upvotes: 0
Reputation: 2070
Easy with data.table
library(data.table)
# Convert to a data.table by reference (no copy is made).
setDT(bsffile)
# rleid(BS) gives each run of consecutive identical BS values its own group id.
# Within a run, the summed SEC is written only where CountTF is TRUE; other
# rows get NA rather than 0. as.numeric()/NA_real_ keep the column type double
# in every group, including groups with no TRUE row.
bsffile[, BSdur := ifelse(CountTF, as.numeric(sum(SEC)), NA_real_),
        by = .(rleid(BS))]
Upvotes: 2