Reputation: 1023
I am modelling the effects of a variety of variables on the growth of fish within a particular haul. I want to calculate a three year average catch rate for each haul based on its geographical grid location. I need a formula to take the 'Stat_sq' (geographic grid square) and the 'Year' of the fishing trip (contained in the first data frame) and generate an average catch rate for the three preivous years by aggregating the mean catch rates by 'Stat_sq' and 'Year' from the second data frame.
The year range is 93,94,95,96,97,98,99,00,01,02,03,04,05,06,07,08,09,10,11,12,13,14
I would also like it to just return a two year average or just a single year value if there is NA/missing values.
Dataframe1
Trip_Id Stat_sq Year
0113A_1 48E8 13
0113A_10 49E8 13
0113A_11 49E8 12
0113A_12 49E8 13
0113A_13 49E8 12
0113A_15 49E8 11
0113A_16 49E8 10
0113A_18 50E8 13
0113A_19 50E8 12
Dataframe2
Stat_sq Year Catch_Rate
48E8 13 12.353719
48E8 12 16.508482
48E8 11 2.841493
42E8 10 12.721584
50E8 13 12.419484
50E9 12 22.461538
50E8 11 28.141433
44E7 00 29.681110
44E8 00 5.427430
45E6 00 13.259770
45E7 00 18.250903
45E8 00 62.222222
For exmaple for haul 0113A_1 i would like the average catch rate in square 48E8 for years 13,12,11 (3 years including the current year) = 10.567898
Any help appreciated.
Upvotes: 1
Views: 67
Reputation: 887088
May be this helps
df1$Yr <- with(df1, as.numeric(ifelse(as.numeric(Year)>=93,
paste0(19,Year), paste0(20,Year))))
df2$Yr <- with(df2, as.numeric(ifelse(as.numeric(Year)>=93,
paste0(19,Year), paste0(20,Year))))
res <- unsplit(lapply(split(df1, df1$Stat_sq),
function(x) {
x1 <- df2[df2$Stat_sq %in% unique(x$Stat_sq),]
x$Avg <- sapply(seq_len(nrow(x)), function(i) {
x2 <- x[i,]
indx <- x1$Yr %in% seq(x2$Yr-2, x2$Yr)
if(length(indx)>0) mean(x1$Catch_Rate[indx], na.rm=TRUE)
else NA})
x}),
df1$Stat_sq)
head(res[,-4],2)
# Trip_Id Stat_sq Year Avg
#1 0113A_1 48E8 13 10.5679
#2 0113A_10 49E8 13 NA
df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11",
"0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8",
"49E8", "50E8", "50E8"), Year = c("13", "13", "12", "13", "12",
"11", "10", "13", "12")), .Names = c("Trip_Id", "Stat_sq", "Year"
), class = "data.frame", row.names = c(NA, -9L))
df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8",
"50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c("13",
"12", "11", "10", "13", "12", "11", "00", "00", "00", "00", "00"
), Catch_Rate = c(12.353719, 16.508482, 2.841493, 12.721584,
12.419484, 22.461538, 28.141433, 29.68111, 5.42743, 13.25977,
18.250903, 62.222222)), .Names = c("Stat_sq", "Year", "Catch_Rate"
), class = "data.frame", row.names = c(NA, -12L))
Upvotes: 2
Reputation: 738
Here, this should give you your desired result:
df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11",
"0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8",
"49E8", "50E8", "50E8"), Year = c(13, 13, 12, 13, 12, 11, 10,
13, 12)), .Names = c("Trip_Id", "Stat_sq", "Year"), class = "data.frame",
row.names = c(NA, -9L))
df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8",
"50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c(13,
12, 11, 10, 13, 12, 11, 0, 0, 0, 0, 0), Catch_Rate = c(12.353719,
16.508482, 2.841493, 12.721584, 12.419484, 22.461538, 28.141433,
29.68111, 5.42743, 13.25977, 18.250903, 62.222222)), .Names = c("Stat_sq",
"Year", "Catch_Rate"), class = "data.frame", row.names = c(NA, -12L))
combined <- merge(df1,df2,all.x=TRUE)
findRate <- function(ggs,year){
# ggs - geographic grid square (stat_sq)
# year - desired year
filter1 <- combined[combined$Stat_sq==ggs,]
last3years <- c(year:(year-2))
filter2 <- filter1[is.element(filter1$Year,last3years),]
output <- aggregate(Catch_Rate~Year,data=filter2,mean)
print(output)
}
findRate("50E8",14)
Upvotes: 0