helen.h
helen.h

Reputation: 1023

Merge table by aggregating a 3 year average from second df

I am modelling the effects of a variety of variables on the growth of fish within a particular haul. I want to calculate a three year average catch rate for each haul based on its geographical grid location. I need a formula to take the 'Stat_sq' (geographic grid square) and the 'Year' of the fishing trip (contained in the first data frame) and generate an average catch rate for the three preivous years by aggregating the mean catch rates by 'Stat_sq' and 'Year' from the second data frame.

The year range is 93,94,95,96,97,98,99,00,01,02,03,04,05,06,07,08,09,10,11,12,13,14

I would also like it to just return a two year average or just a single year value if there is NA/missing values.

Dataframe1
Trip_Id Stat_sq Year
0113A_1   48E8  13
0113A_10  49E8  13
0113A_11  49E8  12
0113A_12  49E8  13
0113A_13  49E8  12
0113A_15  49E8  11
0113A_16  49E8  10
0113A_18  50E8  13
0113A_19  50E8  12

Dataframe2
Stat_sq Year    Catch_Rate
48E8    13      12.353719
48E8    12      16.508482
48E8    11      2.841493  
42E8    10      12.721584
50E8    13      12.419484
50E9    12      22.461538
50E8    11      28.141433
44E7    00      29.681110
44E8    00      5.427430   
45E6    00      13.259770
45E7    00      18.250903
45E8    00      62.222222  

For exmaple for haul 0113A_1 i would like the average catch rate in square 48E8 for years 13,12,11 (3 years including the current year) = 10.567898

Any help appreciated.

Upvotes: 1

Views: 67

Answers (2)

akrun
akrun

Reputation: 887088

May be this helps

 df1$Yr <- with(df1, as.numeric(ifelse(as.numeric(Year)>=93,
                              paste0(19,Year), paste0(20,Year))))

 df2$Yr <-  with(df2, as.numeric(ifelse(as.numeric(Year)>=93,
                              paste0(19,Year), paste0(20,Year))))


 res <-   unsplit(lapply(split(df1, df1$Stat_sq),
            function(x) {
           x1 <- df2[df2$Stat_sq %in% unique(x$Stat_sq),]
             x$Avg <- sapply(seq_len(nrow(x)), function(i) {
                   x2 <- x[i,]
                   indx <- x1$Yr %in% seq(x2$Yr-2, x2$Yr)
                   if(length(indx)>0) mean(x1$Catch_Rate[indx], na.rm=TRUE)
                    else NA})
              x}),
            df1$Stat_sq)

 head(res[,-4],2)
 #   Trip_Id Stat_sq Year     Avg
 #1  0113A_1    48E8   13 10.5679
 #2 0113A_10    49E8   13      NA

data

 df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11", 
 "0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
 ), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", 
 "49E8", "50E8", "50E8"), Year = c("13", "13", "12", "13", "12", 
 "11", "10", "13", "12")), .Names = c("Trip_Id", "Stat_sq", "Year"
 ), class = "data.frame", row.names = c(NA, -9L))

 df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", 
 "50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c("13", 
 "12", "11", "10", "13", "12", "11", "00", "00", "00", "00", "00"
 ), Catch_Rate = c(12.353719, 16.508482, 2.841493, 12.721584, 
 12.419484, 22.461538, 28.141433, 29.68111, 5.42743, 13.25977, 
 18.250903, 62.222222)), .Names = c("Stat_sq", "Year", "Catch_Rate"
 ), class = "data.frame", row.names = c(NA, -12L))

Upvotes: 2

Shubham Saini
Shubham Saini

Reputation: 738

Here, this should give you your desired result:

df1 <- structure(list(Trip_Id = c("0113A_1", "0113A_10", "0113A_11", 
                                  "0113A_12", "0113A_13", "0113A_15", "0113A_16", "0113A_18", "0113A_19"
), Stat_sq = c("48E8", "49E8", "49E8", "49E8", "49E8", "49E8", 
               "49E8", "50E8", "50E8"), Year = c(13, 13, 12, 13, 12, 11, 10, 
                                                 13, 12)), .Names = c("Trip_Id", "Stat_sq", "Year"), class = "data.frame", 
row.names = c(NA, -9L))

df2 <- structure(list(Stat_sq = c("48E8", "48E8", "48E8", "42E8", "50E8", 
                                  "50E9", "50E8", "44E7", "44E8", "45E6", "45E7", "45E8"), Year = c(13, 
                                                                                                    12, 11, 10, 13, 12, 11, 0, 0, 0, 0, 0), Catch_Rate = c(12.353719, 
                                                                                                                                                           16.508482, 2.841493, 12.721584, 12.419484, 22.461538, 28.141433, 
                                                                                                                                                           29.68111, 5.42743, 13.25977, 18.250903, 62.222222)), .Names = c("Stat_sq", 
                                                                                                                                                                                                                           "Year", "Catch_Rate"), class = "data.frame", row.names = c(NA, -12L))

combined <- merge(df1,df2,all.x=TRUE)

findRate <- function(ggs,year){

  # ggs - geographic grid square (stat_sq)
  # year - desired year

  filter1 <- combined[combined$Stat_sq==ggs,]
  last3years <- c(year:(year-2))
  filter2 <- filter1[is.element(filter1$Year,last3years),]
  output <- aggregate(Catch_Rate~Year,data=filter2,mean)
  print(output)
}

findRate("50E8",14)

Upvotes: 0

Related Questions