code123
code123

Reputation: 2146

convert List to zoo and use rollapply on the List

I would like to convert a list object to zoo and then apply rollapply on the zoo object. Short example reproduced below (I have 90,000 such files to process, using UNIX:)). Assume my list has two dataframes.

1) I would like to convert the date in each of the dataframes to this format:

# Attempt to build a Date index from the year/month/day columns.
# NOTE(review): mylist is built as an unnamed list of data frames
# (`mylist<- list(a,a)`), so `mylist$year` does not reach a column of any
# data frame; the sample data also names its columns Year/Month/Day.
dates <- as.Date(paste0(mylist$year, "-", mylist$month, "-", mylist$day), format="%Y-%m-%d")

# Attempt to build a single zoo object ordered by the first column.
# NOTE(review): `mylist[,1]` is matrix-style indexing applied to a list --
# presumably this is the step that fails; confirm.
z <- zoo(mylist, order.by=mylist[,1])

I understand lapply can do this but I tried without success.

Once I get my zoo object, I would like to use rollapply:

library(hydroTSM)#for daily2annual function but aggregate can do 

    # Centered 3-day rolling sum of every column of the zoo object.
    # The result is stored as z.3max so that the annual step below reads the
    # object created here (it was previously assigned to x.3max, leaving
    # z.3max undefined).
    z.3max <- rollapply(data = zooobject, width = 3, FUN = sum, fill = NA,
                        partial = TRUE, align = "center")
    # Maximum value per year of 3-day total rainfall for each one of the simulations
    z.3max.annual <- daily2annual(z.3max, FUN = max, na.rm = TRUE) # dates=1

What the code above does is center a 3-day window on each column of the dataframes in zooobject and sum the values. Then, the maximum per year of the 3-day totals is extracted.

      mylist <- list(a, a)
# Keep only the December-February (DJF) rows of each site's data frame
mylist <- lapply(mylist, function(site_df) {
  in_season <- site_df[["Month"]] %in% c(12, 1, 2)
  site_df[in_season, ]
})


    library(zoo)
       # Sample input for one site: six daily rows (Year 1975, Month 1, Day 1-6,
       # Site factor with the single level "G100") and twenty numeric simulation
       # columns, sim01 ... sim20.
       # NOTE(review): looks like dput() output -- keep the values exactly as given.
       a= structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), Site = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = "G100", class = "factor"), Day = 1:6, 
    sim01 = c(28.49, 29.04, 27.62, 28.43, 28.69, 29.16), sim02 = c(29.49, 
    30.04, 28.62, 29.43, 29.69, 30.16), sim03 = c(30.49, 31.04, 
    29.62, 30.43, 30.69, 31.16), sim04 = c(31.49, 32.04, 30.62, 
    31.43, 31.69, 32.16), sim05 = c(32.49, 33.04, 31.62, 32.43, 
    32.69, 33.16), sim06 = c(33.49, 34.04, 32.62, 33.43, 33.69, 
    34.16), sim07 = c(34.49, 35.04, 33.62, 34.43, 34.69, 35.16
    ), sim08 = c(35.49, 36.04, 34.62, 35.43, 35.69, 36.16), sim09 = c(36.49, 
    37.04, 35.62, 36.43, 36.69, 37.16), sim10 = c(37.49, 38.04, 
    36.62, 37.43, 37.69, 38.16), sim11 = c(38.49, 39.04, 37.62, 
    38.43, 38.69, 39.16), sim12 = c(39.49, 40.04, 38.62, 39.43, 
    39.69, 40.16), sim13 = c(40.49, 41.04, 39.62, 40.43, 40.69, 
    41.16), sim14 = c(41.49, 42.04, 40.62, 41.43, 41.69, 42.16
    ), sim15 = c(42.49, 43.04, 41.62, 42.43, 42.69, 43.16), sim16 = c(43.49, 
    44.04, 42.62, 43.43, 43.69, 44.16), sim17 = c(44.49, 45.04, 
    43.62, 44.43, 44.69, 45.16), sim18 = c(45.49, 46.04, 44.62, 
    45.43, 45.69, 46.16), sim19 = c(46.49, 47.04, 45.62, 46.43, 
    46.69, 47.16), sim20 = c(47.49, 48.04, 46.62, 47.43, 47.69, 
    48.16)), .Names = c("Year", "Month", "Site", "Day", "sim01", 
"sim02", "sim03", "sim04", "sim05", "sim06", "sim07", "sim08", 
"sim09", "sim10", "sim11", "sim12", "sim13", "sim14", "sim15", 
"sim16", "sim17", "sim18", "sim19", "sim20"), row.names = c(NA, 
6L), class = "data.frame")

Output should be similar to:

Year Site Sim01... 
1975 G100 ...
1976 G100 ...
1977 G100 ...

Only the values in the months c(12,1,2) are needed.

Upvotes: 0

Views: 993

Answers (2)

code123
code123

Reputation: 2146

Solved

library(zoo) # read.zoo, rollapply, aggregate/time methods for zoo, write.zoo

# List the input files once so the data and the names attached to them cannot
# get out of step. The pattern is an anchored regular expression: the bare
# pattern ".csv" would also match names such as "xcsv.txt".
csv_files <- list.files(pattern = "\\.csv$")

# Read every file; sep = "" means the columns are whitespace-delimited.
# NOTE(review): missing-value codes such as -999.9 are not converted to NA
# here; pass na.strings to read.table if the files use such a code.
lst1 <- lapply(csv_files, function(x) read.table(x, header = TRUE, sep = ""))

# Keep one season per site. c(6, 7, 8) is June-August (JJA); use
# c(12, 1, 2) for December-February (DJF).
lst2 <- lapply(lst1, function(x) x[x[["Month"]] %in% c(6, 7, 8), ])
names(lst2) <- csv_files
lapply(lst2, tail, 4) # interactive check of the last rows of each site

# Drop the Site column so that columns 1:3 are the date parts and every
# remaining column is numeric.
lst3 <- lapply(lst2, function(x) x[!(names(x) %in% c("Site"))])
Lz <- lapply(lst3, read.zoo, index.column = 1:3, format = "%Y %m %d")

# 3-day rolling sum of each column; partial = TRUE also evaluates the
# incomplete windows at the two ends of each series.
L2 <- lapply(Lz, function(z) rollapply(z, width = 3, FUN = sum, partial = TRUE))
# Maximum of the 3-day totals within each calendar year.
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

# Write one file per input, keeping the list names as file names
# ("<input name>.csv", so the input files themselves are not overwritten).
# write.zoo stores the year index as a leading "Year" column; write.table with
# row.names = FALSE would write the simulation columns only.
out_files <- paste0(names(L3), ".csv")
invisible(Map(
  function(z, path) write.zoo(z, file = path, index.name = "Year", sep = ","),
  L3, out_files
))

Upvotes: 0

G. Grothendieck
G. Grothendieck

Reputation: 269481

This produces a list of zoo objects, Lz, and then performs rollapply on each component of the list giving L2. Finally L3 aggregates over year taking the max of each column.

library(zoo)

mylist <- list(a, a) # a is given at bottom of question

# Convert each data frame to a zoo series indexed by date.
# The date parts are selected by name and Site is left out: in `a` the third
# column is Site, so taking columns 1:3 as the index would paste
# "1975 1 G100", which does not parse with "%Y %m %d", and a factor column
# cannot be summed.
Lz <- lapply(mylist, function(df) {
  date_cols <- c("Year", "Month", "Day")
  value_cols <- setdiff(names(df), c(date_cols, "Site"))
  read.zoo(df[c(date_cols, value_cols)], index.column = 1:3,
           format = "%Y %m %d")
})
# 3-day rolling sum of each column; partial = TRUE also evaluates the
# incomplete windows at the two ends of each series.
L2 <- lapply(Lz, function(z) rollapply(z, width = 3, FUN = sum, partial = TRUE))
# Maximum of the 3-day totals within each calendar year.
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

giving:

> L3

[[1]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28

[[2]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28

Upvotes: 1

Related Questions