code123
code123

Reputation: 2156

Using aggregate on list in R

I have a list (lst3, subset below) and would like to do some calculations on it, e.g.:

lst4 <-lapply(lst3, function(x) aggregate(x[,5:ncol(x)], x[c(4)], FUN = mean)) #column means

lst5<-lapply(lst4,function(x) apply(x[,-c(1)],1,mean)) # get row mean

However, I am unable to compute the row mean without dropping the "Site" column.

I would like my final list to look like this:

lst5<-

[[1]]
Site        x
G116 1.864233

[[2]]
Site       x
GG16 2.064567

The essence is that the final list should have the above structure so that I can write my data to the working directory using:

lapply(lst5,function(x)write.table(x,file=paste(getwd(),"summer",paste0(unique(x$Site),".csv"),
                       sep="/"),row.names=FALSE,quote=FALSE)) ### create a folder called "summer" and write files to directory###

Thanks, AZ.

list(structure(list(Year = c(2005L, 2005L, 2005L), Month = c(8L, 
8L, 8L), Day = 29:31, Site = structure(c(1L, 1L, 1L), .Label = "G116", class = "factor"), 
Sim001 = c(8.4, 17.72, 6.03), Sim002 = c(0.27, 0, 0), Sim003 = c(2.83, 
0.14, 0.1), Sim004 = c(0, 0, 0), Sim005 = c(0, 0.77, 0.28
), Sim006 = c(0, 0, 0), Sim007 = c(0, 0, 0), Sim008 = c(10.94, 
4.77, 0), Sim009 = c(0, 0, 0), Sim010 = c(3.43, 2.74, 0.65
), Sim011 = c(0.36, 0, 2.75), Sim012 = c(26.91, 0, 2.16), 
Sim013 = c(0.88, 1.33, 0.87), Sim014 = c(0, 0.86, 9.42), 
Sim015 = c(0, 0.17, 1.15), Sim016 = c(0, 0, 0), Sim017 = c(0.13, 
0, 0), Sim018 = c(0, 0, 6.72), Sim019 = c(8.45, 12.99, 23.72
), Sim020 = c(1.76, 0, 0), Sim021 = c(0, 0, 2.34), Sim022 = c(0, 
0, 0), Sim023 = c(1.2, 0, 0.26), Sim024 = c(0.85, 0, 0), 
Sim025 = c(0, 0, 0), Sim026 = c(2.05, 0.76, 5.03), Sim027 = c(0.78, 
0, 0), Sim028 = c(1.2, 0, 0), Sim029 = c(22, 0.19, 0), Sim030 = c(0.12, 
0, 0), Sim031 = c(3.1, 13.67, 0), Sim032 = c(0, 0, 17.88), 
Sim033 = c(0, 0, 0), Sim034 = c(1.11, 0, 0), Sim035 = c(1.17, 
1.41, 23.35), Sim036 = c(0, 0.48, 1.71), Sim037 = c(1.51, 
11.1, 7.98), Sim038 = c(0, 0, 0), Sim039 = c(0, 0, 5.46), 
Sim040 = c(5.21, 0, 0), Sim041 = c(0.1, 0.11, 0), Sim042 = c(0, 
0.15, 5.23), Sim043 = c(0, 0, 0), Sim044 = c(0, 0.1, 0), 
Sim045 = c(0, 0, 0), Sim046 = c(0, 0, 0), Sim047 = c(0, 0, 
0.11), Sim048 = c(0, 0, 0), Sim049 = c(0, 0, 4.05), Sim050 = c(0, 
0, 0), Sim051 = c(0, 0.12, 0), Sim052 = c(0.24, 2.58, 0), 
Sim053 = c(3.63, 0, 0.17), Sim054 = c(10.94, 2.69, 0), Sim055 = c(0, 
0, 0), Sim056 = c(0.24, 0.44, 8.27), Sim057 = c(0, 0, 0), 
Sim058 = c(0, 0, 3.75), Sim059 = c(0.19, 11.06, 0), Sim060 = c(0, 
0, 1.65), Sim061 = c(0, 4.95, 0), Sim062 = c(0.15, 0, 4.73
), Sim063 = c(2.99, 0.12, 1.28), Sim064 = c(0, 0, 0), Sim065 = c(0, 
0, 0), Sim066 = c(0, 0, 0), Sim067 = c(0.11, 0.62, 0.56), 
Sim068 = c(2.84, 0, 0), Sim069 = c(0, 0, 0), Sim070 = c(17.91, 
0.11, 4.78), Sim071 = c(0, 0, 1.68), Sim072 = c(0, 0, 1.38
), Sim073 = c(1.68, 0, 0), Sim074 = c(0.53, 0, 2.87), Sim075 = c(0, 
0, 0), Sim076 = c(2.58, 0.27, 0.11), Sim077 = c(0, 0, 0), 
Sim078 = c(9.07, 3.13, 8.62), Sim079 = c(0.98, 0, 2.38), 
Sim080 = c(3.4, 0, 0), Sim081 = c(0, 0, 4.57), Sim082 = c(1.87, 
2.86, 0), Sim083 = c(21.76, 2.24, 0), Sim084 = c(0.45, 4.03, 
0.39), Sim085 = c(0, 0, 0), Sim086 = c(0, 0, 0), Sim087 = c(0, 
0, 17.12), Sim088 = c(5.05, 0, 0), Sim089 = c(0, 0, 1.4), 
Sim090 = c(0.1, 0, 0), Sim091 = c(1.96, 0, 1.38), Sim092 = c(0, 
0, 0), Sim093 = c(0, 0, 0), Sim094 = c(0, 0, 1.81), Sim095 = c(2.72, 
7.16, 1.7), Sim096 = c(6.37, 0, 0), Sim097 = c(0, 1.12, 25.7
), Sim098 = c(0, 0, 0), Sim099 = c(0, 0, 0), Sim100 = c(6.77, 
10.87, 2.6)), .Names = c("Year", "Month", "Day", "Site", 
"Sim001", "Sim002", "Sim003", "Sim004", "Sim005", "Sim006", "Sim007", 
"Sim008", "Sim009", "Sim010", "Sim011", "Sim012", "Sim013", "Sim014", 
"Sim015", "Sim016", "Sim017", "Sim018", "Sim019", "Sim020", "Sim021", 
"Sim022", "Sim023", "Sim024", "Sim025", "Sim026", "Sim027", "Sim028", 
"Sim029", "Sim030", "Sim031", "Sim032", "Sim033", "Sim034", "Sim035", 
"Sim036", "Sim037", "Sim038", "Sim039", "Sim040", "Sim041", "Sim042", 
"Sim043", "Sim044", "Sim045", "Sim046", "Sim047", "Sim048", "Sim049", 
"Sim050", "Sim051", "Sim052", "Sim053", "Sim054", "Sim055", "Sim056", 
"Sim057", "Sim058", "Sim059", "Sim060", "Sim061", "Sim062", "Sim063", 
"Sim064", "Sim065", "Sim066", "Sim067", "Sim068", "Sim069", "Sim070", 
"Sim071", "Sim072", "Sim073", "Sim074", "Sim075", "Sim076", "Sim077", 
"Sim078", "Sim079", "Sim080", "Sim081", "Sim082", "Sim083", "Sim084", 
"Sim085", "Sim086", "Sim087", "Sim088", "Sim089", "Sim090", "Sim091", 
"Sim092", "Sim093", "Sim094", "Sim095", "Sim096", "Sim097", "Sim098", 
"Sim099", "Sim100"), row.names = 15947:15949, class = "data.frame"), 
structure(list(Year = c(2005L, 2005L, 2005L), Month = c(8L, 
8L, 8L), Day = 29:31, Site = structure(c(1L, 1L, 1L), .Label = "GG16", class = "factor"), 
    Sim001 = c(18.36, 0.33, 0.14), Sim002 = c(0, 10.92, 0
    ), Sim003 = c(0, 0, 0), Sim004 = c(0, 0, 1.7), Sim005 = c(0, 
    0, 0), Sim006 = c(0.91, 4.24, 0), Sim007 = c(0, 0, 0.22
    ), Sim008 = c(0.63, 2.9, 2.24), Sim009 = c(0, 0, 0), 
    Sim010 = c(0, 0, 6.91), Sim011 = c(0, 3.28, 10.18), Sim012 = c(8.39, 
    14.58, 45.62), Sim013 = c(2.87, 0.53, 0.11), Sim014 = c(9.15, 
    21.1, 0.66), Sim015 = c(0, 1.75, 2.2), Sim016 = c(0, 
    7.86, 0), Sim017 = c(0, 0, 0), Sim018 = c(0, 0, 0), Sim019 = c(0, 
    0, 0), Sim020 = c(0.39, 0, 0), Sim021 = c(0.13, 0, 1.05
    ), Sim022 = c(0, 0, 10.91), Sim023 = c(0.23, 0, 0), Sim024 = c(0.12, 
    0.83, 5.35), Sim025 = c(0, 0, 0), Sim026 = c(7.75, 0, 
    4.82), Sim027 = c(20.04, 0, 0), Sim028 = c(12.41, 0, 
    5.3), Sim029 = c(0, 0, 0), Sim030 = c(0, 0, 0), Sim031 = c(0, 
    8.06, 0), Sim032 = c(0, 0, 0), Sim033 = c(0, 0, 0), Sim034 = c(0.1, 
    0, 3.34), Sim035 = c(0, 4.34, 3.53), Sim036 = c(2.89, 
    0.27, 0), Sim037 = c(0, 0, 0), Sim038 = c(0, 0, 0), Sim039 = c(0, 
    0.11, 0), Sim040 = c(9.83, 1.55, 9.09), Sim041 = c(3.6, 
    0, 0), Sim042 = c(0, 0, 1.37), Sim043 = c(0, 0, 0), Sim044 = c(0, 
    0, 0), Sim045 = c(0, 0, 0), Sim046 = c(0, 0, 0), Sim047 = c(0, 
    20.52, 0.65), Sim048 = c(1.77, 0.67, 0), Sim049 = c(0, 
    0, 0), Sim050 = c(0, 0, 0), Sim051 = c(0, 4.9, 0), Sim052 = c(0.71, 
    11.34, 0), Sim053 = c(3.46, 2.59, 1.5), Sim054 = c(0, 
    23.63, 0), Sim055 = c(0, 16.48, 4.99), Sim056 = c(0, 
    0, 0), Sim057 = c(0, 0, 0), Sim058 = c(0, 0, 0), Sim059 = c(0, 
    0, 0), Sim060 = c(16.87, 0, 0), Sim061 = c(0, 3.43, 0
    ), Sim062 = c(0.45, 0, 0), Sim063 = c(0, 11.14, 7.22), 
    Sim064 = c(0, 0, 0), Sim065 = c(0, 0, 0), Sim066 = c(0, 
    16.08, 1.87), Sim067 = c(0, 0, 0), Sim068 = c(5.16, 0.88, 
    0.1), Sim069 = c(0, 0, 3.91), Sim070 = c(0, 0, 0), Sim071 = c(0.17, 
    0, 5.22), Sim072 = c(0, 0, 6.95), Sim073 = c(0, 0, 0), 
    Sim074 = c(0.14, 0, 0), Sim075 = c(0, 0, 0), Sim076 = c(0, 
    9.62, 0), Sim077 = c(0, 0, 0), Sim078 = c(1.65, 0, 0), 
    Sim079 = c(0.23, 8.41, 0.28), Sim080 = c(0.78, 0, 0), 
    Sim081 = c(0, 0, 0), Sim082 = c(0.11, 2.75, 0), Sim083 = c(0.26, 
    7.34, 5.92), Sim084 = c(0, 0, 4.27), Sim085 = c(0, 0, 
    0), Sim086 = c(0, 0, 0.1), Sim087 = c(27.18, 0.72, 28.29
    ), Sim088 = c(0, 0, 4.2), Sim089 = c(0, 9.37, 6.59), 
    Sim090 = c(0.21, 2.57, 0), Sim091 = c(0.45, 0, 0), Sim092 = c(0, 
    4.97, 0), Sim093 = c(1.43, 0, 0), Sim094 = c(0, 0, 2.15
    ), Sim095 = c(6, 0, 1.63), Sim096 = c(7.21, 0, 0), Sim097 = c(0, 
    0.39, 1.92), Sim098 = c(0, 0, 0), Sim099 = c(4.38, 0, 
    0), Sim100 = c(0, 0, 0)), .Names = c("Year", "Month", 
"Day", "Site", "Sim001", "Sim002", "Sim003", "Sim004", "Sim005", 
"Sim006", "Sim007", "Sim008", "Sim009", "Sim010", "Sim011", 
"Sim012", "Sim013", "Sim014", "Sim015", "Sim016", "Sim017", 
"Sim018", "Sim019", "Sim020", "Sim021", "Sim022", "Sim023", 
"Sim024", "Sim025", "Sim026", "Sim027", "Sim028", "Sim029", 
"Sim030", "Sim031", "Sim032", "Sim033", "Sim034", "Sim035", 
"Sim036", "Sim037", "Sim038", "Sim039", "Sim040", "Sim041", 
"Sim042", "Sim043", "Sim044", "Sim045", "Sim046", "Sim047", 
"Sim048", "Sim049", "Sim050", "Sim051", "Sim052", "Sim053", 
"Sim054", "Sim055", "Sim056", "Sim057", "Sim058", "Sim059", 
"Sim060", "Sim061", "Sim062", "Sim063", "Sim064", "Sim065", 
"Sim066", "Sim067", "Sim068", "Sim069", "Sim070", "Sim071", 
"Sim072", "Sim073", "Sim074", "Sim075", "Sim076", "Sim077", 
"Sim078", "Sim079", "Sim080", "Sim081", "Sim082", "Sim083", 
"Sim084", "Sim085", "Sim086", "Sim087", "Sim088", "Sim089", 
"Sim090", "Sim091", "Sim092", "Sim093", "Sim094", "Sim095", 
"Sim096", "Sim097", "Sim098", "Sim099", "Sim100"), row.names = 15947:15949, class =     "data.frame"))

Upvotes: 1

Views: 3058

Answers (1)

talat
talat

Reputation: 70336

You can go from lst3 directly to lst5 without the intermediate aggregate step:

lapply(lst3, function(df){
  data.frame(Site = df$Site[1], x = mean(unlist(df[-c(1:4)])))
})
#[[1]]
#  Site        x
#1 G116 1.864233
#
#[[2]]
#  Site        x
#1 GG16 2.064567

Since you're calculating the mean over all rows of every column except the first 4, it's quite easy to unlist the data, creating a single vector, and then use the standard mean on it. Also, by skipping the lst4 step, this will most likely be noticeably faster.

Or, as commented by Richard, a variation could be:

lapply(lst3, function(df){
  data.frame(Site = df$Site[1], x = mean(colMeans(df[-c(1:4)])))
})

Benchmark:

library(microbenchmark)
microbenchmark(
  f1 = {lapply(lst3, function(df){
    data.frame(Site = df$Site[1], x = mean(unlist(df[-c(1:4)])))
  })},
  f2 = {lapply(lst3, function(df){
    data.frame(Site = df$Site[1], x = mean(colMeans(df[-c(1:4)])))
  })},
  unit = "relative"
)

Unit: relative
 expr     min       lq   median       uq      max neval
   f1 1.00000 1.000000 1.000000 1.000000 1.000000   100
   f2 2.91545 2.937272 2.927799 2.894704 3.486007   100

Here's another option for your consideration:

library(reshape2)
x <- melt(lst3)
aggregate(value ~ Site, x[grepl("^Sim.*", x$variable),], FUN = mean)
#  Site    value
#1 G116 1.864233
#2 GG16 2.064567

Or the same concept but using dplyr:

library(dplyr)
filter(x, grepl("^Sim.*", variable)) %>% group_by(Site) %>% summarise(x = mean(value))
#Source: local data frame [2 x 2]
#
#  Site        x
#1 G116 1.864233
#2 GG16 2.064567

Of course, this could also be done using data.table, for example like this (there are probably several even slightly more efficient ways to do this in data.table):

library(data.table)
setDT(x)[grepl("^Sim.*", variable), list(x = mean(value)), by = Site]
#   Site        x
#1: G116 1.864233
#2: GG16 2.064567

Upvotes: 2

Related Questions