Reputation: 2156
I have a list (lst3, subset below) and would like to do some calculations on it, e.g.:
lst4 <-lapply(lst3, function(x) aggregate(x[,5:ncol(x)], x[c(4)], FUN = mean)) #column means
lst5<-lapply(lst4,function(x) apply(x[,-c(1)],1,mean)) # get row mean
However, I am unable to get row mean without ignoring "Site".
I would like my final list to look like this:
lst5<-
[[1]]
Site x
G116 1.864233
[[2]]
Site x
GG16 2.064567
The essence is that the final list should have the above structure so that I can write my data to working directory using:
lapply(lst5,function(x)write.table(x,file=paste(getwd(),"summer",paste0(unique(x$Site),".csv"),
sep="/"),row.names=FALSE,quote=FALSE)) ### create a folder called "summer" and write files to directory###
Thanks, AZ.
list(structure(list(Year = c(2005L, 2005L, 2005L), Month = c(8L,
8L, 8L), Day = 29:31, Site = structure(c(1L, 1L, 1L), .Label = "G116", class = "factor"),
Sim001 = c(8.4, 17.72, 6.03), Sim002 = c(0.27, 0, 0), Sim003 = c(2.83,
0.14, 0.1), Sim004 = c(0, 0, 0), Sim005 = c(0, 0.77, 0.28
), Sim006 = c(0, 0, 0), Sim007 = c(0, 0, 0), Sim008 = c(10.94,
4.77, 0), Sim009 = c(0, 0, 0), Sim010 = c(3.43, 2.74, 0.65
), Sim011 = c(0.36, 0, 2.75), Sim012 = c(26.91, 0, 2.16),
Sim013 = c(0.88, 1.33, 0.87), Sim014 = c(0, 0.86, 9.42),
Sim015 = c(0, 0.17, 1.15), Sim016 = c(0, 0, 0), Sim017 = c(0.13,
0, 0), Sim018 = c(0, 0, 6.72), Sim019 = c(8.45, 12.99, 23.72
), Sim020 = c(1.76, 0, 0), Sim021 = c(0, 0, 2.34), Sim022 = c(0,
0, 0), Sim023 = c(1.2, 0, 0.26), Sim024 = c(0.85, 0, 0),
Sim025 = c(0, 0, 0), Sim026 = c(2.05, 0.76, 5.03), Sim027 = c(0.78,
0, 0), Sim028 = c(1.2, 0, 0), Sim029 = c(22, 0.19, 0), Sim030 = c(0.12,
0, 0), Sim031 = c(3.1, 13.67, 0), Sim032 = c(0, 0, 17.88),
Sim033 = c(0, 0, 0), Sim034 = c(1.11, 0, 0), Sim035 = c(1.17,
1.41, 23.35), Sim036 = c(0, 0.48, 1.71), Sim037 = c(1.51,
11.1, 7.98), Sim038 = c(0, 0, 0), Sim039 = c(0, 0, 5.46),
Sim040 = c(5.21, 0, 0), Sim041 = c(0.1, 0.11, 0), Sim042 = c(0,
0.15, 5.23), Sim043 = c(0, 0, 0), Sim044 = c(0, 0.1, 0),
Sim045 = c(0, 0, 0), Sim046 = c(0, 0, 0), Sim047 = c(0, 0,
0.11), Sim048 = c(0, 0, 0), Sim049 = c(0, 0, 4.05), Sim050 = c(0,
0, 0), Sim051 = c(0, 0.12, 0), Sim052 = c(0.24, 2.58, 0),
Sim053 = c(3.63, 0, 0.17), Sim054 = c(10.94, 2.69, 0), Sim055 = c(0,
0, 0), Sim056 = c(0.24, 0.44, 8.27), Sim057 = c(0, 0, 0),
Sim058 = c(0, 0, 3.75), Sim059 = c(0.19, 11.06, 0), Sim060 = c(0,
0, 1.65), Sim061 = c(0, 4.95, 0), Sim062 = c(0.15, 0, 4.73
), Sim063 = c(2.99, 0.12, 1.28), Sim064 = c(0, 0, 0), Sim065 = c(0,
0, 0), Sim066 = c(0, 0, 0), Sim067 = c(0.11, 0.62, 0.56),
Sim068 = c(2.84, 0, 0), Sim069 = c(0, 0, 0), Sim070 = c(17.91,
0.11, 4.78), Sim071 = c(0, 0, 1.68), Sim072 = c(0, 0, 1.38
), Sim073 = c(1.68, 0, 0), Sim074 = c(0.53, 0, 2.87), Sim075 = c(0,
0, 0), Sim076 = c(2.58, 0.27, 0.11), Sim077 = c(0, 0, 0),
Sim078 = c(9.07, 3.13, 8.62), Sim079 = c(0.98, 0, 2.38),
Sim080 = c(3.4, 0, 0), Sim081 = c(0, 0, 4.57), Sim082 = c(1.87,
2.86, 0), Sim083 = c(21.76, 2.24, 0), Sim084 = c(0.45, 4.03,
0.39), Sim085 = c(0, 0, 0), Sim086 = c(0, 0, 0), Sim087 = c(0,
0, 17.12), Sim088 = c(5.05, 0, 0), Sim089 = c(0, 0, 1.4),
Sim090 = c(0.1, 0, 0), Sim091 = c(1.96, 0, 1.38), Sim092 = c(0,
0, 0), Sim093 = c(0, 0, 0), Sim094 = c(0, 0, 1.81), Sim095 = c(2.72,
7.16, 1.7), Sim096 = c(6.37, 0, 0), Sim097 = c(0, 1.12, 25.7
), Sim098 = c(0, 0, 0), Sim099 = c(0, 0, 0), Sim100 = c(6.77,
10.87, 2.6)), .Names = c("Year", "Month", "Day", "Site",
"Sim001", "Sim002", "Sim003", "Sim004", "Sim005", "Sim006", "Sim007",
"Sim008", "Sim009", "Sim010", "Sim011", "Sim012", "Sim013", "Sim014",
"Sim015", "Sim016", "Sim017", "Sim018", "Sim019", "Sim020", "Sim021",
"Sim022", "Sim023", "Sim024", "Sim025", "Sim026", "Sim027", "Sim028",
"Sim029", "Sim030", "Sim031", "Sim032", "Sim033", "Sim034", "Sim035",
"Sim036", "Sim037", "Sim038", "Sim039", "Sim040", "Sim041", "Sim042",
"Sim043", "Sim044", "Sim045", "Sim046", "Sim047", "Sim048", "Sim049",
"Sim050", "Sim051", "Sim052", "Sim053", "Sim054", "Sim055", "Sim056",
"Sim057", "Sim058", "Sim059", "Sim060", "Sim061", "Sim062", "Sim063",
"Sim064", "Sim065", "Sim066", "Sim067", "Sim068", "Sim069", "Sim070",
"Sim071", "Sim072", "Sim073", "Sim074", "Sim075", "Sim076", "Sim077",
"Sim078", "Sim079", "Sim080", "Sim081", "Sim082", "Sim083", "Sim084",
"Sim085", "Sim086", "Sim087", "Sim088", "Sim089", "Sim090", "Sim091",
"Sim092", "Sim093", "Sim094", "Sim095", "Sim096", "Sim097", "Sim098",
"Sim099", "Sim100"), row.names = 15947:15949, class = "data.frame"),
structure(list(Year = c(2005L, 2005L, 2005L), Month = c(8L,
8L, 8L), Day = 29:31, Site = structure(c(1L, 1L, 1L), .Label = "GG16", class = "factor"),
Sim001 = c(18.36, 0.33, 0.14), Sim002 = c(0, 10.92, 0
), Sim003 = c(0, 0, 0), Sim004 = c(0, 0, 1.7), Sim005 = c(0,
0, 0), Sim006 = c(0.91, 4.24, 0), Sim007 = c(0, 0, 0.22
), Sim008 = c(0.63, 2.9, 2.24), Sim009 = c(0, 0, 0),
Sim010 = c(0, 0, 6.91), Sim011 = c(0, 3.28, 10.18), Sim012 = c(8.39,
14.58, 45.62), Sim013 = c(2.87, 0.53, 0.11), Sim014 = c(9.15,
21.1, 0.66), Sim015 = c(0, 1.75, 2.2), Sim016 = c(0,
7.86, 0), Sim017 = c(0, 0, 0), Sim018 = c(0, 0, 0), Sim019 = c(0,
0, 0), Sim020 = c(0.39, 0, 0), Sim021 = c(0.13, 0, 1.05
), Sim022 = c(0, 0, 10.91), Sim023 = c(0.23, 0, 0), Sim024 = c(0.12,
0.83, 5.35), Sim025 = c(0, 0, 0), Sim026 = c(7.75, 0,
4.82), Sim027 = c(20.04, 0, 0), Sim028 = c(12.41, 0,
5.3), Sim029 = c(0, 0, 0), Sim030 = c(0, 0, 0), Sim031 = c(0,
8.06, 0), Sim032 = c(0, 0, 0), Sim033 = c(0, 0, 0), Sim034 = c(0.1,
0, 3.34), Sim035 = c(0, 4.34, 3.53), Sim036 = c(2.89,
0.27, 0), Sim037 = c(0, 0, 0), Sim038 = c(0, 0, 0), Sim039 = c(0,
0.11, 0), Sim040 = c(9.83, 1.55, 9.09), Sim041 = c(3.6,
0, 0), Sim042 = c(0, 0, 1.37), Sim043 = c(0, 0, 0), Sim044 = c(0,
0, 0), Sim045 = c(0, 0, 0), Sim046 = c(0, 0, 0), Sim047 = c(0,
20.52, 0.65), Sim048 = c(1.77, 0.67, 0), Sim049 = c(0,
0, 0), Sim050 = c(0, 0, 0), Sim051 = c(0, 4.9, 0), Sim052 = c(0.71,
11.34, 0), Sim053 = c(3.46, 2.59, 1.5), Sim054 = c(0,
23.63, 0), Sim055 = c(0, 16.48, 4.99), Sim056 = c(0,
0, 0), Sim057 = c(0, 0, 0), Sim058 = c(0, 0, 0), Sim059 = c(0,
0, 0), Sim060 = c(16.87, 0, 0), Sim061 = c(0, 3.43, 0
), Sim062 = c(0.45, 0, 0), Sim063 = c(0, 11.14, 7.22),
Sim064 = c(0, 0, 0), Sim065 = c(0, 0, 0), Sim066 = c(0,
16.08, 1.87), Sim067 = c(0, 0, 0), Sim068 = c(5.16, 0.88,
0.1), Sim069 = c(0, 0, 3.91), Sim070 = c(0, 0, 0), Sim071 = c(0.17,
0, 5.22), Sim072 = c(0, 0, 6.95), Sim073 = c(0, 0, 0),
Sim074 = c(0.14, 0, 0), Sim075 = c(0, 0, 0), Sim076 = c(0,
9.62, 0), Sim077 = c(0, 0, 0), Sim078 = c(1.65, 0, 0),
Sim079 = c(0.23, 8.41, 0.28), Sim080 = c(0.78, 0, 0),
Sim081 = c(0, 0, 0), Sim082 = c(0.11, 2.75, 0), Sim083 = c(0.26,
7.34, 5.92), Sim084 = c(0, 0, 4.27), Sim085 = c(0, 0,
0), Sim086 = c(0, 0, 0.1), Sim087 = c(27.18, 0.72, 28.29
), Sim088 = c(0, 0, 4.2), Sim089 = c(0, 9.37, 6.59),
Sim090 = c(0.21, 2.57, 0), Sim091 = c(0.45, 0, 0), Sim092 = c(0,
4.97, 0), Sim093 = c(1.43, 0, 0), Sim094 = c(0, 0, 2.15
), Sim095 = c(6, 0, 1.63), Sim096 = c(7.21, 0, 0), Sim097 = c(0,
0.39, 1.92), Sim098 = c(0, 0, 0), Sim099 = c(4.38, 0,
0), Sim100 = c(0, 0, 0)), .Names = c("Year", "Month",
"Day", "Site", "Sim001", "Sim002", "Sim003", "Sim004", "Sim005",
"Sim006", "Sim007", "Sim008", "Sim009", "Sim010", "Sim011",
"Sim012", "Sim013", "Sim014", "Sim015", "Sim016", "Sim017",
"Sim018", "Sim019", "Sim020", "Sim021", "Sim022", "Sim023",
"Sim024", "Sim025", "Sim026", "Sim027", "Sim028", "Sim029",
"Sim030", "Sim031", "Sim032", "Sim033", "Sim034", "Sim035",
"Sim036", "Sim037", "Sim038", "Sim039", "Sim040", "Sim041",
"Sim042", "Sim043", "Sim044", "Sim045", "Sim046", "Sim047",
"Sim048", "Sim049", "Sim050", "Sim051", "Sim052", "Sim053",
"Sim054", "Sim055", "Sim056", "Sim057", "Sim058", "Sim059",
"Sim060", "Sim061", "Sim062", "Sim063", "Sim064", "Sim065",
"Sim066", "Sim067", "Sim068", "Sim069", "Sim070", "Sim071",
"Sim072", "Sim073", "Sim074", "Sim075", "Sim076", "Sim077",
"Sim078", "Sim079", "Sim080", "Sim081", "Sim082", "Sim083",
"Sim084", "Sim085", "Sim086", "Sim087", "Sim088", "Sim089",
"Sim090", "Sim091", "Sim092", "Sim093", "Sim094", "Sim095",
"Sim096", "Sim097", "Sim098", "Sim099", "Sim100"), row.names = 15947:15949, class = "data.frame"))
Upvotes: 1
Views: 3058
Reputation: 70336
You can go from lst3 directly to lst5 without the intermediate aggregate step:
lapply(lst3, function(df){
data.frame(Site = df$Site[1], x = mean(unlist(df[-c(1:4)])))
})
#[[1]]
# Site x
#1 G116 1.864233
#
#[[2]]
# Site x
#1 GG16 2.064567
Since you're calculating the mean of all columns except the first 4 columns and over all the rows of the other columns, it's quite easy to unlist
the data, creating a single vector, and then using standard mean
on it. Also, by skipping the lst4 step, this most likely be noticeably faster.
Or, as commented by Richard, a variation could be:
lapply(lst3, function(df){
data.frame(Site = df$Site[1], x = mean(colMeans(df[-c(1:4)])))
})
library(microbenchmark)
microbenchmark(
f1 = {lapply(lst3, function(df){
data.frame(Site = df$Site[1], x = mean(unlist(df[-c(1:4)])))
})},
f2 = {lapply(lst3, function(df){
data.frame(Site = df$Site[1], x = mean(colMeans(df[-c(1:4)])))
})},
unit = "relative"
)
Unit: relative
expr min lq median uq max neval
f1 1.00000 1.000000 1.000000 1.000000 1.000000 100
f2 2.91545 2.937272 2.927799 2.894704 3.486007 100
Here's another option for your consideration:
library(reshape2)
x <- melt(lst3)
aggregate(value ~ Site, x[grepl("^Sim.*", x$variable),], FUN = mean)
# Site value
#1 G116 1.864233
#2 GG16 2.064567
Or the same concept but using dplyr:
library(dplyr)
filter(x, grepl("^Sim.*", variable)) %>% group_by(Site) %>% summarise(x = mean(value))
#Source: local data frame [2 x 2]
#
# Site x
#1 G116 1.864233
#2 GG16 2.064567
Of course, this could also be done using data.table, for example like this (there are probably several even slightly more efficient ways to do this in data.table):
library(data.table)
setDT(x)[grepl("^Sim.*", variable), list(x = mean(value)), by = Site]
# Site x
#1: G116 1.864233
#2: GG16 2.064567
Upvotes: 2