Tiptop
Tiptop

Reputation: 623

Area under curve for several sub groups

I wish to plot day against a value x and calculate the area under the curve for 3 groups (a,b,c) in my dataset.

I have tried this:

# Area under totalbvocs over daysincubated4, one value per soil
# (requires dplyr and zoo to be attached).
# Rows are sorted by day within each soil so that consecutive differences are
# the interval widths; the widths must come from daysincubated4, because df
# has no `day` column.
df %>%
  arrange(soil, daysincubated4) %>%
  group_by(soil) %>%
  summarise(
    # width of each interval * mean of its two endpoint values
    areaundercurve = sum(diff(daysincubated4) * rollmean(totalbvocs, 2))
  )

Here's my dataset:

# Example data as a plain data.frame in long format (188 rows according to
# the row.names attribute), one row per measurement:
#   daysincubated4 - numeric day value (24, 66, 81 or 94 in this sample)
#   totalbvocs     - numeric measured value
#   soil           - character identifier of the soil sample
# Rows are not strictly ordered by day: the final row is day 81 for soil "24",
# so sort by soil and day before taking differences between consecutive rows.
df <- structure(list(daysincubated4 = c(24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 66, 66, 66, 66, 66, 66, 66, 
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 
66, 66, 66, 66, 66, 66, 66, 66, 66, 81, 81, 81, 81, 81, 81, 81, 
81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 
81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 
81, 81, 81, 81, 81, 81, 81, 81, 94, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 
94, 94, 94, 94, 81), totalbvocs = c(0.16, 9.29, 0.03, 2.63, 0.14, 
6.05, 340.03, 0.03, 3.89, 6.67, 1.89, 5.95, 1.89, 1.42, 0.35, 
0.2, 0.01, 0.48, 0.4, 3.9, 0.15, 0.02, 0.37, 1.95, 0.04, 3.74, 
0.25, 3.27, 0.18, 0.01, 2.44, 0.64, 0.63, 0.23, 0.03, 0.02, 26.92, 
0.02, 0.62, 0.74, 0.17, 1.63, 5.98, 0.23, 1.37, 13.9, 0.37, 0.08, 
0.73, 0.02, 0.13, 0.02, 2.63, 0.05, 2.07, 0.29, 0.01, 0.06, 1.03, 
1.16, 0.04, 0.07, 0.04, 0.02, 0.01, 0.04, 0.01, 0.01, 0.05, 0.01, 
0.03, 0.01, 0.01, 0.02, 0.02, 0.01, 0.07, 0, 0.72, 0.14, 0, 0.02, 
0, 0, 0.75, 0.06, 0.03, 0.11, 0.01, 0.16, 0.06, 0.04, 0.05, 1.68, 
0.1, 0.06, 0.2, 0, 4.69, 0, 0.15, 0, 0.6, 0.01, 0, 0.05, 0.33, 
2.06, 0.04, 0.01, 0, 0.84, 0, 0.01, 0.01, 0, 0.01, 0.01, 0.01, 
0, 0.01, 0, 0, 0.15, 0.01, 0, 0.46, 0, 0, 0, 0, 0.89, 0.01, 0, 
0.07, 0, 0.03, 0.39, 0.04, 0.04, 87.18, 0.09, 0.06, 0.21, 0.03, 
0.07, 0, 0.04, 0.01, 0.06, 0.24, 0.11, 0.01, 0.15, 0, 0.03, 0.02, 
0.01, 0.01, 0, 0.08, 0.25, 0.01, 0.03, 0.01, 0, 0, 0, 0.12, 7.09, 
0.04, 0.01, 0.03, 0, 0.01, 0, 0, 0.29, 0, 0.07, 0.05, 0.35, 0.02, 
0.02, 1.76, 0.08, 0.18, 0.01), soil = c("6", "12", "18", "2", 
"39", "1", "14", "4", "9", "16", "10", "28", "33", "8", "31", 
"92", "25", "23", "20", "83", "66", "19", "27", "22", "95", "26", 
"21", "69", "30", "113", "15", "100", "38", "24", "110", "102", 
"34", "37", "7", "36", "17", "13", "29", "32", "90", "5", "3", 
"35", "31", "6", "12", "18", "2", "39", "1", "14", "4", "9", 
"16", "10", "28", "33", "8", "92", "25", "23", "20", "83", "66", 
"19", "27", "22", "95", "26", "21", "69", "30", "113", "15", 
"100", "38", "24", "110", "102", "34", "37", "7", "36", "17", 
"13", "29", "32", "90", "5", "3", "35", "31", "6", "12", "18", 
"2", "39", "1", "14", "4", "9", "16", "10", "28", "33", "8", 
"92", "25", "23", "20", "83", "66", "19", "27", "22", "95", "26", 
"21", "69", "30", "113", "15", "100", "38", "110", "102", "34", 
"37", "7", "36", "17", "13", "29", "32", "90", "5", "3", "35", 
"31", "6", "12", "18", "2", "39", "4", "9", "16", "10", "28", 
"33", "8", "92", "25", "23", "20", "83", "66", "19", "27", "22", 
"95", "26", "21", "69", "30", "113", "15", "100", "38", "24", 
"110", "102", "34", "37", "7", "36", "17", "13", "29", "5", "3", 
"35", "24")), row.names = c(NA, -188L), class = "data.frame")

All help is much appreciated!

Upvotes: 0

Views: 502

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 388817

You can perform the calculation for each group as follows:

library(dplyr)
library(zoo)

# Per-group area under x over day: group first, then order each group's rows
# by day so that diff(day) gives the widths of consecutive intervals.
df %>%
  group_by(group) %>%
  arrange(day, .by_group = TRUE) %>%
  summarise(
    # interval width * mean of the two neighbouring x values, summed
    areaundercurve = sum(diff(day) * rollmean(x, 2))
  )

#   group areaundercurve
#  <chr>          <dbl>
#1 a               1658
#2 b               1023
#3 c               1297

Upvotes: 3

Moritz Schwarz
Moritz Schwarz

Reputation: 2489

There are much nicer ways to code this up, but I would use the `AUC()` function from the DescTools package.

Here it is solved with a loop over the subgroups. Note that I presumed that the variable you want on the x-axis of your function (i.e. the x in f(x)) is the day variable (time is normally on the x-axis). Otherwise, just switch the two around!

library(DescTools)
# Print the area under x over day for each group in turn, using AUC() with
# its default settings.
for (i in unique(df$group)) {
  # Subset once per group; `[[` then yields plain vectors whether df is a
  # data.frame or a tibble.
  grp <- df[df$group == i, ]
  # Plain print() call, so no pipe operator has to be attached for the loop.
  print(AUC(x = grp[["day"]], y = grp[["x"]]))
}


[1] 1658
[1] 1023
[1] 1297

Upvotes: 2

Related Questions