Reputation: 295
I have this data...
Scientificname Level Zone levelmean
<chr> <int> <chr> <dbl>
1 Acanthostracion polygonius 3 B 0.135
2 Acanthostracion quadricornis 1 B 0.286
3 Acanthostracion quadricornis 1 D 0.228
4 Acanthostracion quadricornis 2 B 0.212
5 Acanthostracion quadricornis 2 D 0.181
6 Acanthostracion quadricornis 3 B 0.247
7 Acanthostracion quadricornis 3 D 0.222
8 Acanthostracion quadricornis 4 B 0.151
9 Acanthostracion quadricornis 4 D 0.202
10 Acanthostracion spp. 2 B 0.225
11 Achirus lineatus 1 B 0.204
12 Achirus lineatus 1 D 0.202
13 Achirus lineatus 2 B 0.219
14 Achirus lineatus 2 D 0.181
15 Achirus lineatus 3 B 0.145
16 Achirus lineatus 3 D 0.172
17 Achirus lineatus 4 B 0.135
18 Achirus lineatus 4 D 0.142
structure(list(Scientificname = c("Acanthostracion polygonius",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion spp.", "Achirus lineatus", "Achirus lineatus",
"Achirus lineatus", "Achirus lineatus", "Achirus lineatus", "Achirus lineatus",
"Achirus lineatus", "Achirus lineatus"), Level = c(3L, 1L, 1L,
2L, 2L, 3L, 3L, 4L, 4L, 2L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L),
Zone = c("B", "B", "D", "B", "D", "B", "D", "B", "D", "B",
"B", "D", "B", "D", "B", "D", "B", "D"), levelmean = c(0.134916351861846,
0.286175876741544, 0.228368580556262, 0.21169261421555, 0.181497972824247,
0.247241190981072, 0.221534021013127, 0.151406128200516,
0.201513319317781, 0.224860586436409, 0.204040161766372,
0.201884774621553, 0.219239071775499, 0.18121539764963, 0.144981540016618,
0.172393116267914, 0.134916351861846, 0.141662169454938)), row.names = c(NA,
-18L), groups = structure(list(Scientificname = c("Acanthostracion polygonius",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion quadricornis", "Acanthostracion quadricornis",
"Acanthostracion spp.", "Achirus lineatus", "Achirus lineatus",
"Achirus lineatus", "Achirus lineatus"), Level = c(3L, 1L, 2L,
3L, 4L, 2L, 1L, 2L, 3L, 4L), .rows = structure(list(1L, 2:3,
4:5, 6:7, 8:9, 10L, 11:12, 13:14, 15:16, 17:18), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
These are species names, Discharge level (4 is lowest, 1 is highest), and CPUE (catch per unit effort, aka number of fish caught).
What I want to do is get a quantifiable measure of sensitivity to discharge. So the only way I could think to do it was take the difference between the values for each pair of levels of discharge for each species for each zone. For example for Acanthostracion quadricornis in zone B i would take the difference between group 1 and 2, 1 and 3, 1 and 4, 2 and 3, 2 and 4, and 3 and 4, then take the mean of all those values.
It becomes more complicated because I only want to do this for species that occur in at least 2 levels per zone. Also I have about 130 species and they vary how many levels they show up in for each zone.
My ideal output would be...
Scientificname Zone Sensitivity
1 Acanthostracion quadricornis B 0.06367512
2 Acanthostracion quadricornis D 0.02399275
3 Achirus lineatus B 0.05164523
4 Achirus lineatus D 0.03447407
The values in the ideal output may have been rounded.
Upvotes: 4
Views: 143
Reputation: 21908
Here is a base R solution for you:
do.call(rbind, lapply(split(df, ~ Scientificname + Zone, drop = TRUE), function(x) {
if(nrow(x) >= 2) {
combs <- as.data.frame(t(combn(x$Level, m = 2)))
x[["Sensitivity"]] <-
mean(abs(mapply(function(a, b) {
x$levelmean[x$Level == a] - x$levelmean[x$Level == b]
}, combs$V1, combs$V2)))
head(x, 1)[, -which(names(x) %in% c("Level", "levelmean"))]
}
}))
# A tibble: 4 x 3
# Groups: Scientificname [2]
Scientificname Zone Sensitivity
<chr> <chr> <dbl>
1 Acanthostracion quadricornis B 0.0733
2 Achirus lineatus B 0.0520
3 Acanthostracion quadricornis D 0.0268
4 Achirus lineatus D 0.0316
Upvotes: 5
Reputation: 101327
A data.table
option using combn
setDT(df)[
,
.SD[.N > 1],
.(Scientificname, Zone)
][
,
.(Sensitivity = mean(abs(combn(levelmean,2,diff))))
,
.(Scientificname, Zone)
]
or a shorter one (since df
is already a grouped data.frame)
setDT(df %>%
filter(n() > 1))[
,
.(Sensitivity = mean(abs(combn(levelmean,2,diff))))
,
.(Scientificname, Zone)
]
gives
Scientificname Zone Sensitivity
1: Acanthostracion quadricornis B 0.07330964
2: Acanthostracion quadricornis D 0.02677209
3: Achirus lineatus B 0.05200446
4: Achirus lineatus D 0.03158168
Upvotes: 4
Reputation: 887088
We could reshape to 'wide' format with pivot_wider
and then use combn
to get the pairwise difference and take the mean
with rowMeans
library(dplyr)
library(tidyr)
tmp <- df1 %>%
filter(n() > 1) %>%
ungroup %>%
pivot_wider(names_from = Level, values_from = levelmean, values_fill = 0)
Sensitivity <- rowMeans(do.call(cbind, combn(tmp[-(1:2)], 2,
FUN = function(x) abs(x[1]-x[2]), simplify = FALSE)))
out <- tmp %>%
select(1:2) %>%
mutate(Sensitivity = Sensitivity)
-ouptutu
out
# A tibble: 4 x 3
Scientificname Zone Sensitivity
<chr> <chr> <dbl>
1 Acanthostracion quadricornis B 0.0733
2 Acanthostracion quadricornis D 0.0268
3 Achirus lineatus B 0.0520
4 Achirus lineatus D 0.0316
Or without reshaping
library(purrr)
df1 %>%
filter(n() > 1) %>%
ungroup %>%
nest_by(Scientificname, Zone) %>%
ungroup %>%
transmute(Scientificname, Zone,
Sensitivity = map_dbl(data,
~ mean(abs(combn(.x$levelmean, 2, FUN = \(x) x[1]- x[2])))))
-output
# A tibble: 4 x 3
Scientificname Zone Sensitivity
<chr> <chr> <dbl>
1 Acanthostracion quadricornis B 0.0733
2 Acanthostracion quadricornis D 0.0268
3 Achirus lineatus B 0.0520
4 Achirus lineatus D 0.0316
Upvotes: 5