Reputation: 117
I have a dataframe with species lengths and the observed counts.
> df <- data.frame(
+ species = c("A","A","A","B","B","B"),
+ station = c(1:3,1:3),
+ CLS1 = 11:16,
+ Freq1 = c(1,1,1,1,1,2),
+ CLS2 = c(0, 14, 0, 16,0,17),
+ Freq2 = c(0,1,0,1,0,2),
+ CLS3 = c(0,0,0,18,0,20),
+ Freq3 = c(0,0,0,1,0,1)
+ )
> df
species station CLS1 Freq1 CLS2 Freq2 CLS3 Freq3
1 A 1 11 1 0 0 0 0
2 A 2 12 1 14 1 0 0
3 A 3 13 1 0 0 0 0
4 B 1 14 1 16 1 18 1
5 B 2 15 1 0 0 0 0
6 B 3 16 2 17 2 20 1
CLS1
stands for the shortest lengths in each obserbation and Freq1
stands for the frequency that corresponds to it.
If you observe an individual with length greater than CLS1
at the same station, you would have the values in CLS2
and Freq2
or more CLS3
and Freq3
.
What I want to have is a bin lengths table like this. But how can I get this?
> cnt_table <- data.frame(
+ species = c("A","A","A","B","B","B"),
+ station = c(1:3,1:3),
+ X11_12 = c(1,1,0,0,0,0),
+ X13_14 = c(0,1,1,1,0,0),
+ X15_16 = c(0,0,0,1,1,2),
+ X17_18 = c(0,0,0,1,0,0),
+ X19_20 = c(0,0,0,0,0,1)
+ )
> cnt_table
species station X11_12 X13_14 X15_16 X17_18 X19_20
1 A 1 1 0 0 0 0
2 A 2 1 1 0 0 0
3 A 3 0 1 0 0 0
4 B 1 0 1 1 1 0
5 B 2 0 0 1 0 0
6 B 3 0 0 2 0 1
Upvotes: 0
Views: 87
Reputation: 211
I modified df
a bit to match your outcome, dplyr solution would be like::
df <- data.frame(
species = c("A","A","A","B","B","B"),
station = c(1:3,1:3),
CLS1 = 11:16,
Freq1 = c(1,1,1,1,1,2),
CLS2 = c(0, 14, 0, 16,0,15),
Freq2 = c(0,1,0,1,0,2),
CLS3 = c(0,0,0,18,0,20),
Freq3 = c(0,0,0,1,0,1))
> df
species station CLS1 Freq1 CLS2 Freq2 CLS3 Freq3
1 A 1 11 1 0 0 0 0
2 A 2 12 1 14 1 0 0
3 A 3 13 1 0 0 0 0
4 B 1 14 1 16 1 18 1
5 B 2 15 1 0 0 0 0
6 B 3 16 2 15 2 20 1
group_1 <- c(11,12)
group_2 <- c(13,14)
group_3 <- c(15,16)
group_4 <- c(17,18)
group_5 <- c(19,20)
>
> df %>% select(species,station, starts_with("CLS")) %>%
+ pivot_longer(cols = starts_with("CLS")) %>%
+ mutate(class = case_when(
+ value %in% group_1 ~ "X11_12",
+ value %in% group_2 ~ "X13_14",
+ value %in% group_3 ~ "X15_16",
+ value %in% group_4 ~ "X17_18",
+ value %in% group_5 ~ "X19_20",
+ TRUE ~ "0")) %>%
+ filter(class != "0") %>%
+ group_by(species,station,class) %>%
+ mutate(freq = n()) %>%
+ select(-c(name,value)) %>%
+ ungroup() %>% distinct() %>%
+ pivot_wider(names_from = class,
+ values_from = freq,
+ values_fill = 0)
# A tibble: 6 × 7
species station X11_12 X13_14 X15_16 X17_18 X19_20
<chr> <int> <int> <int> <int> <int> <int>
1 A 1 1 0 0 0 0
2 A 2 1 1 0 0 0
3 A 3 0 1 0 0 0
4 B 1 0 1 1 1 0
5 B 2 0 0 1 0 0
6 B 3 0 0 2 0 1
more elegant solution would be:
from library(AMR)
we use age_groups()
just to make nice looking gruops:, you can use cut()
instead if you like:
df_long <- df %>% select(species,station, starts_with("CLS")) %>%
pivot_longer(cols = starts_with("CLS")) %>%
filter(value !=0) %>%
mutate(interval =age_groups(value, split_at = c(11,13,15,17,19,21))) %>%
select(-name,-value) %>% droplevels()
ftable(df_long$species ,df_long$station,df_long$interval)
11-12 13-14 15-16 17-18 19-20
A 1 1 0 0 0 0
2 1 1 0 0 0
3 0 1 0 0 0
B 1 0 1 1 1 0
2 0 0 1 0 0
3 0 0 2 0 1
Upvotes: 1