TKH_9
TKH_9

Reputation: 117

How to create a bin lengths table in R?

I have a dataframe with species lengths and the observed counts.

> df <- data.frame(
+   species = c("A","A","A","B","B","B"),
+   station = c(1:3,1:3),
+   CLS1 = 11:16,
+   Freq1 = c(1,1,1,1,1,2),
+   CLS2 = c(0, 14, 0, 16,0,17),
+   Freq2 = c(0,1,0,1,0,2),
+   CLS3 = c(0,0,0,18,0,20),
+   Freq3 = c(0,0,0,1,0,1)
+ )
> df
  species station CLS1 Freq1 CLS2 Freq2 CLS3 Freq3
1       A       1   11     1    0     0    0     0
2       A       2   12     1   14     1    0     0
3       A       3   13     1    0     0    0     0
4       B       1   14     1   16     1   18     1
5       B       2   15     1    0     0    0     0
6       B       3   16     2   17     2   20     1

CLS1 stands for the shortest lengths in each obserbation and Freq1 stands for the frequency that corresponds to it. If you observe an individual with length greater than CLS1 at the same station, you would have the values in CLS2 and Freq2 or more CLS3 and Freq3.

What I want to have is a bin lengths table like this. But how can I get this?

> cnt_table <- data.frame(
+   species = c("A","A","A","B","B","B"),
+   station = c(1:3,1:3),
+   X11_12 = c(1,1,0,0,0,0),
+   X13_14 = c(0,1,1,1,0,0),
+   X15_16 = c(0,0,0,1,1,2),
+   X17_18 = c(0,0,0,1,0,0),
+   X19_20 = c(0,0,0,0,0,1)
+ )
> cnt_table
  species station X11_12 X13_14 X15_16 X17_18 X19_20
1       A       1      1      0      0      0      0
2       A       2      1      1      0      0      0
3       A       3      0      1      0      0      0
4       B       1      0      1      1      1      0
5       B       2      0      0      1      0      0
6       B       3      0      0      2      0      1

Upvotes: 0

Views: 87

Answers (1)

Ammar Gamal
Ammar Gamal

Reputation: 211

I modified df a bit to match your outcome, dplyr solution would be like::

df <- data.frame(
 species = c("A","A","A","B","B","B"),
 station = c(1:3,1:3),
 CLS1 = 11:16,
 Freq1 = c(1,1,1,1,1,2),
 CLS2 = c(0, 14, 0, 16,0,15),
 Freq2 = c(0,1,0,1,0,2),
 CLS3 = c(0,0,0,18,0,20),
 Freq3 = c(0,0,0,1,0,1))
> df
  species station CLS1 Freq1 CLS2 Freq2 CLS3 Freq3
1       A       1   11     1    0     0    0     0
2       A       2   12     1   14     1    0     0
3       A       3   13     1    0     0    0     0
4       B       1   14     1   16     1   18     1
5       B       2   15     1    0     0    0     0
6       B       3   16     2   15     2   20     1

group_1 <- c(11,12)
group_2 <- c(13,14)
group_3 <- c(15,16)
group_4 <- c(17,18)
group_5 <- c(19,20)

> 
> df %>% select(species,station, starts_with("CLS")) %>% 
+   pivot_longer(cols = starts_with("CLS")) %>% 
+   mutate(class = case_when(
+     value %in% group_1 ~ "X11_12",
+     value %in% group_2 ~ "X13_14",
+     value %in% group_3 ~ "X15_16",
+     value %in% group_4 ~ "X17_18",
+     value %in% group_5 ~ "X19_20",
+     TRUE ~ "0")) %>%
+   filter(class != "0") %>%
+   group_by(species,station,class) %>%
+   mutate(freq = n()) %>%
+   select(-c(name,value)) %>% 
+   ungroup() %>% distinct() %>%
+   pivot_wider(names_from = class,
+               values_from = freq,
+               values_fill = 0)
# A tibble: 6 × 7
  species station X11_12 X13_14 X15_16 X17_18 X19_20
  <chr>     <int>  <int>  <int>  <int>  <int>  <int>
1 A             1      1      0      0      0      0
2 A             2      1      1      0      0      0
3 A             3      0      1      0      0      0
4 B             1      0      1      1      1      0
5 B             2      0      0      1      0      0
6 B             3      0      0      2      0      1

more elegant solution would be: from library(AMR) we use age_groups() just to make nice looking gruops:, you can use cut() instead if you like:

df_long <- df %>% select(species,station, starts_with("CLS")) %>% 
  pivot_longer(cols = starts_with("CLS")) %>%
  filter(value !=0) %>% 
  mutate(interval =age_groups(value, split_at = c(11,13,15,17,19,21)))  %>%
  select(-name,-value) %>% droplevels()

ftable(df_long$species ,df_long$station,df_long$interval)


     11-12 13-14 15-16 17-18 19-20
                                  
A 1      1     0     0     0     0
  2      1     1     0     0     0
  3      0     1     0     0     0
B 1      0     1     1     1     0
  2      0     0     1     0     0
  3      0     0     2     0     1

Upvotes: 1

Related Questions