McMahok
McMahok

Reputation: 360

Combine rows into one row and merge information

I have these data

df <- structure(list(Site = c("2B", "2B", "2B", "2B", "2B", "2C", "2C", 
"2C", "2C", "2C", "FS", "FS", "FS", "FS", "HE", "HE", "HE"), 
    Year = c(2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 
    2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014), Maxcount_site = c(46L, 
    46L, 46L, 46L, 46L, 25L, 25L, 25L, 25L, 25L, 19L, 19L, 19L, 
    19L, 10L, 10L, 10L), Status = c("New Capture", "New Capture", 
    "Retrap", "Retrap", "Retrap", "New Capture", "New Capture", 
    "Retrap", "Retrap", "Retrap", "New Capture", "New Capture", 
    "Retrap", "Retrap", "New Capture", "New Capture", "Retrap"
    ), Name = c("bluti", "greti", "bluti", "greti", "marti", 
    "bluti", "greti", "bluti", "greti", "marti", "bluti", "greti", 
    "bluti", "greti", "bluti", "greti", "bluti"), maxcount = c(17L, 
    3L, 14L, 11L, 1L, 2L, 2L, 13L, 5L, 3L, 7L, 1L, 9L, 2L, 5L, 
    1L, 4L), blutinew = c(17L, NA, NA, NA, NA, 2L, NA, NA, NA, 
    NA, 7L, NA, NA, NA, 5L, NA, NA), blutiretrap = c(NA, NA, 
    14L, NA, NA, NA, NA, 13L, NA, NA, NA, NA, 9L, NA, NA, NA, 
    4L), gretinew = c(NA, 3L, NA, NA, NA, NA, 2L, NA, NA, NA, 
    NA, 1L, NA, NA, NA, 1L, NA), gretiretrap = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_), martinew = c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), martiretrap = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -17L), groups = structure(list(Site = c("2B", 
"2C", "FS", "HE"), .rows = structure(list(1:5, 6:10, 11:14, 15:17), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE))

head(df)
Site   Year Maxcount_site Status      Name  maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
  <chr> <dbl>         <int> <chr>       <chr>    <int>    <int>       <int>    <int>       <int>    <int>       <int>
1 2B     2014            46 New Capture bluti       17       17          NA       NA          NA       NA          NA
2 2B     2014            46 New Capture greti        3       NA          NA        3          NA       NA          NA
3 2B     2014            46 Retrap      bluti       14       NA          14       NA          NA       NA          NA
4 2B     2014            46 Retrap      greti       11       NA          NA       NA          NA       NA          NA
5 2B     2014            46 Retrap      marti        1       NA          NA       NA          NA       NA          NA
6 2C     2014            25 New Capture bluti        2        2          NA       NA          NA       NA          NA
> 

Let's say they describe the observation of individuals at a Site in a Year, whether they are new observations or re-observed Status, the max number of all individuals Maxcount_site, then those observations broken down into categories e.g blutinew is all the observations for bluti that are New Capture and so on.

I would like to combine all rows with the same Site, Year into one row so that it looks like this, while removing the cols Name and status and maxcount

Site Year Maxcount_site blutinew blutiretrap gretinew gretiretrap martinew martiretrap
1   2B 2014            46       17          14        3          11        0           1
2   2C 2014            25        2          13        2           5        0           3
3   FS 2014            19        7           9        1           2        0           0
4   HE 2014            10        5           4        1           0        0           0

Upvotes: 0

Views: 58

Answers (2)

akrun
akrun

Reputation: 886938

Using aggregate from base R

aggregate(.~ Site + Year, subset(df, 
   select= -c(Maxcount_site, Status, Name)), sum, na.rm = TRUE, na.action = NULL)
  Site Year maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
1   2B 2014       46       17          14        3           0        0           0
2   2C 2014       25        2          13        2           0        0           0
3   FS 2014       19        7           9        1           0        0           0
4   HE 2014       10        5           4        1           0        0           0

Upvotes: 0

deschen
deschen

Reputation: 10996

This is the job for a simple group_by and summarize

library(tidyverse)
df |> 
  group_by(Site, Year) |> 
  summarize(across(-c(Maxcount_site, Status, Name), sum, na.rm = TRUE)) |> 
  ungroup()

# A tibble: 4 × 9
  Site   Year maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
  <chr> <dbl>    <int>    <int>       <int>    <int>       <int>    <int>       <int>
1 2B     2014       46       17          14        3           0        0           0
2 2C     2014       25        2          13        2           0        0           0
3 FS     2014       19        7           9        1           0        0           0
4 HE     2014       10        5           4        1           0        0           0

Upvotes: 2

Related Questions