french_fries
french_fries

Reputation: 1

Count number of occurrences of several column cases

I have a dataframe:

ID   Date            col1  col2  
1    1606807860      LOY    A
2    1606807860      LOY    B
2    1606807860      LOY    B
3    1606807860      LOY    B
1    1606807860      LOY    A

I want to count number of occurrences of unique values according to ID, Date, col1 and col2. So, desired result is:

ID     Date              event    count
1    1606807860          loy-a      2
2    1606807860          loy-b      2
3    1606807860          loy-b      1

How could I do that? Also how to transform timestamp format into normal form, not 1606807860? how to change date type? make it like year-month-day?

This works for counting in case of only col1 and col2:

%>%
  mutate(across(c(col1, col2), tolower)) %>%
  count(col1, col2) %>%
  unite(event, col1, col2, sep='-')

Upvotes: 2

Views: 239

Answers (3)

ThomasIsCoding
ThomasIsCoding

Reputation: 102710

A base R option

aggregate(
  n ~ .,
  transform(
    df,
    event = tolower(paste(col1, col2, sep = "-")),
    Date = as.Date(as.POSIXct(Date, origin = "1970-01-01")),
    n = 1,
    col1 = NULL,
    col2 = NULL
  ),
  sum
)

which gives

  ID       Date event n
1  1 2020-12-01 loy-a 2
2  2 2020-12-01 loy-b 2
3  3 2020-12-01 loy-b 1

A data.table option

setDT(df)
df[, Date := as.Date(as.POSIXct(Date, origin = "1970-01-01"))][, .(event = tolower(paste(col1, col2, sep = "-")), n = .N), by = names(df)][, c("col1", "col2") := NULL][]

which gives

   ID       Date event n
1:  1 2020-12-01 loy-a 2
2:  2 2020-12-01 loy-b 2
3:  3 2020-12-01 loy-b 1

Data

> dput(df)
structure(list(ID = c(1L, 2L, 2L, 3L, 1L), Date = c(1606807860L,
1606807860L, 1606807860L, 1606807860L, 1606807860L), col1 = c("LOY",
"LOY", "LOY", "LOY", "LOY"), col2 = c("A", "B", "B", "B", "A"
)), class = "data.frame", row.names = c(NA, -5L))

Upvotes: 0

Duck
Duck

Reputation: 39613

Try this:

library(dplyr)
#Code
new <- df %>% group_by(ID,Date,event=tolower(paste0(col1,'-',col2))) %>%
  summarise(N=n()) %>% mutate(Date=as.Date(as.POSIXct(Date,origin = "1970-01-01")))

Output:

# A tibble: 3 x 4
# Groups:   ID, Date [3]
     ID Date       event     N
  <int> <date>     <chr> <int>
1     1 2020-12-01 loy-a     2
2     2 2020-12-01 loy-b     2
3     3 2020-12-01 loy-b     1

Upvotes: 1

akrun
akrun

Reputation: 887891

In this case, instead of specifying multiple columns one by one, we use across within group_by and then specify a range of columns or from the names, and summarise

library(dplyr)
library(stringr)
df1 %>%
   group_by(across(names(.)[1:4])) %>%
   summarise(count = n(), .groups = 'drop') %>%
   mutate(event = tolower(str_c(col1, col2, sep="-"))) %>%
   select(-col1, -col2)

-output

# A tibble: 3 x 4
#     ID       Date count event
#  <int>      <int> <int> <chr>
#1     1 1606807860     2 loy-a
#2     2 1606807860     2 loy-b
#3     3 1606807860     1 loy-b

Or we can create the group_by as

df1 %>% 
   group_by(across(ID:Date), event = tolower(str_c(col1, col2, sep='-'))) %>%
   summarise(count = n(), .groups = 'drop') %>%
   mutate(Date = as.Date(as.POSIXct(Date, origin = '1970-01-01')))

-output

# A tibble: 3 x 4
#     ID Date       event count
#  <int> <date>     <chr> <int>
#1     1 2020-12-01 loy-a     2
#2     2 2020-12-01 loy-b     2
#3     3 2020-12-01 loy-b     1

Or with count and unite

library(tidyr)
df1 %>%
   mutate(across(c(col1, col2), tolower)) %>%
   unite(event, col1, col2, sep='-') %>%
   count(ID, Date, event)

-output

#   ID       Date event n
#1  1 1606807860 loy-a 2
#2  2 1606807860 loy-b 2
#3  3 1606807860 loy-b 1

data

df1 <- structure(list(ID = c(1L, 2L, 2L, 3L, 1L), Date = c(1606807860L, 
1606807860L, 1606807860L, 1606807860L, 1606807860L), col1 = c("LOY", 
"LOY", "LOY", "LOY", "LOY"), col2 = c("A", "B", "B", "B", "A"
)), class = "data.frame", row.names = c(NA, -5L))

Upvotes: 0

Related Questions