Reputation: 1
I have a dataframe:
ID Date col1 col2
1 1606807860 LOY A
2 1606807860 LOY B
2 1606807860 LOY B
3 1606807860 LOY B
1 1606807860 LOY A
I want to count number of occurrences of unique values according to ID, Date, col1 and col2. So, desired result is:
ID Date event count
1 1606807860 loy-a 2
2 1606807860 loy-b 2
3 1606807860 loy-b 1
How could I do that? Also how to transform timestamp format into normal form, not 1606807860? how to change date type? make it like year-month-day?
This works for counting in case of only col1 and col2:
%>%
mutate(across(c(col1, col2), tolower)) %>%
count(col1, col2) %>%
unite(event, col1, col2, sep='-')
Upvotes: 2
Views: 239
Reputation: 102710
A base R option
aggregate(
n ~ .,
transform(
df,
event = tolower(paste(col1, col2, sep = "-")),
Date = as.Date(as.POSIXct(Date, origin = "1970-01-01")),
n = 1,
col1 = NULL,
col2 = NULL
),
sum
)
which gives
ID Date event n
1 1 2020-12-01 loy-a 2
2 2 2020-12-01 loy-b 2
3 3 2020-12-01 loy-b 1
A data.table
option
setDT(df)
df[, Date := as.Date(as.POSIXct(Date, origin = "1970-01-01"))][, .(event = tolower(paste(col1, col2, sep = "-")), n = .N), by = names(df)][, c("col1", "col2") := NULL][]
which gives
ID Date event n
1: 1 2020-12-01 loy-a 2
2: 2 2020-12-01 loy-b 2
3: 3 2020-12-01 loy-b 1
Data
> dput(df)
structure(list(ID = c(1L, 2L, 2L, 3L, 1L), Date = c(1606807860L,
1606807860L, 1606807860L, 1606807860L, 1606807860L), col1 = c("LOY",
"LOY", "LOY", "LOY", "LOY"), col2 = c("A", "B", "B", "B", "A"
)), class = "data.frame", row.names = c(NA, -5L))
Upvotes: 0
Reputation: 39613
Try this:
library(dplyr)
#Code
new <- df %>% group_by(ID,Date,event=tolower(paste0(col1,'-',col2))) %>%
summarise(N=n()) %>% mutate(Date=as.Date(as.POSIXct(Date,origin = "1970-01-01")))
Output:
# A tibble: 3 x 4
# Groups: ID, Date [3]
ID Date event N
<int> <date> <chr> <int>
1 1 2020-12-01 loy-a 2
2 2 2020-12-01 loy-b 2
3 3 2020-12-01 loy-b 1
Upvotes: 1
Reputation: 887891
In this case, instead of specifying multiple columns one by one, we use across
within group_by
and then specify a range of columns or from the names
, and summarise
library(dplyr)
library(stringr)
df1 %>%
group_by(across(names(.)[1:4])) %>%
summarise(count = n(), .groups = 'drop') %>%
mutate(event = tolower(str_c(col1, col2, sep="-"))) %>%
select(-col1, -col2)
-output
# A tibble: 3 x 4
# ID Date count event
# <int> <int> <int> <chr>
#1 1 1606807860 2 loy-a
#2 2 1606807860 2 loy-b
#3 3 1606807860 1 loy-b
Or we can create the group_by
as
df1 %>%
group_by(across(ID:Date), event = tolower(str_c(col1, col2, sep='-'))) %>%
summarise(count = n(), .groups = 'drop') %>%
mutate(Date = as.Date(as.POSIXct(Date, origin = '1970-01-01')))
-output
# A tibble: 3 x 4
# ID Date event count
# <int> <date> <chr> <int>
#1 1 2020-12-01 loy-a 2
#2 2 2020-12-01 loy-b 2
#3 3 2020-12-01 loy-b 1
Or with count
and unite
library(tidyr)
df1 %>%
mutate(across(c(col1, col2), tolower)) %>%
unite(event, col1, col2, sep='-') %>%
count(ID, Date, event)
-output
# ID Date event n
#1 1 1606807860 loy-a 2
#2 2 1606807860 loy-b 2
#3 3 1606807860 loy-b 1
df1 <- structure(list(ID = c(1L, 2L, 2L, 3L, 1L), Date = c(1606807860L,
1606807860L, 1606807860L, 1606807860L, 1606807860L), col1 = c("LOY",
"LOY", "LOY", "LOY", "LOY"), col2 = c("A", "B", "B", "B", "A"
)), class = "data.frame", row.names = c(NA, -5L))
Upvotes: 0