user8248672
user8248672

Reputation:

Group by find summary measure, non NA value, and specific value using summarise in R

I have this sample dataset (df)

structure(list(from = c("(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452"), to = c("(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542"), extension = c("9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls"), forwarded_to = c(NA, "(323) 457-2039", 
"(310) 598-1753", "(818) 900-0706", "(818) 539-7811", "(213) 344-4965", 
"(213) 458-7662", "(818) 208-3012", "(818) 293-0175", "(818) 284-4238", 
"(818) 206-9506", "(310) 299-7340", "(310) 405-0875", "(213) 260-9113", 
"(213) 805-5208", "(818) 887-3058", "(424) 271-2141", "(213) 218-6579", 
"(818) 638-9466", "(213) 784-7164", "(323) 457-2038", "(213) 805-6959", 
"(228) 285-7898", "(213) 341-1055", "(213) 568-0979", "(213) 344-4905", 
"(818) 459-3811", NA, "(323) 457-2039", "(310) 598-1753", "(818) 900-0706", 
"(818) 539-7811", "(213) 344-4965", "(213) 458-7662", "(818) 208-3012", 
"(818) 293-0175", "(818) 284-4238", "(818) 206-9506", "(310) 299-7340", 
"(310) 405-0875", "(213) 260-9113", "(213) 805-5208", "(818) 887-3058", 
"(424) 271-2141", "(213) 218-6579", "(818) 638-9466", "(213) 784-7164", 
"(323) 457-2038", "(213) 805-6959", "(228) 285-7898", "(213) 341-1055", 
"(213) 568-0979", "(213) 344-4905", "(818) 459-3811"), date = c("Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018"), time = structure(c(55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55500, 55500, 
55500, 55500, 55500, 55500, 55500, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55380, 55380, 
55380, 55380, 55380, 55380, 55380), class = c("hms", "difftime"
), units = "secs"), action = c("Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe"), action_result = c("Accepted", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "IP Phone Offline", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "Stopped", 
"Stopped", "IP Phone Offline", "Stopped", "Stopped", "Call connected", 
"Stopped", "Accepted", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"IP Phone Offline", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "Stopped", "Stopped", "IP Phone Offline", "Call connected", 
"Stopped", "Stopped", "Stopped"), result_description = c("The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
NA, NA, "The call connected to and was accepted by this number.", 
NA, "The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
"The call connected to and was accepted by this number.", NA, 
NA, NA), duration = structure(c(297, 52, 52, 51, 51, 51, 51, 
51, 51, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 9, 9, 0, 9, 
9, 236, 9, 71, 52, 52, 52, 51, 51, 51, 51, 51, 0, 51, 51, 51, 
51, 51, 51, 51, 51, 51, 51, 7, 7, 0, 13, 7, 7, 7), class = c("hms", 
"difftime"), units = "secs"), ID = c(19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L), CallConnected = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), who_answered = c("NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "(213) 344-4905", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "(213) 341-1055", "NA", 
"NA", "NA")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-54L))

This shows that call data of phone numbers that made the call and received the call. The full dataset has a lot more pairs of numbers, but I've only posted one pair for brevity

I'd like to find the number of TRUE in CallConnected within each pair of numbers, the first phone number inside who_answered, and the duration value that corresponds to Call Connected. I've thought of using group_by and summarise but I can't figure out a way.

The desired output would look like:

from          | to           | CallConnected | WhoAnswered  | Duration
----------------------------------------------------------------------
(122) 212-3452|(700) 890-6542| 2             |(213) 344-4905| 00:03:56

Upvotes: 2

Views: 53

Answers (4)

Rushabh Patel
Rushabh Patel

Reputation: 2764

You can also do this-

> library(data.table)
> setDT(dt)[CallConnected==TRUE,.(from,to,CallConnected,who_answered,duration)][,head(.SD,1),by=.(from,to,CallConnected)]

             from             to CallConnected   who_answered duration
1: (122) 212-3452 (700) 890-6542          TRUE (213) 344-4905 00:03:56

Upvotes: 0

olorcain
olorcain

Reputation: 1248

Maybe something like this?

output <- df %>%
  filter(CallConnected == TRUE) %>%
  group_by(from, to) %>%
  summarise(duration = sum(duration), firstWhoAnswered = first(who_answered), CallConnected = n())

Upvotes: 1

Ronak Shah
Ronak Shah

Reputation: 388817

Using dplyr we can count number of CallConnected by taking sum of TRUE values, find the first value of non-"NA" who_answered column and the first TRUE value from duration.

library(dplyr)

df %>%
  group_by(from, to) %>%
  summarise(count_call = sum(CallConnected), 
            who_answered = who_answered[which.max(who_answered != "NA")],
            Duration = duration[which.max(CallConnected)])


#  from           to             count_call who_answered   Duration
#  <chr>          <chr>               <int> <chr>          <time>  
#1 (122) 212-3452 (700) 890-6542          2 (213) 344-4905 03'56" 

Upvotes: 2

A. Stam
A. Stam

Reputation: 2222

I think you were pretty close to a solution. Here's my version:

df %>%
  group_by(from, to) %>%
  filter(CallConnected) %>%
  summarise(CallConnected = n(),
            WhoAnswered = first(who_answered),
            Duration = first(duration))

Upvotes: 2

Related Questions