Reputation:
I have this sample dataset (df
)
structure(list(from = c("(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452",
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452"), to = c("(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542",
"(700) 890-6542"), extension = c("9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls",
"9205 - Raw IRS Calls"), forwarded_to = c(NA, "(323) 457-2039",
"(310) 598-1753", "(818) 900-0706", "(818) 539-7811", "(213) 344-4965",
"(213) 458-7662", "(818) 208-3012", "(818) 293-0175", "(818) 284-4238",
"(818) 206-9506", "(310) 299-7340", "(310) 405-0875", "(213) 260-9113",
"(213) 805-5208", "(818) 887-3058", "(424) 271-2141", "(213) 218-6579",
"(818) 638-9466", "(213) 784-7164", "(323) 457-2038", "(213) 805-6959",
"(228) 285-7898", "(213) 341-1055", "(213) 568-0979", "(213) 344-4905",
"(818) 459-3811", NA, "(323) 457-2039", "(310) 598-1753", "(818) 900-0706",
"(818) 539-7811", "(213) 344-4965", "(213) 458-7662", "(818) 208-3012",
"(818) 293-0175", "(818) 284-4238", "(818) 206-9506", "(310) 299-7340",
"(310) 405-0875", "(213) 260-9113", "(213) 805-5208", "(818) 887-3058",
"(424) 271-2141", "(213) 218-6579", "(818) 638-9466", "(213) 784-7164",
"(323) 457-2038", "(213) 805-6959", "(228) 285-7898", "(213) 341-1055",
"(213) 568-0979", "(213) 344-4905", "(818) 459-3811"), date = c("Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018",
"Mon 12/31/2018"), time = structure(c(55440, 55440, 55440, 55440,
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440,
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55500, 55500,
55500, 55500, 55500, 55500, 55500, 55320, 55320, 55320, 55320,
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320,
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55380, 55380,
55380, 55380, 55380, 55380, 55380), class = c("hms", "difftime"
), units = "secs"), action = c("Phone Call", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "Phone Call", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe",
"FindMe", "FindMe", "FindMe"), action_result = c("Accepted",
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer",
"No Answer", "No Answer", "No Answer", "IP Phone Offline", "No Answer",
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer",
"No Answer", "No Answer", "No Answer", "No Answer", "Stopped",
"Stopped", "IP Phone Offline", "Stopped", "Stopped", "Call connected",
"Stopped", "Accepted", "No Answer", "No Answer", "No Answer",
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer",
"IP Phone Offline", "No Answer", "No Answer", "No Answer", "No Answer",
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer",
"No Answer", "Stopped", "Stopped", "IP Phone Offline", "Call connected",
"Stopped", "Stopped", "Stopped"), result_description = c("The call connected to and was accepted by this number.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"This DigitalLine was either not plugged in or did not have an internet connection.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.",
NA, NA, "The call connected to and was accepted by this number.",
NA, "The call connected to and was accepted by this number.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"This DigitalLine was either not plugged in or did not have an internet connection.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.",
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.",
"The call connected to and was accepted by this number.", NA,
NA, NA), duration = structure(c(297, 52, 52, 51, 51, 51, 51,
51, 51, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 9, 9, 0, 9,
9, 236, 9, 71, 52, 52, 52, 51, 51, 51, 51, 51, 0, 51, 51, 51,
51, 51, 51, 51, 51, 51, 51, 7, 7, 0, 13, 7, 7, 7), class = c("hms",
"difftime"), units = "secs"), ID = c(19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L), CallConnected = c(FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), who_answered = c("NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "(213) 344-4905", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "(213) 341-1055", "NA",
"NA", "NA")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-54L))
This shows that call data of phone numbers that made the call and received the call. The full dataset has a lot more pairs of numbers, but I've only posted one pair for brevity
I'd like to find the number of TRUE
in CallConnected
within each pair of numbers, the first phone number inside who_answered
, and the duration value that corresponds to Call Connected
. I've thought of using group_by
and summarise
but I can't figure out a way.
The desired output would look like:
from | to | CallConnected | WhoAnswered | Duration
----------------------------------------------------------------------
(122) 212-3452|(700) 890-6542| 2 |(213) 344-4905| 00:03:56
Upvotes: 2
Views: 53
Reputation: 2764
You can also do this-
> library(data.table)
> setDT(dt)[CallConnected==TRUE,.(from,to,CallConnected,who_answered,duration)][,head(.SD,1),by=.(from,to,CallConnected)]
from to CallConnected who_answered duration
1: (122) 212-3452 (700) 890-6542 TRUE (213) 344-4905 00:03:56
Upvotes: 0
Reputation: 1248
Maybe something like this?
output <- df %>%
filter(CallConnected == TRUE) %>%
group_by(from, to) %>%
summarise(duration = sum(duration), firstWhoAnswered = first(who_answered), CallConnected = n())
Upvotes: 1
Reputation: 388817
Using dplyr
we can count number of CallConnected
by taking sum
of TRUE
values, find the first value of non-"NA" who_answered
column and the first TRUE
value from duration
.
library(dplyr)
df %>%
group_by(from, to) %>%
summarise(count_call = sum(CallConnected),
who_answered = who_answered[which.max(who_answered != "NA")],
Duration = duration[which.max(CallConnected)])
# from to count_call who_answered Duration
# <chr> <chr> <int> <chr> <time>
#1 (122) 212-3452 (700) 890-6542 2 (213) 344-4905 03'56"
Upvotes: 2
Reputation: 2222
I think you were pretty close to a solution. Here's my version:
df %>%
group_by(from, to) %>%
filter(CallConnected) %>%
summarise(CallConnected = n(),
WhoAnswered = first(who_answered),
Duration = first(duration))
Upvotes: 2