JassiL
JassiL

Reputation: 462

Pass a list through a function and create a summary df

I want to pass a list to a function that pulls journey information from TfL's API (legs, walk time, duration, etc.) and outputs a summary data frame (df) of this info.

This is an example of what my list taxi.args.new looks like (list length is 1):

enter image description here

I can run the code below for a list with length 1 only by referencing the 1st element (it works fine):


library(httr)
library(jsonlite)
library(tidyverse)

# this function works for the list taxi.args.new (using only its first element)

#' Summarise one TfL journey-planner response as a data frame.
#'
#' Sends a single GET request to the TfL Journey Planner endpoint, passing the
#' function's own arguments as query parameters, and condenses the returned
#' journey legs into leg counts and durations.
#'
#' @param from,to,date,time,timeIs,journeyPreference,accessibilityPreference,walkingSpeed,cyclePreference,bikeProficiency
#'   Values sent to the API as the query parameter of the same name.
#' @return A data frame (expected to hold a single row) with columns `from`,
#'   `to`, `dep_time`, `arrival_time`, `tube_legs`, `bus_legs`, `walk_legs`,
#'   `total_legs`, `walk_dur` and `total_dur`.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency) {
  # NOTE(review): httr appears to replace a URL's own query string when
  # `query` is supplied, so the bracketed placeholders (and `mode=`) below may
  # never be sent -- confirm against the httr documentation.
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  # Build the query from the arguments instead of reading the global
  # `taxi.args.new`, so the result depends only on what the caller passed.
  request <- GET(
    url = tflpath,
    query = list(
      from = from,
      to = to,
      date = date,
      time = time,
      timeIs = timeIs,
      journeyPreference = journeyPreference,
      accessibilityPreference = accessibilityPreference,
      walkingSpeed = walkingSpeed,
      cyclePreference = cyclePreference,
      bikeProficiency = bikeProficiency
    )
  )
  # Surface HTTP failures here rather than as a missing-column error below.
  stop_for_status(request)
  json_content <- fromJSON(
    content(request, "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Journey legs.
  # NOTE(review): assumes the legs coerce to one data frame -- confirm for
  # responses that contain several alternative journeys.
  df <- as.data.frame(json_content$journeys$legs)
  # Journey origin and destination (columns 2 and 3 of the journey vector).
  od <- as.data.frame(json_content$journeyVector)[, 2:3]
  df1 <- df[, c("duration", "mode.id")]

  # Count legs per mode directly; a mode without its own column (for example
  # overground) contributes 0 to the per-mode counts instead of NA.
  leg_summary <- df1 %>%
    summarise(
      total_legs = n(),
      total_dur  = sum(duration),
      tube_legs  = as.numeric(sum(mode.id == "tube")),
      bus_legs   = as.numeric(sum(mode.id == "bus")),
      walk_legs  = as.numeric(sum(mode.id == "walking")),
      walk_dur   = as.numeric(sum(duration[mode.id == "walking"]))
    )

  # Return a data frame; the earlier `append()` onto a data frame produced a
  # plain list of columns.
  cbind(
    od,
    leg_summary,
    dep_time = min(df$departureTime),
    arrival_time = max(df$arrivalTime)
  ) %>%
    select(from, to, dep_time, arrival_time, tube_legs,
           bus_legs, walk_legs, total_legs, walk_dur, total_dur)
}

## Summarise a single journey: the first argument list is spread over
## api_function's parameters and the result is coerced to a data frame.
test_df <- as.data.frame(
  do.call(what = api_function, args = taxi.args.new[[1]])
)

This is what test_df looks like:

enter image description here

However, I want to scale up and do this for a list of length 200k and cannot figure out how to do this. I want the output to be a df with all results appended together. So it'd look a bit like this (example with just 2 rows):

enter image description here

Any help would be greatly appreciated. You can probably tell I had already tried to do something (hence the for loop in the code).

UPDATE:

Thanks so much to Ronak for his suggestion. Here is the final result tidied up:

#' Summarise one TfL journey-planner response as a data frame.
#'
#' Sends a single GET request to the TfL Journey Planner endpoint, passing the
#' function's arguments as query parameters, and condenses the returned
#' journey legs into leg counts and durations.
#'
#' @param from,to,date,time,timeIs,journeyPreference,accessibilityPreference,walkingSpeed,cyclePreference,bikeProficiency
#'   Values sent to the API as the query parameter of the same name.
#' @return A data frame (expected to hold a single row) with columns `from`,
#'   `to`, `dep_time`, `arrival_time`, `tube_legs`, `bus_legs`, `walk_legs`,
#'   `overground_legs`, `total_legs`, `walk_dur` and `total_dur`.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency) {
  # NOTE(review): httr appears to replace a URL's own query string when
  # `query` is supplied, so the bracketed placeholders (and `mode=`) below may
  # never be sent -- confirm against the httr documentation.
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  request <- GET(
    url = tflpath,
    query = list(
      from = from,
      to = to,
      date = date,
      time = time,
      timeIs = timeIs,
      journeyPreference = journeyPreference,
      accessibilityPreference = accessibilityPreference,
      walkingSpeed = walkingSpeed,
      cyclePreference = cyclePreference,
      bikeProficiency = bikeProficiency
    )
  )
  # Surface HTTP failures here rather than as an obscure error further down.
  stop_for_status(request)
  json_content <- fromJSON(
    content(request, "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Journey legs. Both jsonlite and purrr export `flatten`; with tidyverse
  # attached after jsonlite the bare name resolves to purrr's, so name it
  # explicitly instead of relying on attach order.
  # NOTE(review): the flattened list holds the leg columns of every returned
  # journey, and `$` picks the first match -- presumably the first journey's
  # legs; verify this is the journey you want summarised.
  legs <- purrr::flatten(json_content$journeys$legs)
  # Mode and duration of each leg.
  df1 <- data.frame(duration = legs$duration, mode.id = legs$mode.id)
  # Journey origin and destination (columns 2 and 3 of the journey vector).
  od <- as.data.frame(json_content$journeyVector)[, 2:3]

  # Count legs per mode directly; any mode without its own column contributes
  # 0 to the per-mode counts instead of NA.
  leg_summary <- df1 %>%
    summarise(
      total_legs      = n(),
      total_dur       = sum(duration),
      tube_legs       = as.numeric(sum(mode.id == "tube")),
      bus_legs        = as.numeric(sum(mode.id == "bus")),
      overground_legs = as.numeric(sum(mode.id == "overground")),
      walk_legs       = as.numeric(sum(mode.id == "walking")),
      walk_dur        = as.numeric(sum(duration[mode.id == "walking"]))
    )

  cbind(
    od,
    leg_summary,
    dep_time = min(legs$departureTime),
    arrival_time = max(legs$arrivalTime)
  ) %>%
    select(from, to, dep_time, arrival_time, tube_legs,
           bus_legs, walk_legs, overground_legs, total_legs, walk_dur,
           total_dur)
}

# Call api_function once per argument list, then stack the per-journey
# summaries into a single data frame.
results <- do.call(
  rbind,
  lapply(
    seq_along(taxi.args.new),
    function(idx) do.call(api_function, taxi.args.new[[idx]])
  )
)

Upvotes: 1

Views: 70

Answers (1)

Ronak Shah
Ronak Shah

Reputation: 389325

You can try with lapply :

# For every position in taxi.args.new, call api_function with that element's
# arguments plus the position itself, then row-bind the summaries.
result <- do.call(
  rbind,
  lapply(
    seq_along(taxi.args.new),
    function(idx) do.call(api_function, c(taxi.args.new[[idx]], idx))
  )
)

Or with purrr :

library(purrr)
# Row-bind one summary per journey. Inside a formula lambda the current
# element is `.x`; the previous `taxi.args.new[[x]]` referred to an undefined
# `x`. `do.call()` replaces `invoke()`, which purrr 1.0 deprecates.
result <- map_df(
  seq_along(taxi.args.new),
  ~ do.call(api_function, c(taxi.args.new[[.x]], .x))
)

I have changed the function a bit and added another argument, i, to it.

#' Summarise one TfL journey-planner response as a data frame.
#'
#' Sends a single GET request to the TfL Journey Planner endpoint, passing the
#' function's own arguments as query parameters, and condenses the returned
#' journey legs into leg counts and durations.
#'
#' @param from,to,date,time,timeIs,journeyPreference,accessibilityPreference,walkingSpeed,cyclePreference,bikeProficiency
#'   Values sent to the API as the query parameter of the same name.
#' @param i Optional and unused. Kept so callers that pass the list index as
#'   an extra argument keep working; the query is now built from the
#'   arguments rather than from the global `taxi.args.new[[i]]`.
#' @return A data frame (expected to hold a single row) with columns `from`,
#'   `to`, `dep_time`, `arrival_time`, `tube_legs`, `bus_legs`, `walk_legs`,
#'   `total_legs`, `walk_dur` and `total_dur`.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency, i = NULL) {
  # NOTE(review): httr appears to replace a URL's own query string when
  # `query` is supplied, so the bracketed placeholders (and `mode=`) below may
  # never be sent -- confirm against the httr documentation.
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  request <- GET(
    url = tflpath,
    query = list(
      from = from,
      to = to,
      date = date,
      time = time,
      timeIs = timeIs,
      journeyPreference = journeyPreference,
      accessibilityPreference = accessibilityPreference,
      walkingSpeed = walkingSpeed,
      cyclePreference = cyclePreference,
      bikeProficiency = bikeProficiency
    )
  )
  # Surface HTTP failures here rather than as a missing-column error below.
  stop_for_status(request)
  json_content <- fromJSON(
    content(request, "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Journey legs.
  # NOTE(review): assumes the legs coerce to one data frame -- confirm for
  # responses that contain several alternative journeys.
  df <- as.data.frame(json_content$journeys$legs)
  # Journey origin and destination (columns 2 and 3 of the journey vector).
  od <- as.data.frame(json_content$journeyVector)[, 2:3]
  df1 <- df[, c("duration", "mode.id")]

  # Count legs per mode directly; a mode without its own column (for example
  # overground) contributes 0 to the per-mode counts instead of NA.
  leg_summary <- df1 %>%
    summarise(
      total_legs = n(),
      total_dur  = sum(duration),
      tube_legs  = as.numeric(sum(mode.id == "tube")),
      bus_legs   = as.numeric(sum(mode.id == "bus")),
      walk_legs  = as.numeric(sum(mode.id == "walking")),
      walk_dur   = as.numeric(sum(duration[mode.id == "walking"]))
    )

  cbind(
    od,
    leg_summary,
    dep_time = min(df$departureTime),
    arrival_time = max(df$arrivalTime)
  ) %>%
    select(from, to, dep_time, arrival_time, tube_legs,
           bus_legs, walk_legs, total_legs, walk_dur, total_dur)
}

Upvotes: 1

Related Questions