Reputation: 462
I want to pass a list to a function that pulls journey information from TfL's API (legs, walk time, duration, etc.) and outputs a summary data frame of this information.
This is an example of what my list taxi.args.new looks like (the list has length 1):
I can run the code below for a list with length 1 only by referencing the 1st element (it works fine):
library(httr)
library(jsonlite)
library(tidyverse)
# Summarise one TfL journey. This version only works for taxi.args.new by
# hard-coding the first element: the loop below runs exactly once, with i = 1.
#
# The arguments mirror the names of the query parameters, but the request is
# built from the global list taxi.args.new rather than from these arguments.
# Returns `dat` after the summary columns have been appended to it.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency) {
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"
  dat <- data.frame()

  for (i in 1) {
    request <- GET(url = tflpath, query = taxi.args.new[[i]])
    json_content <- fromJSON(content(request, "text"), flatten = TRUE)

    # Journey legs. The name `df` is load-bearing: the rename() step below
    # matches column names that cbind() derives from the `df$...` expressions.
    df <- as.data.frame(json_content$journeys$legs)

    # Journey origin and destination (columns 2 and 3 of journeyVector)
    od <- as.data.frame(json_content$journeyVector)[, 2:3]

    leg_modes <- df[, c("duration", "mode.id")]

    # Step 1: one row per mode, with that mode's leg count spread into its
    # own column and zeros in the columns belonging to the other modes
    per_mode <- leg_modes %>%
      group_by(mode.id) %>%
      summarise(legs = n(), total_dur = sum(duration)) %>%
      mutate(
        tube_legs = case_when(
          mode.id == "tube" ~ as.numeric(legs),
          mode.id == "walking" ~ 0,
          mode.id == "bus" ~ 0
        ),
        bus_legs = case_when(
          mode.id == "tube" ~ 0,
          mode.id == "walking" ~ 0,
          mode.id == "bus" ~ as.numeric(legs)
        ),
        walk_legs = case_when(
          mode.id == "tube" ~ 0,
          mode.id == "walking" ~ as.numeric(legs),
          mode.id == "bus" ~ 0
        ),
        walk_dur = case_when(
          mode.id == "walking" ~ as.numeric(total_dur),
          TRUE ~ 0
        )
      ) %>%
      ungroup() %>%
      select(-mode.id)

    # Step 2: collapse to a single row and attach origin, destination and
    # the earliest departure / latest arrival across the legs
    journey_summary <- per_mode %>%
      summarise(
        total_legs = sum(legs),
        total_dur = sum(total_dur),
        tube_legs = sum(tube_legs),
        bus_legs = sum(bus_legs),
        walk_legs = sum(walk_legs),
        walk_dur = sum(walk_dur)
      ) %>%
      cbind(od) %>%
      cbind(min(df$departureTime), max(df$arrivalTime)) %>%
      rename(
        dep_time = "min(df$departureTime)",
        arrival_time = "max(df$arrivalTime)"
      ) %>%
      select(
        from, to, dep_time, arrival_time, tube_legs,
        bus_legs, walk_legs, total_legs, walk_dur, total_dur
      )

    dat <- append(dat, journey_summary)
  }

  dat
}
# Call api_function with the first set of arguments and coerce the result to
# a data frame summarising that journey
test_df <- taxi.args.new[[1]] %>%
  do.call(what = api_function) %>%
  as.data.frame()
This is what test_df
looks like:
However, I want to scale up and do this for a list of length 200k and cannot figure out how to do this. I want the output to be a df with all results appended together. So it'd look a bit like this (example with just 2 rows):
Any help would be greatly appreciated. You can probably tell I had already tried to do something (hence the for loop in the code).
UPDATE:
Thanks so much to Ronak for his suggestion. Here is the final result tidied up:
# Query the TfL Journey Planner for one journey and summarise its legs.
#
# Arguments: each argument is forwarded, under its own name, as a query
#   parameter of the JourneyResults request (from, to, date, time, timeIs,
#   journeyPreference, accessibilityPreference, walkingSpeed,
#   cyclePreference, bikeProficiency).
# Returns: a one-row data frame with columns from, to, dep_time,
#   arrival_time, tube_legs, bus_legs, walk_legs, overground_legs,
#   total_legs, walk_dur and total_dur, describing the first journey option
#   in the response.
# Errors: stops if the HTTP request fails or the response has no journeys.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency) {
  # NOTE(review): the {from}/{to} and [..] placeholders are sent as written;
  # the real values travel in `query` below. Confirm against the TfL API docs.
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  request <- GET(
    url = tflpath,
    query = list(
      from = from,
      to = to,
      date = date,
      time = time,
      timeIs = timeIs,
      journeyPreference = journeyPreference,
      accessibilityPreference = accessibilityPreference,
      walkingSpeed = walkingSpeed,
      cyclePreference = cyclePreference,
      bikeProficiency = bikeProficiency
    )
  )
  # Surface HTTP failures here instead of as an obscure parsing error later
  stop_for_status(request)

  # Explicit encoding avoids httr's "No encoding supplied" guess and message
  json_content <- fromJSON(
    content(request, "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # The original called a bare flatten(), which resolves to jsonlite's or
  # purrr's depending on attach order; only purrr's worked, and `$` on its
  # result picked the first journey's columns. Take the first journey
  # explicitly instead. Presumably `legs` holds one data frame per journey
  # option; a plain data frame is accepted as well.
  legs_by_journey <- json_content$journeys$legs
  if (length(legs_by_journey) == 0) {
    stop("TfL response contains no journeys", call. = FALSE)
  }
  first_legs <- if (is.data.frame(legs_by_journey)) {
    legs_by_journey
  } else {
    legs_by_journey[[1]]
  }

  # Journey origin and destination (columns 2 and 3 of journeyVector)
  od <- as.data.frame(json_content$journeyVector)[, 2:3]

  # Count legs per mode directly; a mode that is not listed contributes 0 to
  # the per-mode columns but is still included in total_legs and total_dur
  first_legs %>%
    summarise(
      total_legs = n(),
      total_dur = sum(duration),
      tube_legs = as.numeric(sum(mode.id == "tube")),
      bus_legs = as.numeric(sum(mode.id == "bus")),
      overground_legs = as.numeric(sum(mode.id == "overground")),
      walk_legs = as.numeric(sum(mode.id == "walking")),
      walk_dur = as.numeric(sum(duration[mode.id == "walking"]))
    ) %>%
    # Name the time columns here rather than renaming deparsed expressions
    cbind(
      od,
      dep_time = min(first_legs$departureTime),
      arrival_time = max(first_legs$arrivalTime)
    ) %>%
    select(
      from, to, dep_time, arrival_time, tube_legs,
      bus_legs, walk_legs, overground_legs, total_legs, walk_dur, total_dur
    )
}
# Run api_function once per argument set (by position, so list names never
# leak into rbind) and stack the resulting rows into one data frame
results <- seq_along(taxi.args.new) %>%
  lapply(function(idx) do.call(api_function, taxi.args.new[[idx]])) %>%
  do.call(what = rbind)
Upvotes: 1
Views: 70
Reputation: 389325
You can try with lapply
:
# Call api_function once per element of taxi.args.new and stack the rows.
# The element's index is passed explicitly as `i`, so it cannot be bound to
# another parameter by positional matching if an element lacks a name.
result <- do.call(rbind, lapply(seq_along(taxi.args.new), function(x) {
  do.call(api_function, c(taxi.args.new[[x]], i = x))
}))
Or with purrr
:
library(purrr)
# Same as the lapply version, with map_df() doing the row-binding.
# Inside a formula lambda the current element is `.x` (a bare `x` is not
# defined there). do.call() replaces purrr::invoke(), which is deprecated
# as of purrr 1.0.0.
result <- map_df(
  seq_along(taxi.args.new),
  ~ do.call(api_function, c(taxi.args.new[[.x]], i = .x))
)
I have changed the function a bit and added another argument, i, to it.
# Query the TfL Journey Planner for the i-th entry of taxi.args.new and
# summarise the legs of the first journey option returned.
#
# Arguments:
#   from ... bikeProficiency: accepted so that do.call() can bind the named
#     elements of a taxi.args.new entry; they are not read in the body.
#   i: index into the global list taxi.args.new; that element is sent as the
#     request's query parameters.
# Returns: a one-row data frame with columns from, to, dep_time,
#   arrival_time, tube_legs, bus_legs, walk_legs, total_legs, walk_dur and
#   total_dur.
# Errors: stops if the HTTP request fails or the response has no journeys.
api_function <- function(from, to, date, time, timeIs, journeyPreference,
                         accessibilityPreference, walkingSpeed,
                         cyclePreference, bikeProficiency, i) {
  # NOTE(review): the {from}/{to} and [..] placeholders are sent as written;
  # the real values travel in `query` below. Confirm against the TfL API docs.
  tflpath <- "https://api.tfl.gov.uk/Journey/JourneyResults/{from}/to/{to}?date=[date]&time=[time]&timeIs=[timeIs]&journeyPreference=[journeyPreference]&mode=tube%2Cbus%2Coverground&accessibilityPreference=[accessibilityPreference]&walkingSpeed=[walkingSpeed]&cyclePreference=[cyclePreference]&bikeProficiency=[bikeProficiency]"

  request <- GET(url = tflpath, query = taxi.args.new[[i]])
  # Surface HTTP failures here instead of as an obscure parsing error later
  stop_for_status(request)

  # Explicit encoding avoids httr's "No encoding supplied" guess and message
  json_content <- fromJSON(
    content(request, "text", encoding = "UTF-8"),
    flatten = TRUE
  )

  # Use the first journey option explicitly. as.data.frame() on the whole
  # `legs` object fails when journeys have different numbers of legs.
  # Presumably `legs` holds one data frame per journey option; a plain data
  # frame is accepted as well.
  legs_by_journey <- json_content$journeys$legs
  if (length(legs_by_journey) == 0) {
    stop("TfL response contains no journeys", call. = FALSE)
  }
  first_legs <- if (is.data.frame(legs_by_journey)) {
    legs_by_journey
  } else {
    legs_by_journey[[1]]
  }

  # Journey origin and destination (columns 2 and 3 of journeyVector)
  od <- as.data.frame(json_content$journeyVector)[, 2:3]

  # Count legs per mode directly. The URL also requests overground, which
  # the previous case_when() had no branch for, so an overground leg turned
  # tube_legs, bus_legs and walk_legs into NA. Such legs now count as 0
  # there and are still included in total_legs and total_dur.
  first_legs %>%
    summarise(
      total_legs = n(),
      total_dur = sum(duration),
      tube_legs = as.numeric(sum(mode.id == "tube")),
      bus_legs = as.numeric(sum(mode.id == "bus")),
      walk_legs = as.numeric(sum(mode.id == "walking")),
      walk_dur = as.numeric(sum(duration[mode.id == "walking"]))
    ) %>%
    # Name the time columns here rather than renaming deparsed expressions
    cbind(
      od,
      dep_time = min(first_legs$departureTime),
      arrival_time = max(first_legs$arrivalTime)
    ) %>%
    select(
      from, to, dep_time, arrival_time, tube_legs,
      bus_legs, walk_legs, total_legs, walk_dur, total_dur
    )
}
Upvotes: 1