Reputation: 59
I created the following script based on the information in this question: "Extract data URL with javascript (table in php)".
code
library(httr) library(rvest) library(janitor) library(dplyr) library(purrr)
# Form-encoded content type sent with the POST request below.
headers <- c("Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8")
# Request body. The date (`vfecha`) is hard-coded here, outside the loop, so
# every iteration posts exactly the same body.
data <- "vid_tipo=1&vprod=&vvari=&vfecha=22/06/2022"
# NOTE(review): `fechas` is not defined anywhere in the visible script.
for (i in seq_along(fechas)) {
# POST the form body to the report endpoint.
r <- httr::POST(
url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
httr::add_headers(.headers = headers),
body = data
)
# Parse the response, read the ".timecard" element as a table, promote the
# first row to column names, clean the names, drop rows whose `producto` is
# empty, and convert the columns matching "precio" to numeric.
# The trailing `-> precios` stores the result in `precios`; `t` receives the
# same value.
t <- content(r) %>%
html_element(".timecard") %>%
html_table() %>%
row_to_names(1) %>%
clean_names() %>%
dplyr::filter(producto != "") %>%
mutate_at(vars(matches("precio")), as.numeric) %>%
as_tibble() -> precios
# `i` is a single index, so `seq_along(i)` is 1 and `timestamp` is always 1:
# every iteration writes to the same "precios_1.rds" file.
timestamp <- 1:seq_along(i)
filename <- paste0("c:/Users/.../Desktop/data/precios_",timestamp,".rds")
saveRDS(precios, file = filename)
}```
My problem is that this loop, which I put together from other questions on this site, does not give me the following results:
1.-Scrape the page according to the sequence of dates;
2.- include the date in the file name such as "data_22-06-2022";
3.- I don't know how to link the date of the variable
`data <- "vid_tipo=1&vprod=&vvari=&vfecha=22/06/2022"`
with the sequence of one file for each date;
4.- Any improvements to the file download and save function are welcome.
Thank you
Upvotes: 0
Views: 124
Reputation: 6583
The biggest issue is probably the date formatting. Here I used `map` to save multiple `.RData` files.
library(tidyverse)
library(lubridate)
library(janitor)
library(httr)
Change `from_date` and `to_date` to whatever you like:
# Inclusive date range to scrape; edit these two values as needed.
from_date <- as.Date("22/06/2022", format = "%d/%m/%Y")
to_date <- as.Date("26/06/2022", format = "%d/%m/%Y")

# One "dd/mm/YYYY" string per day in the range.
dates_formatted <- format(seq(from_date, to_date, by = "day"), "%d/%m/%Y")
[1] "22/06/2022" "23/06/2022" "24/06/2022" "25/06/2022" "26/06/2022"
Create a function to get a data frame for one date
#' Download the price table for one date and save it to disk.
#'
#' Posts a form-encoded request for `the_date` to the report endpoint,
#' reads the ".timecard" table from the response, cleans it, and saves it
#' in the working directory as "precios_data_<dd>_<mm>_<yyyy>.Rdata".
#' The saved object is named `df`.
#'
#' @param the_date A single date string formatted as "dd/mm/YYYY".
#' @return The cleaned tibble, invisibly.
get_df <- function(the_date) {
  stopifnot(length(the_date) == 1L, !is.na(the_date))

  headers <- c(
    "Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8"
  )
  data <- paste0("vid_tipo=1&vprod=&vvari=&vfecha=", the_date)

  r <- httr::POST(
    url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
    httr::add_headers(.headers = headers),
    body = data
  )
  # Fail loudly on an HTTP error instead of trying to parse an error page.
  httr::stop_for_status(r)

  # rvest is not attached by library(tidyverse), so its functions are
  # namespace-qualified here. The variable must stay named `df`: save()
  # stores the object under that name.
  df <- httr::content(r) %>%
    rvest::html_element(".timecard") %>%
    rvest::html_table() %>%
    janitor::row_to_names(1) %>%
    janitor::clean_names() %>%
    dplyr::filter(producto != "") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("precio"), as.numeric)) %>%
    tibble::as_tibble()

  # "22/06/2022" -> "22_06_2022", so the date is safe to use in a file name.
  date_tag <- gsub("/", "_", the_date, fixed = TRUE)
  save(df, file = paste0("precios_data_", date_tag, ".Rdata"))

  invisible(df)
}
# A tibble: 144 x 5
producto variedad precio_min precio_max precio_prom
<chr> <chr> <dbl> <dbl> <dbl>
1 ACELGA ACELGA 3.5 4 3.75
2 AJI AJI AMARILLO SECO 13 14 13.4
3 AJI AJI ESCABECHE FRESCO/ZANAHOR/LISO 1.5 2.7 2.18
4 AJI AJI MONTANA/CHAN(COSTA/SELVA) 5 8 6.5
5 AJI AJI SECO PANCA 18 20 18.8
6 AJI AJI ROCOTO (COSTA/SIERRA/SELVA) 9.44 11.1 10.3
7 AJI PAPRIKA 13 14 13.5
8 AJO AJO PELADO 5.5 7.5 6.63
9 AJO AJO CRIOLLO O NAPURI 6 8 6.88
10 AJO AJO MORADO/BARRAN/LEGIT/OTROS 6.5 8 7.25
# ... with 134 more rows
Map through the dates
# get_df() is called for its side effect (one saved file per date), so use
# walk() rather than map(), which would collect and print the return values.
walk(dates_formatted, get_df)
library(tidyverse)
library(lubridate)
library(janitor)
library(httr)
# Inclusive date range to scrape; edit these two values as needed.
from_date <- as.Date("22/06/2022", format = "%d/%m/%Y")
to_date <- as.Date("26/06/2022", format = "%d/%m/%Y")

# One "dd/mm/YYYY" string per day in the range.
dates_formatted <- format(seq(from_date, to_date, by = "day"), "%d/%m/%Y")
#' Download the price table for one date and save it to disk.
#'
#' Posts a form-encoded request for `the_date` to the report endpoint,
#' reads the ".timecard" table from the response, cleans it, and saves it
#' in the working directory as "precios_data_<dd>_<mm>_<yyyy>.Rdata".
#' The saved object is named `df`.
#'
#' @param the_date A single date string formatted as "dd/mm/YYYY".
#' @return The cleaned tibble, invisibly.
get_df <- function(the_date) {
  stopifnot(length(the_date) == 1L, !is.na(the_date))

  headers <- c(
    "Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8"
  )
  data <- paste0("vid_tipo=1&vprod=&vvari=&vfecha=", the_date)

  r <- httr::POST(
    url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
    httr::add_headers(.headers = headers),
    body = data
  )
  # Fail loudly on an HTTP error instead of trying to parse an error page.
  httr::stop_for_status(r)

  # rvest is not attached by library(tidyverse), so its functions are
  # namespace-qualified here. The variable must stay named `df`: save()
  # stores the object under that name.
  df <- httr::content(r) %>%
    rvest::html_element(".timecard") %>%
    rvest::html_table() %>%
    janitor::row_to_names(1) %>%
    janitor::clean_names() %>%
    dplyr::filter(producto != "") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("precio"), as.numeric)) %>%
    tibble::as_tibble()

  # "22/06/2022" -> "22_06_2022", so the date is safe to use in a file name.
  date_tag <- gsub("/", "_", the_date, fixed = TRUE)
  save(df, file = paste0("precios_data_", date_tag, ".Rdata"))

  invisible(df)
}
# get_df() is called for its side effect (one saved file per date), so use
# walk() rather than map(), which would collect and print the return values.
walk(dates_formatted, get_df)
Upvotes: 1