Carlos Garibotto
Carlos Garibotto

Reputation: 59

I need to create a map (or loop) function to be able to save data from one list of dates

I have created the following script based on the information provided by this link Extract data URL with javascript (table in php)

code library(httr) library(rvest) library(janitor) library(dplyr) library(purrr)


# Request header declaring the POST body as URL-encoded form data.
headers <- c("Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8")

# Form body sent with every request. The `vfecha` date is hard-coded here, so
# every iteration of the loop below posts the same date.
data <- "vid_tipo=1&vprod=&vvari=&vfecha=22/06/2022"

# NOTE(review): `fechas` is not defined anywhere in this snippet (presumably a
# vector of dates; confirm). The loop index `i` is never used to build `data`.
for (i in seq_along(fechas)) {
  
    r <- httr::POST(
    url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
    httr::add_headers(.headers = headers),
    body = data
  )
  
  # Parse the response, take the `.timecard` table, promote its first row to
  # column names, clean the names, drop rows with an empty `producto`, and
  # convert every column matching "precio" to numeric. The trailing `->`
  # stores the result in `precios`; the leading `<-` also stores it in `t`.
  t <- content(r) %>%
    html_element(".timecard") %>%
    html_table() %>%
    row_to_names(1) %>%
    clean_names() %>%
    dplyr::filter(producto != "") %>%
    mutate_at(vars(matches("precio")), as.numeric) %>% 
    as_tibble() -> precios
  
    # `i` is a single number, so `seq_along(i)` is 1 and `1:seq_along(i)` is
    # always 1: every iteration writes to the same "precios_1.rds" file.
    timestamp <- 1:seq_along(i) 
    filename  <- paste0("c:/Users/.../Desktop/data/precios_",timestamp,".rds")
    saveRDS(precios, file = filename)
    }```

My problem is that this script, which I put together from other posts on this site, does not let me achieve the following:

1.-Scrape the page according to the sequence of dates;
2.- include the date in the file name such as "data_22-06-2022";
3.- I don't know how to link the date of the variable
 `data <- "vid_tipo=1&vprod=&vvari=&vfecha=22/06/2022"` 
with the sequence of one file for each date;
4.- Any improvements to the file download and save function are welcome.

Thank you

Upvotes: 0

Views: 124

Answers (1)

HoelR
HoelR

Reputation: 6583

The biggest issue is probably the date formatting. Here I used purrr's `map` family to save one .Rdata file per date.

library(tidyverse)
library(lubridate)
library(janitor)
library(httr)

Change the from_date and to_date to whatever you like

# Inclusive date range to scrape; change `from_date` and `to_date` as needed.
from_date <- as.Date("22/06/2022", format = "%d/%m/%Y")
to_date <- as.Date("26/06/2022", format = "%d/%m/%Y")

# One "dd/mm/yyyy" string per day in the range, ready to be pasted into the
# `vfecha` field of the request body.
dates_formatted <- format(seq(from_date, to_date, by = "day"), "%d/%m/%Y")

[1] "22/06/2022" "23/06/2022" "24/06/2022" "25/06/2022" "26/06/2022"

Create a function to get a data frame for one date

#' Download the price table for one date and save it to disk
#'
#' Posts `the_date` to the EMMSA report endpoint, extracts the `.timecard`
#' HTML table from the response, cleans it, and saves it with `save()` as
#' `precios_data_<date with "/" replaced by "_">.Rdata`.
#'
#' @param the_date A single date string, pasted verbatim into the `vfecha`
#'   form field (the calling code passes "dd/mm/yyyy" strings).
#' @param out_dir Directory the `.Rdata` file is written to. Defaults to the
#'   current working directory.
#' @return The cleaned tibble, invisibly. The function is mainly called for
#'   its side effect of writing the file.
get_df <- function(the_date, out_dir = ".") {
  stopifnot(length(the_date) == 1L, !is.na(the_date))

  headers <-
    c("Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8")
  data <- paste0("vid_tipo=1&vprod=&vvari=&vfecha=", the_date)

  r <- httr::POST(
    url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
    httr::add_headers(.headers = headers),
    body = data
  )
  # Fail with a clear HTTP error instead of trying to parse an error page.
  httr::stop_for_status(r)

  # rvest is installed with the tidyverse but not attached by
  # `library(tidyverse)`, so its functions are namespace-qualified here.
  # The object must stay named `df`: `save()` stores it under that name, and
  # that is the name `load()` will restore.
  df <- httr::content(r) %>%
    rvest::html_element(".timecard") %>%
    rvest::html_table() %>%
    janitor::row_to_names(1) %>%
    janitor::clean_names() %>%
    dplyr::filter(producto != "") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("precio"), as.numeric)) %>%
    tibble::as_tibble()

  # "/" cannot appear in a file name, so "22/06/2022" becomes "22_06_2022".
  file_name <- paste0(
    "precios_data_",
    gsub("/", "_", the_date, fixed = TRUE),
    ".Rdata"
  )
  save(df, file = file.path(out_dir, file_name))

  invisible(df)
}

# A tibble: 144 x 5
   producto variedad                          precio_min precio_max precio_prom
   <chr>    <chr>                                  <dbl>      <dbl>       <dbl>
 1 ACELGA   ACELGA                                  3.5         4          3.75
 2 AJI      AJI AMARILLO SECO                      13          14         13.4 
 3 AJI      AJI ESCABECHE FRESCO/ZANAHOR/LISO       1.5         2.7        2.18
 4 AJI      AJI MONTANA/CHAN(COSTA/SELVA)           5           8          6.5 
 5 AJI      AJI SECO PANCA                         18          20         18.8 
 6 AJI      AJI ROCOTO (COSTA/SIERRA/SELVA)         9.44       11.1       10.3 
 7 AJI      PAPRIKA                                13          14         13.5 
 8 AJO      AJO PELADO                              5.5         7.5        6.63
 9 AJO      AJO CRIOLLO O NAPURI                    6           8          6.88
10 AJO      AJO MORADO/BARRAN/LEGIT/OTROS           6.5         8          7.25
# ... with 134 more rows

Map through the dates

# `get_df()` is called for its side effect (one saved file per date), so
# `walk()` is used instead of `map()`: it runs the same calls without
# collecting and printing a list of return values.
purrr::walk(dates_formatted, get_df)

enter image description here

library(tidyverse)
library(lubridate)
library(janitor)
library(httr)


# Inclusive date range to scrape; change `from_date` and `to_date` as needed.
from_date <- as.Date("22/06/2022", format = "%d/%m/%Y")
to_date <- as.Date("26/06/2022", format = "%d/%m/%Y")

# One "dd/mm/yyyy" string per day in the range, ready to be pasted into the
# `vfecha` field of the request body.
dates_formatted <- format(seq(from_date, to_date, by = "day"), "%d/%m/%Y")


#' Download the price table for one date and save it to disk
#'
#' Posts `the_date` to the EMMSA report endpoint, extracts the `.timecard`
#' HTML table from the response, cleans it, and saves it with `save()` as
#' `precios_data_<date with "/" replaced by "_">.Rdata`.
#'
#' @param the_date A single date string, pasted verbatim into the `vfecha`
#'   form field (the calling code passes "dd/mm/yyyy" strings).
#' @param out_dir Directory the `.Rdata` file is written to. Defaults to the
#'   current working directory.
#' @return The cleaned tibble, invisibly. The function is mainly called for
#'   its side effect of writing the file.
get_df <- function(the_date, out_dir = ".") {
  stopifnot(length(the_date) == 1L, !is.na(the_date))

  headers <-
    c("Content-Type" = "application/x-www-form-urlencoded; charset=UTF-8")
  data <- paste0("vid_tipo=1&vprod=&vvari=&vfecha=", the_date)

  r <- httr::POST(
    url = "http://old.emmsa.com.pe/emmsa_spv/app/reportes/ajax/rpt07_gettable.php",
    httr::add_headers(.headers = headers),
    body = data
  )
  # Fail with a clear HTTP error instead of trying to parse an error page.
  httr::stop_for_status(r)

  # rvest is installed with the tidyverse but not attached by
  # `library(tidyverse)`, so its functions are namespace-qualified here.
  # The object must stay named `df`: `save()` stores it under that name, and
  # that is the name `load()` will restore.
  df <- httr::content(r) %>%
    rvest::html_element(".timecard") %>%
    rvest::html_table() %>%
    janitor::row_to_names(1) %>%
    janitor::clean_names() %>%
    dplyr::filter(producto != "") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("precio"), as.numeric)) %>%
    tibble::as_tibble()

  # "/" cannot appear in a file name, so "22/06/2022" becomes "22_06_2022".
  file_name <- paste0(
    "precios_data_",
    gsub("/", "_", the_date, fixed = TRUE),
    ".Rdata"
  )
  save(df, file = file.path(out_dir, file_name))

  invisible(df)
}

# `get_df()` is called for its side effect (one saved file per date), so
# `walk()` is used instead of `map()`: it runs the same calls without
# collecting and printing a list of return values.
purrr::walk(dates_formatted, get_df)

Upvotes: 1

Related Questions