Reputation: 43
I'm trying to catalog and track all types of coupons (title, picture, description, expiration, as well as which filter each one belongs to) using R. I think the page content is rendered with JavaScript, so the basic scraping tools are not working.
Is there a way to stay in R and do this? (I'm not proficient in other systems.)
Tried following the below - but couldn't seem to get it to work
https://datascienceplus.com/scraping-javascript-rendered-web-content-using-r/
EDIT
# Attempt 1: pull the bold text nodes (coupon titles) straight from the page.
library(rvest)
# read_html() needs a full URL including the scheme; a bare "kroger.com/..."
# string is interpreted as a local file path and fails.
coupon <- read_html("https://www.kroger.com/cl/coupons/")
# The "+" that followed %>% was a console continuation prompt pasted into the
# script; it is not part of the pipeline.
coupon <- coupon %>%
  html_nodes(".Text--bold") %>%
  html_text()
# NOTE(review): the coupon list appears to be filled in by JavaScript after
# page load, so this may still return character(0) -- confirm.
coupon
Also have tried this:
# Attempt 2: extract inline <script> code and evaluate it with V8.
# Loading both the required libraries
library(rvest)
library(V8)
# URL with js-rendered content to be scraped (the scheme is required,
# otherwise read_html() treats the string as a local file path)
link <- "https://www.kroger.com/cl/coupons/"
# Read the html page content and extract the javascript code found in
# <script> elements nested inside <li> elements.
# The URL must be passed as a string (here via `link`); a bare
# kroger.com/cl/coupons is parsed by R as a division of undefined symbols.
emailjs <- read_html(link) %>%
  html_nodes("li") %>%
  html_nodes("script") %>%
  html_text()
# Create a new v8 context
ct <- v8()
# Drop the document.write wrapper so that evaluating the script returns the
# generated markup, then parse that markup and print it as text.
# NOTE(review): this only works if the page actually builds its content with
# inline document.write() calls inside <li> elements -- confirm for this site.
read_html(ct$eval(gsub("document.write", "", emailjs))) %>%
  html_text()
Upvotes: 3
Views: 4658
Reputation: 6106
Though the page uses JavaScript, the data itself is sent as JSON. You can avoid dealing with the JavaScript by querying the hidden API directly:
# Fetch the coupon list from the site's JSON endpoint and keep the coupon table.
library(rvest)
library(jsonlite)
# Hidden API behind the coupons page; couponsCountPerLoad sets how many
# coupons are requested in one call.
my_url <- paste0(
  "https://www.kroger.com/cl/api/coupons",
  "?couponsCountPerLoad=418&sortType=relevance&newCoupons=false"
)
# The endpoint returns JSON, so parse it as JSON directly. Routing it through
# an HTML parser and reading the text of a <p> node depends on parser recovery
# behaviour and can corrupt payloads that contain markup characters.
data <- fromJSON(my_url)
# The coupons sit under data -> coupons in the response.
mydata <- data$data$coupons
> glimpse(mydata)
Observations: 418
Variables: 19
$ id <int> 2149194, 2149191, 2127870, 2129277, 2128587, 2126349, 2121480, 2128278, 2157633, 2169615, 2159613, 2140047, 2159769, 2167485, 2141526...
$ brandName <chr> "Other", "Other", "Store Brand", "Store Brand", "Store Brand", "Store Brand", "Sargento", "Hallmark", "Colgate", "Oscar Mayer", "Kett...
$ longDescription <chr> "Selling or purchasing fuel points is prohibited. Fuel redemption offer cannot be combined with any other discounts. No discounts to ...
$ shortDescription <chr> "Get 4x FUEL Points on FRI - SAT - SUN Only", "Get 4x FUEL Points on FRI - SAT - SUN Only", "2x Fuel Points", "Save $0.50 on 2 Kroger...
$ requirementDescription <chr> "when you buy a participating gift card. *Restrictions apply, see store for details.", "when you buy a $25, $50 or $100 Mastercard® o...
$ categories <list> ["Gift Cards", "Gift Cards", "General", "Snacks", <"Promotions", "Frozen">, "General", "Dairy", "General", <"Baking Goods", "Health ...
$ expirationDate <chr> "2018-05-13T04:00:00Z", "2018-05-13T04:00:00Z", "2018-07-29T04:00:00Z", "2018-05-26T04:00:00Z", "2018-05-26T04:00:00Z", "2018-05-29T0...
$ lastRedemptionDate <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "...
$ displayStartDate <chr> "2018-05-07T04:00:00Z", "2018-05-07T04:00:00Z", "2018-04-30T04:00:00Z", "2018-04-18T04:00:00Z", "2018-04-18T04:00:00Z", "2018-05-02T0...
$ imageUrl <chr> "https://cdnws.softcoin.com/mediaCache/ecoupon_1585374.png", "https://cdnws.softcoin.com/mediaCache/ecoupon_1585365.png", "https://cd...
$ krogerCouponNumber <chr> "800000013010", "800000013711", "10000008220", "800000012111", "800000012554", "800000014782", "800000015150", "800000022503", "80000...
$ addedToCard <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, ...
$ canBeAddedToCard <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T...
$ canBeRemoved <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T...
$ filterTags <list> [<"FT4XGRAD", "FTBL4XGRADFM", "FTBL4XGRAD", "FTBL4XMOMGC", "FTBL4XMOM1", "4XGCWEEKEND", "FTBL4XGRAD2", "KPF">, <"FTBL4XGRAD1", "4XGC...
$ title <chr> "Get 4x FUEL Points on FRI - SAT - SUN Only", "Get 4x FUEL Points on FRI - SAT - SUN Only", "2x Fuel Points", "Save 50¢", "Save 50¢",...
$ displayDescription <chr> "", "", "", "on 2 Kroger Potato Chips", "on 2 Kroger Deluxe Ice Cream", "", "on Sargento® Blends™ Slices", "on 2 Hallmark Cards", "on...
$ redemptionsAllowed <int> -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, -1, 1, -1, 1, 1, 1, 1, 1, -1, 1, 5, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ value <dbl> 1.00, 1.00, 1.00, 0.50, 0.50, 20.00, 0.75, 1.00, 0.50, 1.25, 1.00, 0.50, 1.49, 1.00, 1.00, 1.00, 0.75, 2.00, 0.50, 0.50, 1.00, 1.00, ...
Upvotes: 3