Reputation: 145
I am trying to scrape the links of the screenshots from https://play.google.com/store/apps/details?id=com.whatsapp&hl=en
I select the `img` elements by their CSS classes `T75of DYfLw` and read the `src` attribute of each one to get the links of all the screenshots. However, instead of fetching the links of all screenshots, the code below prints the desired output (the links) for the first few images but prints `undefined` for the remaining links (`src` values) that should be fetched.
const appUrl = 'http://play.google.com/store/apps/details?id=com.whatsapp';

// Download the store listing and print the `src` attribute of every
// element matching the screenshot image classes.
request(appUrl, (error, response, html) => {
  // Anything other than a clean HTTP 200 response is ignored.
  if (error || response.statusCode !== 200) {
    return;
  }

  const $ = cheerio.load(html); // parse the fetched markup with cheerio
  const scLinks = [];
  $(".T75of.DYfLw ").each((_position, element) => {
    const image = $(element);
    const text = image.text(); // read as in the original; the value is not used
    scLinks.push(image.attr('src'));
  });
  console.log(scLinks);
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div jsname="CmYpTb" class="JiLaSd u3EI9e">
<div jsname="pCbVjb" class="SgoUSc" style="transform: matrix(1, 0, 0, 1, -20, 0);">
<button class="Q4vdJd" aria-label="Open screenshot 0" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="0">
<img src="https://lh3.googleusercontent.com/MMue08byixTw74ST_VkNQDUUJBgVEbjNHDYLhIuHmYhMIMJIp3KjVlnhhqZQOZUtNt8=w720-h310-rw" srcset="https://lh3.googleusercontent.com/MMue08byixTw74ST_VkNQDUUJBgVEbjNHDYLhIuHmYhMIMJIp3KjVlnhhqZQOZUtNt8=w1440-h620-rw 2x" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image">
</button>
<button class="Q4vdJd" aria-label="Open screenshot 1" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="1">
<img src="https://lh3.googleusercontent.com/foFmwvVGIwWWXJIukN7png18lFjFgbw3K7BqIm8G-jsFgSTVtkCa-dDkFApUzbvzIvbe=w720-h310-rw" srcset="https://lh3.googleusercontent.com/foFmwvVGIwWWXJIukN7png18lFjFgbw3K7BqIm8G-jsFgSTVtkCa-dDkFApUzbvzIvbe=w1440-h620-rw 2x" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image">
</button>
<button class="Q4vdJd" aria-label="Open screenshot 2" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="2">
<img data-ils="3" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image" srcset="https://lh3.googleusercontent.com/iLgMXFO5qEPlTffpI21zaoG51AORnfu8NPb_2SdAXYgOYcb-xDOMfCryPhVvGNjoew=w1440-h620-rw 2x" src="https://lh3.googleusercontent.com/iLgMXFO5qEPlTffpI21zaoG51AORnfu8NPb_2SdAXYgOYcb-xDOMfCryPhVvGNjoew=w720-h310-rw">
</button>
<button class="Q4vdJd" aria-label="Open screenshot 3" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="3">
<img data-ils="3" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image" srcset="https://lh3.googleusercontent.com/ElfUPGX67gv0TNQXuDxQGa5a4BYnLAZIgJJmoNoARvqlLQsKEQcNNWz-J_zqGV5vzQ=w1440-h620-rw 2x" src="https://lh3.googleusercontent.com/ElfUPGX67gv0TNQXuDxQGa5a4BYnLAZIgJJmoNoARvqlLQsKEQcNNWz-J_zqGV5vzQ=w720-h310-rw">
</button>
<button class="Q4vdJd" aria-label="Open screenshot 4" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="4">
<img data-ils="3" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image" srcset="https://lh3.googleusercontent.com/wBgVA58O2xncQkksLatHGoNLI8L6o_1ZM8AC3GCScxCyaDOWOvMsMilaj1Scp3kEOIw=w1440-h620-rw 2x" src="https://lh3.googleusercontent.com/wBgVA58O2xncQkksLatHGoNLI8L6o_1ZM8AC3GCScxCyaDOWOvMsMilaj1Scp3kEOIw=w720-h310-rw">
</button>
<button class="Q4vdJd" aria-label="Open screenshot 5" jscontroller="DeWHJf" jsaction="click:O1htCb" jsname="WR0adb" data-screenshot-item-index="5">
<img data-ils="3" class="T75of DYfLw" aria-hidden="true" alt="Screenshot Image" itemprop="image" srcset="https://lh3.googleusercontent.com/nknhZ6--QSyZlohrv72BaPlwO2EHPvGKJY8NNACYY0IY8j4QKYATP6alT1yyMYb-35j9=w1440-h620-rw 2x" src="https://lh3.googleusercontent.com/nknhZ6--QSyZlohrv72BaPlwO2EHPvGKJY8NNACYY0IY8j4QKYATP6alT1yyMYb-35j9=w720-h310-rw">
</button>
</div>
</div>
Output
Upvotes: 0
Views: 74
Reputation: 1731
First, let's define a recursive function that takes a list of candidate attribute names and returns the value of the first one that actually exists (and is non-empty) on the element.
/**
 * Returns the value of the first attribute in `attrArray` (searching from
 * `startIndex` onwards) that is present and non-empty on `elem`.
 *
 * @param {{attr: function(string): (string|undefined)}} elem - Wrapped element
 *   exposing an `.attr(name)` getter (e.g. a cheerio/jQuery selection).
 * @param {string[]} attrArray - Candidate attribute names, in priority order.
 * @param {number} [startIndex=0] - Index in `attrArray` at which to start.
 * @returns {string|undefined} The first non-empty attribute value, or
 *   `undefined` when none of the candidates yields one.
 */
function getValueofAttr(elem, attrArray, startIndex = 0) {
  // Stop once every candidate has been tried, so `.attr()` is never called
  // with an undefined attribute name.
  if (startIndex >= attrArray.length) {
    return undefined;
  }

  const data = elem.attr(attrArray[startIndex]);
  if (data === undefined || data === '') {
    // Missing or empty: fall through to the next candidate attribute.
    return getValueofAttr(elem, attrArray, startIndex + 1);
  }
  return data;
}
Example usage:
// Select the target element by class; substitute the class of your own HTML tag.
var link = $('.SomeSpecificControl');
// Pass the candidate attribute names in priority order, starting at index 0.
var href = getValueofAttr(link,['src','data-src','href'],0);
Now, for your scenario:
// Collect one link per screenshot image into `scLinks`.
$(".T75of.DYfLw ").each(function () {
  const link = $(this);
  // Some images presumably lack `src` in the fetched markup, so `data-src`
  // and then `srcset` are passed as fallbacks.
  // NOTE(review): a `srcset` value carries a descriptor such as " 2x" after
  // the URL; strip it if a bare link is required.
  scLinks.push(getValueofAttr(link, ['src', 'data-src', 'srcset'], 0));
});
Upvotes: 1