Reputation: 309
Is it possible to find every href
on a website that has a certain file extension? For example, it would print http://www.test.com/something.mp3
http://www.test.com/somelinktoamuscifile.mp3
http://www.test.com/music.mp3
.
It would show all of the links with a given file extension, .mp3
for example.
Would you do something like this?
// Asker's proposed check: log `url` when it has the wanted file extension.
// `url` is not defined in this snippet; it is assumed to hold one link as a string.
var extension = ".mp3"
// `checker` is `url` with the extension appended to its end.
var checker = url + extension
// NOTE(review): for a string `url`, `url + ".mp3"` is always longer than `url`,
// so this equality cannot hold and nothing is logged as written.
if(url == checker){console.log(url);}
Upvotes: 1
Views: 3998
Reputation: 1745
So you want to extract all links that contain a certain string from any given url? Maybe this script will help you:
// Downloads one page and prints every link whose href contains `toFind`.
var request = require('request');
var cheerio = require('cheerio');

var url = "http://www.stackoverflow.com";
var toFind = "delete"; // use file extension or whatever you want to find

request(url, function (err, resp, body) {
  // Surface request failures instead of trying to parse a missing body.
  if (err) throw err;

  var $ = cheerio.load(body);
  $('a').each(function (i, element) {
    var href = $(this).attr('href');
    // Skip anchors whose href is missing or empty; print the rest when
    // `toFind` occurs anywhere in the href (substring match, not suffix).
    if (href && href.indexOf(toFind) !== -1) {
      console.log(href);
    }
  });
});
Output:
$ node scraping.js
http://ux.stackexchange.com/questions/49991/should-yes-delete-it-be-red-or-green
Just change the values of url
and toFind
to suit your needs. There is a good tutorial on web scraping here and here. Of course, this can be done in a lot of different programming languages. I merely used JavaScript because you tagged the question that way.
Upvotes: 2
Reputation: 4227
Here is a native javascript solution that works in current browsers (IE8+, Chrome, Firefox) without jQuery.
/**
 * Logs every anchor element whose href attribute ends with the given suffix.
 *
 * The suffix is placed inside a double-quoted CSS string, so backslashes and
 * double quotes in it are escaped first; otherwise such a value would yield
 * a malformed (or different) selector.
 *
 * @param {string} extension - Suffix the href attribute must end with, e.g. ".mp3".
 * @returns {NodeList} The matched anchors, in the order they were logged.
 */
function getLinksWithExtension(extension) {
    // Escape the backslash first so the escapes added for quotes are not doubled.
    var escaped = String(extension).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    var links = document.querySelectorAll('a[href$="' + escaped + '"]');
    var i;

    for (i = 0; i < links.length; i++) {
        console.log(links[i]);
    }

    return links;
}
Upvotes: 2
Reputation: 1802
I think it goes like this:
// Checks whether `url` ends in the ".mp3" extension.
// `url` is not defined in this snippet; it is assumed to be a string holding one link.
var mp3_extension = '.mp3';
// Take everything from the last dot onward so the dot itself is kept;
// splitting on '.' would drop it and the comparison with '.mp3' could never match.
var last_dot = url.lastIndexOf('.');
var url_extension = last_dot === -1 ? '' : url.slice(last_dot);
if (url_extension === mp3_extension) {
    //go go go!!!
}
Upvotes: 0