Reputation: 115
I'm trying to scrape jobs that require zero experience from MonsterIndia.com, so I wrote the following code using cheerio and Node.js. I observed that I can search for PHP
jobs with a URL like https://www.monsterindia.com/**php**-jobs.html
However, to restrict the results to PHP jobs with zero experience I have to apply the filters manually on the site, and those filters are not reflected in the URL of the page. How can I achieve this? I am a complete beginner in web scraping, so please help.
// HTTP client used by scraper() to download the listing page.
var request = require('request');
// HTML parser; scraper() loads the response body into it and queries it with selectors.
var cheerio = require('cheerio');
// Search keyword that scraper() inserts into the listing URL.
const context = "php";
/**
 * Download the MonsterIndia listing page for a search keyword and log the
 * `title` attribute of every `a.title_in` anchor found on it.
 *
 * The function is fire-and-forget: results and failures are reported on the
 * console only, nothing is returned.
 *
 * @param {string} context - Keyword placed in front of "-jobs.html" in the URL.
 */
function scraper(context) {
    const url = `http://www.monsterindia.com/${context}-jobs.html`;

    request(url, (error, response, html) => {
        // Handle the failure cases first; `response` may be undefined on error.
        if (error) {
            console.error(error);
            return;
        }
        // A non-200 answer used to be dropped silently; report it instead.
        if (response.statusCode !== 200) {
            console.error(`Request for ${url} failed with status ${response.statusCode}`);
            return;
        }

        console.log("Request Called");
        const $ = cheerio.load(html);
        $('a.title_in').each((i, element) => {
            console.log($(element).attr('title'));
        });
    });
}
// Start the scrape for the keyword configured in `context`.
scraper(context);
Upvotes: 1
Views: 961
Reputation: 115
The best approach is to use the .filter()
function in cheerio. The following is my code, which implements one such filter.
// Keep only the job cards whose experience text starts with "0".
// Cards that contain no experience element at all are kept as well.
const job = $('div.job-container').filter((i, el) => {
    const exp = $(el)
        .children('div.view-apply-container')
        .children('div.padding-top-5')
        .children('div.col-md-3.col-xs-3.col-lg-3')
        .children('span.experience');

    // A cheerio selection is always a truthy object, so its length is the
    // only reliable way to tell whether an experience element was matched.
    if (exp.length === 0) {
        return true;
    }

    // Trim first: the markup may pad the text with whitespace.
    return exp.text().trim().charAt(0) === '0';
});
Upvotes: 0
Reputation: 1249
You can use the CasperJS library to achieve your aim. Here is a simple example from its website that searches for a word using the Google search engine and then fetches the links found on the first page of the search results.
// Holds the hrefs gathered by the search steps; printed at the end of the run.
var links = [];
// CasperJS instance on which the navigation steps below are registered.
var casper = require('casper').create();
/**
 * Collect the `href` attribute of every anchor matching `h3.r a` in the
 * current document.
 *
 * Handed to `evaluate` by the search steps, so it reads the global
 * `document` rather than taking arguments.
 *
 * @returns {Array} One href value per matched anchor, in document order.
 */
function getLinks() {
    var anchors = document.querySelectorAll('h3.r a');
    var hrefs = [];
    for (var idx = 0; idx < anchors.length; idx += 1) {
        hrefs.push(anchors[idx].getAttribute('href'));
    }
    return hrefs;
}
// Selector of the search form, shared by every step that touches it.
var SEARCH_FORM = 'form[action="/search"]';

// Step 0: block until the search form is present on the landing page.
function waitForSearchForm() {
    this.waitForSelector(SEARCH_FORM);
}

// Step 1: submit the first query ('casperjs') through the form.
function searchCasperjs() {
    this.fill(SEARCH_FORM, { q: 'casperjs' }, true);
}

// Step 2: store the links of the first result page, then submit the
// second query ('phantomjs') through the same form.
function collectThenSearchPhantomjs() {
    links = this.evaluate(getLinks);
    this.fill(SEARCH_FORM, { q: 'phantomjs' }, true);
}

// Step 3: append the links of the second result page to the first batch.
function collectPhantomjsLinks() {
    links = links.concat(this.evaluate(getLinks));
}

// Final callback: print the number of links and the list itself, then exit.
function reportLinks() {
    this.echo(links.length + ' links found:');
    this.echo(' - ' + links.join('\n - ')).exit();
}

casper.start('http://google.fr/', waitForSearchForm);
casper.then(searchCasperjs);
casper.then(collectThenSearchPhantomjs);
casper.then(collectPhantomjsLinks);
casper.run(reportLinks);
Upvotes: 0