Reputation: 1263
I used to build web apps in PHP, hence my habit of doing things synchronously.
I'm currently trying to build a web scraper. The way it works is: it first fetches a list of proxies, then checks whether each proxy is working.
However, I've realized that most of the calls are asynchronous, and I'm having a hard time understanding the async module in Node.js.
This is the main method.
var proxyChecker = require('proxy-checker');
var request = require('request');
var forEach = require('async-foreach').forEach;
var async = require('async');
// Shared state: every "host:port" found in the downloaded list, and the
// subset of those that passed the check.
var proxiesJar = [];
var goodProxies = [];
var proxyCount = 0;

// Entry point: download the proxy list, then run `checker` on every entry.
// Note that async.each starts the iterator for all items in parallel, so the
// "[Checking]" lines are all logged before any result line appears.
parseProxiesList(function (error) {
    if (error) {
        // Without a list there is nothing to check; report and stop instead
        // of silently iterating over an empty jar.
        console.log('Could not load proxies list: ' + error);
        return;
    }
    // async.each passes only an error to its final callback.
    async.each(proxiesJar, checker, function (err) {
        console.log('Result:' + err);
    });
});
Getting proxy list
/**
 * Downloads the proxy list and appends every "host:port" entry found in it
 * to the shared `proxiesJar` array.
 *
 * @param {Function} callback - Invoked exactly once. Receives the request
 *   error, or an Error describing a non-200 response, as first argument on
 *   failure; receives `(null, 1)` once the list has been parsed.
 */
function parseProxiesList(callback) {
    console.log("parseProxiesList");
    request('http://hidden.com', function (error, response, body) {
        if (error) {
            console.log("Error [1]");
            return callback(error);
        }
        console.log("Got proxies list");
        if (response.statusCode !== 200) {
            // A non-200 answer used to fall through without ever invoking
            // the callback, leaving the caller waiting forever.
            return callback(new Error('Unexpected status code: ' + response.statusCode));
        }
        // Declared locally: assigning undeclared names creates implicit
        // globals and throws in strict mode / ES modules.
        var addressPattern = /[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\:[0-9]{1,5}/;
        var proxies = body.split(/\r?\n/);
        for (var i = 0; i < proxies.length; i++) {
            if (!addressPattern.test(proxies[i])) {
                continue;
            }
            // Only the first space-separated token of the line is used as
            // the "host:port" pair.
            var elts = proxies[i].split(' ')[0].split(':');
            var host = elts[0];
            var port = elts[1];
            proxiesJar.push(host + ":" + port);
        }
        callback(null, 1);
    });
}
After getting the proxy list, it checks if the proxy is working.
/**
 * Iterator handed to async.each: probes a single proxy through
 * proxyChecker.checkProxy and records it in `goodProxies` when it works.
 *
 * @param {string} proxy - Proxy address in "host:port" form.
 * @param {Function} callback - Called with `(null, "host:port")` for a
 *   working proxy, or with the checker's `err` value otherwise.
 */
var checker = function(proxy, callback) {
    var parts = proxy.split(':');
    var targetHost = parts[0];
    var targetPort = parts[1];
    var probe = {url: 'http://google.com', regex: /Google/};

    console.log('[Checking] ' + targetHost + ':' + targetPort);

    proxyChecker.checkProxy(targetHost, targetPort, probe, function(host, port, ok, statusCode, err) {
        // The address is rebuilt from the values the checker reports back,
        // not from the ones that were passed in.
        var address = host + ":" + port;
        if (ok) {
            console.log("Working proxy: " + address);
            goodProxies.push(address);
            return callback(null, address);
        }
        console.log("Proxy don't work: " + address);
        // NOTE(review): a truthy err presumably ends the surrounding
        // async.each early — confirm that is intended for a dead proxy.
        return callback(err);
    });
};
The logs, however, turn out to be:
[Checking] 1.1.1.1:80
[Checking] 2.2.2.2:80
.
.
.
Working proxy: 1.1.1.1:80
Working proxy: 2.2.2.2:80
instead of
[Checking] 1.1.1.1:80
Working proxy: 1.1.1.1:80
[Checking] 2.2.2.2:80
Working proxy: 2.2.2.2:80
Upvotes: 0
Views: 368
Reputation: 7666
You can try promises: create a promise to get the proxy list and then execute a promise to check each proxy.
You can find more on promises over here
Example:
/**
 * Promise-based variant: downloads the proxy list and appends every
 * "host:port" entry found in it to the shared `proxiesJar` array.
 *
 * @returns {Object} The promise of a Q deferred. It is resolved with
 *   `proxiesJar` once the list has been parsed, and rejected with the
 *   request error or with an Error describing a non-200 response.
 */
function parseProxiesList() {
    var deferred = Q.defer();
    console.log("parseProxiesList");
    request('http://hidden.com', function (error, response, body) {
        if (error) {
            console.log("Error [1]");
            // Stop here: nothing below applies to a failed request.
            return deferred.reject(error);
        }
        console.log("Got proxies list");
        if (response.statusCode !== 200) {
            // Settle the promise on every path so consumers never hang.
            return deferred.reject(new Error('Unexpected status code: ' + response.statusCode));
        }
        // Declared locally: assigning undeclared names creates implicit
        // globals and throws in strict mode / ES modules.
        var addressPattern = /[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\:[0-9]{1,5}/;
        var proxies = body.split(/\r?\n/);
        for (var i = 0; i < proxies.length; i++) {
            if (!addressPattern.test(proxies[i])) {
                continue;
            }
            // Only the first space-separated token of the line is used as
            // the "host:port" pair.
            var elts = proxies[i].split(' ')[0].split(':');
            var host = elts[0];
            var port = elts[1];
            proxiesJar.push(host + ":" + port);
        }
        deferred.resolve(proxiesJar);
    });
    // The promise must be returned from the function itself, not from the
    // request callback, otherwise callers receive undefined.
    return deferred.promise;
}
This way you have created a promise that gets the proxy list. You can do the same for checking a proxy.
Upvotes: 0
Reputation: 457
async.each executes iterators for each item in parallel.
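Use async.eachSeries to run the iterator on one item at a time, in order (each call is still asynchronous, but the next one only starts after the previous one has finished).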
Upvotes: 1