Reputation: 337
Trying to make a simple Tumblr scraper using node.js
var request = require('request');
var fs = require('fs');
// API key that is appended to every request URL below.
var apiKey = 'my-key-here';
// Running post counter: used as the `offset` query parameter and as the
// number written in front of each title in content.txt.
var offset = 0;
// Issue five requests in a row.
for (var i=0; i<5; i++) {
console.log('request #' + i + '...');
// The URL is assembled from whatever value `offset` holds at this moment.
var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
+ apiKey
+ '&offset='
+ offset;
console.log(requestURL);
// The function passed to request() receives (error, response, body) for this URL.
request(requestURL, function(error, response, body) {
// Only successful (HTTP 200) responses are processed; anything else is ignored.
if (!error && response.statusCode == 200) {
var resultAsJSON = JSON.parse(body);
// Append one "<offset> <title>" line to content.txt per returned post.
resultAsJSON.response.posts.forEach(function(obj) {
fs.appendFile('content.txt', offset + ' ' + obj.title + '\n', function (err) {
if (err) return console.log(err);
});
// NOTE: this is the only place `offset` changes - inside the response callback.
offset++;
});
}
});
}
By default, the API only returns a maximum of 20 latest posts. I want to grab all the posts instead. As a test, I want to get the latest 100 first, hence the i<5
in the loop declaration.
The trick to do it is to use the offset
parameter. Given an offset
value of 20, for example, the API will not return the latest 20, but instead returns posts starting from the 21st from the top.
As I can't be sure that the API will always return 20 posts, I am using offset++
to get the correct offset number.
The code above works, but console.log(requestURL)
returns http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key=my-key-here&offset=0
five times.
So my question is: why does the offset
value in my requestURL
remain 0, even though I have added offset++
?
Upvotes: 0
Views: 355
Reputation: 36975
You should increment the offset
in the loop, not in callbacks. Callbacks fire only after the request has been completed, which means you make five requests with offset = 0
and it's incremented after you get a response.
// Post-increment: the URL gets the current offset, then the counter is bumped
// for the next loop iteration.
var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
+ apiKey
+ '&offset='
+ (offset++); // increment here, before passing URL to request();
Edit: To advance the offset by 20 in each iteration and still use the offset inside the callback:
// Fetch five pages of posts, 20 posts apart. Each request's offset is computed
// in the loop itself, because the response callbacks only run after the
// requests complete.
for (var i = 0; i < 5; i++) {
  var offset = i * 20;
  var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
    + apiKey
    + '&offset='
    + offset;
  (function (off, url) {
    request(url, function (error, response, body) {
      // Report failures instead of silently dropping the page.
      if (error) return console.log(error);
      if (response.statusCode !== 200) {
        return console.log('request for offset ' + off + ' failed with status ' + response.statusCode);
      }

      var posts;
      try {
        // Throws on malformed JSON or when the `response` object is missing.
        posts = JSON.parse(body).response.posts;
      } catch (parseError) {
        return console.log(parseError);
      }
      if (!Array.isArray(posts)) {
        return console.log('request for offset ' + off + ' returned no posts array');
      }

      // Build the whole page as one string and append it with a single call:
      // asynchronous fs operations carry no ordering guarantee, so one
      // appendFile per post could write the lines out of order.
      var lines = '';
      posts.forEach(function (obj) {
        lines += off + ' ' + obj.title + '\n';
        off++;
      });
      if (!lines) return; // empty page: nothing to write

      fs.appendFile('content.txt', lines, function (err) {
        if (err) return console.log(err);
      });
    });
  }(offset, requestURL)); // pass the offset (and its URL) from loop to a closure
}
Upvotes: 1