Reputation: 143
I've recently started working with Node.js (using nightmare.js) to parse a website, and I'm having issues with callback functions and with displaying the returned results. I'm trying to call one function from inside another, but I can't get any results back; they all come back as undefined. Any help with this is greatly appreciated.
/**
 * Loads `loadURL` with the `nightmare` object, collects every
 * `.hidden-xs .used-vehicle` element and passes all vehicles to `callback`
 * as a single string, one line per vehicle.
 *
 * Nothing is returned: the string is only delivered through `callback`.
 * NOTE(review): no `.catch` is attached to the chain, so a failure presumably
 * never reaches `callback` - confirm.
 *
 * @param {string} loadURL - page to load.
 * @param {function(string)} callback - receives the formatted vehicle lines.
 */
function getDetails(loadURL, callback){
nightmare.goto(loadURL)
.wait(2000)
// `$` is not defined in this snippet (presumably the page's jQuery - confirm).
.evaluate(function(){
var gigs = [];
$('.hidden-xs .used-vehicle').each(function(){
// `item` is assigned without var/let/const, so it is not local to this function.
item = {}
item["year"] = $(this).attr('data-year')
item["make"] = $(this).attr('data-make')
item["model"] = $(this).attr('data-model')
item["body"] = $(this).attr('data-body')
item["color"] = $(this).attr('data-ext-color')
item["trim"] = $(this).attr('data-trim')
item["mileage"] = $(this).attr('data-mileage')
item["transmission"] = $(this).attr('data-transmission')
item["vin"] = $(this).find(".vehicle-overview").attr('id')
item["title"] = $(this).find(".vehicle-overview h2 a").text()
item["link"] = $(this).find(".vehicle-overview h2 a").attr('href')
item["price"] = $(this).find(".vehicle-content .price").text()
gigs.push(item)
})
return gigs
})
// end() is requested on the same `nightmare` object that getInventory also uses.
.end()
.then(function(result){
var returnString = '';
// `gig` is the key of each entry in `result`; it is not declared either.
for(gig in result){
// Fields are separated by single spaces; `mileage` is collected above but not included here.
returnString = returnString + result[gig].title + " " + result[gig].link + " " + result[gig].year + " " + result[gig].make + " " + result[gig].model + " " + result[gig].body + " " + result[gig].color + " " + result[gig].trim + " " + result[gig].transmission + " " + result[gig].vin + " " + result[gig].price + "\n"
}
callback(returnString)
})
}
// We will need to get the total amount of pages that we need to parse
/**
 * Reads the total number of cars and the number of cars on the first page of
 * `sURL`, derives the page count, builds one URL per page and calls
 * getDetails() for each of them.
 *
 * `callback` is invoked once synchronously after the loop (with
 * `returnDetails`) and once more from every getDetails() callback.
 *
 * @param {string} sURL - first inventory page.
 * @param {function(string)} callback - receives the collected text.
 */
function getInventory(sURL, callback){
nightmare.goto(sURL)
.wait(2000)
// `$` is not defined in this snippet (presumably the page's jQuery - confirm).
.evaluate(function(){
totals = [];
items = {}
totalCars = $('.total-found .count').text()
carsOnPage = $('.hidden-xs .used-vehicle').size()
items['carTotal'] = totalCars
items['onPage'] = carsOnPage
// totalCars comes from .text(), so the division relies on string-to-number coercion.
var pageCalc = (totalCars / carsOnPage)
items['tPages'] = Math.ceil(pageCalc)
totals.push(items)
return totals
})
// end() is requested here, before getDetails() reuses the same `nightmare` object below.
.end()
.then(function(result){
var totalCars = '';
var totalPages = '';
// `result` holds the single object pushed into `totals` above.
for (item in result){
totalPages = result[item].tPages
totalCars = result[item].carTotal
}
counter = 0;
newURL = '';
returnDetails = '';
for (i =0; i < totalPages; i++){
// The first page is sURL itself; later pages append the paging fragment with page = i + 1.
if (i == 0){
newURL = sURL;
} else {
counter = i + 1;
newURL = sURL + "#action=im_ajax_call&perform=get_results&_post_id=5&page=" + counter + "&show_all_filters=false";
}
//console.log(newURL)
// getDetails() has no return statement, so getINV is undefined here; the
// scraped text only arrives later through the inner callback.
getINV = getDetails(newURL, function(returnString){
callback(returnString)
})
// Appends the undefined value from above (string concatenation yields "undefined").
returnDetails = returnDetails + getINV
}
// Runs right after the loop, i.e. without waiting for any getDetails() callback.
callback(returnDetails)
})
}
// Entry point: starts the scrape and prints whatever getInventory passes to the callback.
// `startURL` is not defined in this snippet.
getInventory(startURL, function(result){
console.log(result)
})
Upvotes: 0
Views: 501
Reputation: 143
Learning promises and callbacks with asynchronous functions is quite an undertaking. I was able to get this to work with the following. Thank you all for your help and direction. Each answer sent me down a different rabbit hole to ultimately find the solution.
/**
 * Scrapes the paginated used-vehicle inventory that starts at `sURL` and writes
 * it to `test.csv`.
 *
 * Steps, run one after another on the shared `nightmare` object:
 *   1. read the total and per-page car counts from the first page and derive
 *      the URL of every result page,
 *   2. visit every page and turn each vehicle into a CSV row,
 *   3. visit every vehicle's own page and store its third gallery image in the row,
 *   4. write the rows with `createCsvWriter` and end the Nightmare session.
 *
 * Relies on names defined outside this function: `nightmare`,
 * `createCsvWriter`, `addressFull` and `latLongFull`.
 *
 * @param {string} sURL - URL of the first inventory page.
 * @returns {Promise<Array<Array>>} resolves with the rows that were written;
 *   rejects with the original error if any step fails (the Nightmare session
 *   is ended first so the process is not left waiting on it).
 */
function getInventory(sURL) {
  return nightmare
    .goto(sURL)
    .wait(2000)
    .evaluate(function () {
      // `$` must be supplied by the loaded page.
      const totalCars = $('.total-found .count').text();
      const carsOnPage = $('.hidden-xs .used-vehicle').length;
      return {
        carTotal: totalCars,
        onPage: carsOnPage,
        // totalCars is text; the division coerces it to a number.
        tPages: Math.ceil(totalCars / carsOnPage)
      };
    })
    .then(function (totals) {
      return scrapeInventoryRows(buildPageLinks(sURL, totals && totals.tPages));
    })
    .then(attachThirdImages)
    .then(writeInventoryCsv)
    .catch(function (error) {
      // Surface the failure to the caller instead of swallowing it, but end
      // the session first; a failure while ending must not mask `error`.
      const rethrow = function () {
        throw error;
      };
      return nightmare.end().then(rethrow, rethrow);
    });
}

// Builds the URL of every result page: page 1 is sURL itself, later pages add the paging fragment.
function buildPageLinks(sURL, totalPages) {
  // A page without cars yields NaN/Infinity (or null once serialized): treat it as "no pages".
  const pageCount = Number.isFinite(totalPages) ? totalPages : 0;
  const links = [];
  for (let page = 1; page <= pageCount; page++) {
    links.push(
      page === 1
        ? sURL
        : `${sURL}#action=im_ajax_call&perform=get_results&_post_id=5&page=${page}&show_all_filters=false`
    );
  }
  return links;
}

// Visits the page URLs strictly one after another and resolves with one CSV row per vehicle.
function scrapeInventoryRows(links) {
  return links.reduce(function (previous, url) {
    return previous.then(function (rows) {
      return nightmare
        .goto(url)
        .wait(5000)
        .evaluate(function () {
          const gigs = [];
          $('.hidden-xs .used-vehicle').each(function () {
            const card = $(this);
            const titleLink = card.find('.vehicle-overview h2 a');
            gigs.push({
              year: card.attr('data-year'),
              make: card.attr('data-make'),
              model: card.attr('data-model'),
              body: card.attr('data-body'),
              color: card.attr('data-ext-color'),
              trim: card.attr('data-trim'),
              mileage: card.attr('data-mileage'),
              transmission: card.attr('data-transmission'),
              vin: card.find('.vehicle-overview').attr('id'),
              title: titleLink.text(),
              link: titleLink.attr('href'),
              price: card.find('.vehicle-content .price').text()
            });
          });
          return gigs;
        })
        .then(function (vehicles) {
          (vehicles || []).forEach(function (vehicle) {
            try {
              rows.push(toCsvRow(vehicle));
            } catch (error) {
              // Best effort: one malformed vehicle (e.g. missing mileage) must not abort the whole run.
              console.error('Skipping vehicle ' + (vehicle && vehicle.vin) + ':', error);
            }
          });
          return rows;
        });
    });
  }, Promise.resolve([]));
}

// Converts one scraped vehicle into a row whose 25 columns follow the CSV header order.
function toCsvRow(vehicle) {
  // Global patterns: a plain-string replace() would only strip the first comma.
  const mileage = vehicle.mileage.replace(/,/g, '');
  const price = vehicle.price.replace(/[$*,]/g, '').trim() + ' USD';
  const dealerAddress = `{addr1: '${addressFull.addr1}', city: '${addressFull.city}', region: '${addressFull.region}', postal_code: '${addressFull.postal_code}', country: '${addressFull.country}'}`;
  return [
    vehicle.vin,
    vehicle.title,
    vehicle.year + ' ' + vehicle.make + ' ' + vehicle.model,
    vehicle.link,
    vehicle.make,
    vehicle.model,
    vehicle.year,
    mileage,
    'MI',
    '', // image[0].url, filled in by attachThirdImages()
    '',
    'AUTOMATIC', // fixed value; the scraped transmission is not used
    'GASOLINE',
    'OTHER',
    'Other',
    vehicle.vin,
    'OTHER',
    price,
    dealerAddress,
    vehicle.color,
    price,
    'AVAILABLE',
    'USED',
    latLongFull.latitude,
    latLongFull.longitude
  ];
}

// Visits each row's vehicle page (column 3) in turn and stores the third gallery image in column 9.
function attachThirdImages(rows) {
  return rows.reduce(function (previous, row) {
    return previous.then(function (finished) {
      return nightmare
        .goto(row[3])
        .wait(500)
        .evaluate(function () {
          return $('.gallery-thumbs .owl-item:nth-of-type(3) img').attr('src');
        })
        .then(function (imageSrc) {
          row.splice(9, 1, imageSrc);
          console.log(row);
          finished.push(row);
          return finished;
        });
    });
  }, Promise.resolve([]));
}

// Writes the rows to test.csv, then ends the Nightmare session and resolves with the rows.
function writeInventoryCsv(rows) {
  const csvWriter = createCsvWriter({
    header: ["vehicle_id", "title", "description", "url", "make", "model", "year", "mileage.value", "mileage.unit", "image[0].url", "image[0].tag[0]", "transmission", "fuel_type", "body_style", "drivetrain", "vin", "condition", "price", "address", "exterior_color", "sale_price", "availability", "state_of_vehicle", "latitude", "longitude"],
    path: 'test.csv'
  });
  console.log(rows);
  return csvWriter
    .writeRecords(rows)
    .then(function () {
      // Returning the chain calls its then(), which is what actually runs the queued end().
      return nightmare.end();
    })
    .then(function () {
      console.log('...Done');
      return rows;
    });
}
// Entry point. `startURL` is not defined in this snippet.
// NOTE: the getInventory defined just before this call is declared as
// getInventory(sURL) and never references a second argument, so this callback
// is not invoked by it.
getInventory(startURL, function(response){
try {
console.log("This is the response" + response);
}
catch(error){
console.log(error)
}
});
Upvotes: 0
Reputation: 18826
I won't bother telling you that you should not mix callbacks with promises like that. But let's see the problem for now.
How about you check for errors too? Maybe your script is throwing errors. I can see you are calling the callback in `.then` but nothing in `.catch`. Maybe `.then` is never getting any data.
Let's check your functions. You are calling `.end` every time. Are you creating a new Nightmare instance every time too? In the `getInventory` function, you should not call `.end`. In the `getDetails` function, you should not call `.end`. It ends the Nightmare instance and you lose your data. Call `nightmare.end()` after you are done with all of your functions and work. To do this properly you will need to learn more about promises; see case 3 below.
Learn how promises work. On the line below, you are never waiting for the function to finish.
// The question's getDetails has no return statement, so getINV is undefined;
// the scraped text only arrives later through the callback passed here.
getINV = getDetails(newURL, function(returnString){
callback(returnString)
})
You should wait for the promises to finish. Also, make sure nightmare is not trying to browse two links at same time.
So go ahead and learn about Promises and async await stuff.
How would I solve your code?
I would use Promise.all, .map and a bunch of other new stuff. Here is some sample code for you. Don't copy-paste or run it directly; try to understand why it differs from your code and what its result would be.
const pLimit = require("promise-limit")(2);
/**
 * Scrapes one inventory page and formats every vehicle found on it.
 *
 * Uses the `nightmare` object defined outside this function and does not call
 * `.end()` on it, so the caller can keep loading further pages afterwards.
 *
 * @param {string} loadURL - URL of the inventory page to load.
 * @returns {Promise<string>} one line per vehicle, each terminated by "\n",
 *   holding title, link, year, make, model, body, color, trim, transmission,
 *   vin and price separated by single spaces; "" when no vehicle is found.
 *   Rejects with an Error when loading or scraping fails.
 */
function getDetails(loadURL) {
  return nightmare
    .goto(loadURL)
    .wait(2000)
    .evaluate(() => {
      // `$` must be supplied by the loaded page.
      const gigs = [];
      $(".hidden-xs .used-vehicle").each(function() {
        const card = $(this);
        const titleLink = card.find(".vehicle-overview h2 a");
        gigs.push({
          year: card.attr("data-year"),
          make: card.attr("data-make"),
          model: card.attr("data-model"),
          body: card.attr("data-body"),
          color: card.attr("data-ext-color"),
          trim: card.attr("data-trim"),
          mileage: card.attr("data-mileage"),
          transmission: card.attr("data-transmission"),
          vin: card.find(".vehicle-overview").attr("id"),
          title: titleLink.text(),
          link: titleLink.attr("href"),
          price: card.find(".vehicle-content .price").text()
        });
      });
      return gigs;
    })
    .then(vehicles =>
      (vehicles || [])
        .map(
          v =>
            `${v.title} ${v.link} ${v.year} ${v.make} ${v.model} ${v.body} ${v.color} ${v.trim} ${v.transmission} ${v.vin} ${v.price}\n`
        )
        .join("")
    )
    .catch(error => {
      // Keep real Errors (and their stack) untouched; wrap anything else so the
      // caller always gets an Error with a readable message instead of
      // "[object Object]".
      if (error instanceof Error) {
        throw error;
      }
      const wrapped = new Error((error && error.message) || String(error));
      wrapped.cause = error;
      throw wrapped;
    });
}
// We will need to get the total amount of pages that we need to parse
/**
 * Works out how many result pages the inventory at `sURL` has and scrapes each
 * of them with getDetails().
 *
 * Relies on names defined outside this function: `nightmare`, `getDetails` and
 * `pLimit` (the concurrency limiter created from "promise-limit").
 * NOTE(review): all jobs run against the same `nightmare` object; whether two
 * of them may be in flight at once is not visible here - confirm, or create
 * the limiter with a limit of 1.
 *
 * @param {string} sURL - URL of the first inventory page.
 * @returns {Promise<string[]>} the getDetails() result of every page, in page
 *   order; rejects with an Error when any step fails.
 */
function getInventory(sURL) {
  return nightmare
    .goto(sURL)
    .wait(2000)
    .evaluate(() => {
      // `$` must be supplied by the loaded page.
      const totalCars = $(".total-found .count").text();
      const carsOnPage = $(".hidden-xs .used-vehicle").length;
      return [
        {
          carTotal: totalCars,
          onPage: carsOnPage,
          // totalCars is text; the division coerces it to a number.
          tPages: Math.ceil(totalCars / carsOnPage)
        }
      ];
    })
    .then(totals => {
      const summary = totals && totals.length > 0 ? totals[totals.length - 1] : null;
      // A page without cars yields NaN/Infinity: treat it as "no pages".
      const totalPages = summary && Number.isFinite(summary.tPages) ? summary.tPages : 0;

      const urls = [];
      for (let page = 1; page <= totalPages; page++) {
        // Page 1 is sURL itself; later pages add the paging fragment.
        urls.push(
          page === 1
            ? sURL
            : `${sURL}#action=im_ajax_call&perform=get_results&_post_id=5&page=${page}&show_all_filters=false`
        );
      }

      // Each job must load its own `url`; the limiter decides when it starts.
      return Promise.all(urls.map(url => pLimit(() => getDetails(url))));
    })
    .catch(error => {
      // Keep real Errors (and their stack) untouched; wrap anything else so the
      // caller always gets an Error with a readable message.
      if (error instanceof Error) {
        throw error;
      }
      const wrapped = new Error((error && error.message) || String(error));
      wrapped.cause = error;
      throw wrapped;
    });
}
// Entry point: print the scraped pages, or the failure.
// `startURL` is not defined in this snippet.
getInventory(startURL)
  .then(result => {
    console.log(result);
  })
  .catch(error => {
    // console.err does not exist; console.error is the logging method.
    console.error(error);
  });
Resources:
Upvotes: 1
Reputation: 39260
Maybe the following will work. I changed the loops to reduce and map, took out jQuery, and made some small changes.
The most important ones are:
Here is the code:
/**
 * Scrapes one inventory page without jQuery.
 *
 * Uses the `nightmare` object defined outside this function; the chain is
 * returned so callers can wait for the result.
 *
 * @param {string} loadURL - URL of the inventory page to load.
 * @returns {Promise<string[]>} one string per `.hidden-xs .used-vehicle`
 *   element: year, make, model, body, color, trim, mileage, transmission, vin,
 *   title, link and price joined by single spaces (missing attributes become
 *   empty strings).
 */
const getDetails = loadURL =>
  nightmare
    .goto(loadURL)
    .wait(2000)
    .evaluate(() =>
      Array.from(document.querySelectorAll('.hidden-xs .used-vehicle')).map(element => {
        const overview = element.querySelector('.vehicle-overview');
        const titleLink = element.querySelector('.vehicle-overview h2 a');
        const price = element.querySelector('.vehicle-content .price');
        return [
          element.getAttribute('data-year'),
          element.getAttribute('data-make'),
          element.getAttribute('data-model'),
          element.getAttribute('data-body'),
          element.getAttribute('data-ext-color'),
          element.getAttribute('data-trim'),
          element.getAttribute('data-mileage'),
          element.getAttribute('data-transmission'),
          overview.getAttribute('id'),
          titleLink.innerText,
          titleLink.getAttribute('href'),
          price.innerText
        ].join(' ');
      })
    );
// We will need to get the total amount of pages that we need to parse
/**
 * Reads the car counts from the first inventory page, derives the number of
 * result pages and scrapes them one after another with getDetails().
 *
 * Relies on `nightmare` and `getDetails`, both defined outside this function.
 *
 * @param {string} sURL - URL of the first inventory page.
 * @returns {Promise<Array>} the getDetails() results of all pages,
 *   concatenated in page order; [] when the page count cannot be determined.
 */
const getInventory = sURL =>
  nightmare
    .goto(sURL)
    .wait(2000)
    .evaluate(() => {
      // innerText is a string, so parse it into a number before dividing.
      const totalCars = parseInt(document.querySelector('.total-found .count').innerText, 10);
      const carsOnPage = document.querySelectorAll('.hidden-xs .used-vehicle').length;
      return {
        carTotal: totalCars,
        onPage: carsOnPage,
        tPages: Math.ceil(totalCars / carsOnPage)
      };
    })
    .then(totalItem => {
      // NaN/Infinity (no cars on the page) would make the array construction
      // below throw or never end, so fall back to "no pages".
      const pageCount =
        totalItem && Number.isInteger(totalItem.tPages) && totalItem.tPages > 0
          ? totalItem.tPages
          : 0;
      return Array.from({ length: pageCount }, (_, index) => index + 1).reduce(
        (previous, pageNumber) =>
          // Chaining on the previous promise keeps the page loads strictly sequential.
          previous.then(results => {
            const pageURL =
              pageNumber === 1
                ? sURL
                : sURL + "#action=im_ajax_call&perform=get_results&_post_id=5&page=" + pageNumber + "&show_all_filters=false";
            return getDetails(pageURL).then(result => results.concat(result));
          }),
        Promise.resolve([])
      );
    });
// Entry point: log the collected rows, or a warning if any step rejected.
getInventory(startURL)
  .then(result => console.log(result))
  .catch(err => console.warn("Something went wrong:", err));
Upvotes: 0
Reputation: 687
Try using callback instead of return
/**
 * Variant of getDetails that calls a `callback` from inside the function given
 * to evaluate() instead of returning the collected vehicles from it.
 *
 * Nothing is returned; the outer `callback` is called from the final `.then`
 * with the formatted string.
 *
 * @param {string} loadURL - page to load.
 * @param {function(string)} callback - receives the formatted vehicle lines.
 */
function getDetails(loadURL, callback){
nightmare.goto(loadURL)
.wait(2000)
// NOTE(review): this function declares its own `callback` parameter, which
// shadows the outer one; no extra argument is passed to evaluate() here.
// Nightmare's documentation describes an error-first callback for this case,
// in which `callback(gigs)` would report `gigs` as the error - confirm.
.evaluate(function(callback){
var gigs = [];
$('.hidden-xs .used-vehicle').each(function(){
// `item` is assigned without var/let/const, so it is not local to this function.
item = {}
item["year"] = $(this).attr('data-year')
item["make"] = $(this).attr('data-make')
item["model"] = $(this).attr('data-model')
item["body"] = $(this).attr('data-body')
item["color"] = $(this).attr('data-ext-color')
item["trim"] = $(this).attr('data-trim')
item["mileage"] = $(this).attr('data-mileage')
item["transmission"] = $(this).attr('data-transmission')
item["vin"] = $(this).find(".vehicle-overview").attr('id')
item["title"] = $(this).find(".vehicle-overview h2 a").text()
item["link"] = $(this).find(".vehicle-overview h2 a").attr('href')
item["price"] = $(this).find(".vehicle-content .price").text()
gigs.push(item)
})
// The function itself no longer returns anything.
callback(gigs)
})
// end() is requested on the same `nightmare` object that getInventory also uses.
.end()
.then(function(result){
var returnString = '';
for(gig in result){
// Fields are separated by single spaces; `mileage` is collected above but not included here.
returnString = returnString + result[gig].title + " " + result[gig].link + " " + result[gig].year + " " + result[gig].make + " " + result[gig].model + " " + result[gig].body + " " + result[gig].color + " " + result[gig].trim + " " + result[gig].transmission + " " + result[gig].vin + " " + result[gig].price + "\n"
}
callback(returnString)
})
}
// We will need to get the total amount of pages that we need to parse
/**
 * Reads the total number of cars and the number of cars on the first page of
 * `sURL`, derives the page count, builds one URL per page and calls
 * getDetails() for each of them.
 *
 * `callback` is invoked once synchronously after the loop (with
 * `returnDetails`) and once more from every getDetails() callback.
 *
 * @param {string} sURL - first inventory page.
 * @param {function(string)} callback - receives the collected text.
 */
function getInventory(sURL, callback){
nightmare.goto(sURL)
.wait(2000)
// `$` is not defined in this snippet (presumably the page's jQuery - confirm).
.evaluate(function(){
totals = [];
items = {}
totalCars = $('.total-found .count').text()
carsOnPage = $('.hidden-xs .used-vehicle').size()
items['carTotal'] = totalCars
items['onPage'] = carsOnPage
// totalCars comes from .text(), so the division relies on string-to-number coercion.
var pageCalc = (totalCars / carsOnPage)
items['tPages'] = Math.ceil(pageCalc)
totals.push(items)
return totals
})
// end() is requested here, before getDetails() reuses the same `nightmare` object below.
.end()
.then(function(result){
var totalCars = '';
var totalPages = '';
// `result` holds the single object pushed into `totals` above.
for (item in result){
totalPages = result[item].tPages
totalCars = result[item].carTotal
}
counter = 0;
newURL = '';
returnDetails = '';
for (i =0; i < totalPages; i++){
// The first page is sURL itself; later pages append the paging fragment with page = i + 1.
if (i == 0){
newURL = sURL;
} else {
counter = i + 1;
newURL = sURL + "#action=im_ajax_call&perform=get_results&_post_id=5&page=" + counter + "&show_all_filters=false";
}
//console.log(newURL)
// getDetails() has no return statement, so getINV is undefined here; the
// scraped text only arrives later through the inner callback.
getINV = getDetails(newURL, function(returnString){
callback(returnString)
})
// Appends the undefined value from above (string concatenation yields "undefined").
returnDetails = returnDetails + getINV
}
// Runs right after the loop, i.e. without waiting for any getDetails() callback.
callback(returnDetails)
})
}
// Entry point: starts the scrape and prints whatever getInventory passes to the callback.
// `startURL` is not defined in this snippet.
getInventory(startURL, function(result){
console.log(result)
})
Upvotes: 0