Reputation: 47
I'm trying to scrape a webpage & to put the value in cache in order to not hit the daily urlFetch limit.
This is the code I'm using, it works without the Cache & Properties service but not when I try to add that element.
function scrapercache(url) {
var url = "https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score";
var result = [];
var description;
var options = {
'muteHttpExceptions': true,
'followRedirects': false,
};
Logger.log('line 16 OK');
var cache = CacheService.getScriptCache();
var properties = PropertiesService.getScriptProperties();
Logger.log('line 21 OK');
let res = cache.get(url);
// if(res){
// return JSON.parse(res)
//}
Logger.log(res);
Logger.log('line 24 OK');
if (res) {
// trim url to prevent (rare) errors
url.toString().trim();
var r = UrlFetchApp.fetch(url, options);
Logger.log(r);
Logger.log('line 34 OK');
var c = r.getResponseCode();
Logger.log(c);
Logger.log('line 38 OK');
// check for meta refresh if 200 ok
if (c == 200) {
var html = r.getContentText();
cache.put(url, "cached", 21600);
properties.setProperty(url, html);
Logger.log('line 46 OK');
var $ = Cheerio.load(html); // make sure this lib is added to your project!
Logger.log('line 49 OK');
// meta description
if ($('meta[name=description]').attr("content")) {
description = $('meta[name=description]').attr("content").trim();
var trim_des = description.substr(0, 40);
Logger.log('line 55 OK');
}
}
result.push([trim_des]);
Logger.log('line 60 OK');
}
return result;
Logger.log('line 64 OK');
}
I call the function like that:
=scrapercache("https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score")
& I get the following error message
Error: Reference does not exist
EDIT: I added log lines to see if the script was processing correctly & it looks like it's ok only until like 28
Upvotes: 0
Views: 880
Reputation: 10365
Error: “Reference does not exist”
This error message is usually returned by calling a custom function in a spreadsheet cell that does not return a value. It is explicitly mentioned by the official docs, but the error message is not provided, so the confusion is understandable.
An empty array is not a valid return value (since there are no elements to return). The error is easily reproducible with the following custom function:
/**
* @customfunction
*/
function testReferenceError() {
const list = [];
return list;
}
Which, when called in a cell, resulting in the "desired" error:
Applied to your case
In your situation, when there is a cached data in cache
, the if
statement clause evaluates to false
(truthy value when negated evaluates to false
). When it does, nothing gets push
ed to the result
, and an empty array is returned in finally
(see above for the explanation of consequences). Consider this mock:
const cache = {
get() {
return "cached";
}
};
let res = cache.get("mock_url");
//ternary operator here acts the same as "if...else":
console.log( !res ? "will enter if block" : "will enter else block" );
Note on return
in finally: If you put a return statement into a finally
block, expect it to override the return
statements in try
or catch
. Consider this example close to how your program is structured:
const final = (add = false) => {
const list = [];
try {
add && list.push(1);
return [1,2]; //this return is skipped
}
catch(error) {
list.push(error);
}
finally {
return list;
}
};
console.log( final() );
Also, the question already has an answer here
Upvotes: 2
Reputation: 55002
You're not putting the results in the cache, you're putting the string "cached" there. Consider:
let cached = cache.get(url)
if(cached){
return JSON.parse(cached)
}
let results = ["blah","blah"] // stuff we got with cheerio
cache.put(url, JSON.stringify(results), 120)
return results
Upvotes: 2