Reputation: 251
I am scraping a website and want to display the scraped data on the page via innerHTML, but I am having trouble getting it to work. I am trying to use
document.getElementById('results').innerHTML = searchJobs('');
But it's telling me that document is not defined. Sorry for the newbie question, but it's easier to ask than to be stuck on the same thing for 45 minutes.
Here is the code :
/**
 * Fetch a job-listing page and scrape one entry per `.result` element.
 *
 * @param {string} i - Suffix appended verbatim to the base URL.
 * @returns {Promise<Array<{title: string, linkToJob: (string|undefined), body: string}>>}
 *   Resolves to the scraped jobs, in document order. Rejects if the request
 *   fails or the server answers with a non-OK status.
 */
function searchJobs(i) {
  // The base URL must be a plain string: interpolating the Promise returned by
  // axios.get() into the template literal would produce "[object Promise]".
  const url = 'actualurl';

  return fetch(`${url}${i}`)
    .then((response) => {
      // fetch() only rejects on network errors, so HTTP errors are checked here
      // to avoid silently scraping an error page.
      if (!response.ok) {
        throw new Error(`Request failed with status ${response.status}`);
      }
      return response.text();
    })
    .then((html) => {
      // response.text() resolves to the HTML string itself; there is no `.data`
      // wrapper as there would be on an axios response.
      const $ = cheerio.load(html);
      const jobs = [];

      $('.result').each((index, element) => {
        const titleNode = $(element).children('.title');
        jobs.push({
          title: titleNode.text(),
          linkToJob: titleNode.children('a').attr('href'),
          body: $(element).children('.summary').text(),
        });
      });

      return jobs;
    });
}
// NOTE(review): `document` is a browser global; per the error quoted in the question it
// is not defined when this file is loaded by Node (the index route requires this module).
// searchJobs() also returns a Promise, so this would assign the Promise object itself
// rather than the scraped jobs — TODO confirm the intended rendering path.
document.getElementById('results').innerHTML = searchJobs('');
// The module's export is the searchJobs function itself, not an object wrapping it.
module.exports = searchJobs;
I can get the scraper to console.log just fine.
index route :
const express = require('express');
const router = express.Router();
const scraper = require('../scrapers/scrapers');
/* GET home page: render the `index` view with a fixed title. */
router.get('/', (req, res, next) => {
  const viewOptions = { title: 'Express' };
  res.render('index', viewOptions);
});
app.js :
// One `const` per dependency: in the comma-chained form, a `;` after the cheerio
// entry ended the declaration and left es6Renderer as an implicit global.
const express = require('express');
const path = require('path');
const cookieParser = require('cookie-parser');
const logger = require('morgan');
const puppeteer = require('puppeteer');
const axios = require('axios');
const cheerio = require('cheerio');
const es6Renderer = require('express-es6-template-engine');
var indexRouter = require('./routes/index');
var usersRouter = require('./routes/users');
// Create the Express application instance that the rest of this file configures.
var app = express();
// view engine setup
// Register es6Renderer as the engine for `.html` files, look views up in ./views,
// and make `html` the default extension used by res.render().
app.engine('html', es6Renderer);
app.set('views', './views');
app.set('view engine', 'html');
// Middleware is registered in the order it should run for each request.
// Request logging using morgan's 'dev' format.
app.use(logger('dev'));
// Body parsers for JSON and URL-encoded payloads.
app.use(express.json());
app.use(express.urlencoded({ extended: false }));
// Cookie parsing.
app.use(cookieParser());
// Serve static files from the `public` directory next to this file.
app.use(express.static(path.join(__dirname, 'public')));
// Mount the route modules.
app.use('/', indexRouter);
app.use('/users', usersRouter);
// catch 404 and forward to error handler
app.use(function(req, res, next) {
  // createError is not defined or required anywhere in the visible part of this
  // file, so calling it would throw a ReferenceError. Build the 404 error directly;
  // the error handler only reads `message` and `status`.
  const err = new Error('Not Found');
  err.status = 404;
  next(err);
});
// Final error-handling middleware: exposes error details to the view and renders
// the `error` page. The four-argument signature is what marks it as an error handler.
app.use((err, req, res, next) => {
  // Always expose the message; expose the full error object only in development.
  res.locals.message = err.message;
  const isDevelopment = req.app.get('env') === 'development';
  res.locals.error = isDevelopment ? err : {};

  // Use the error's own status when present, otherwise fall back to 500.
  const statusCode = err.status || 500;
  res.status(statusCode);
  res.render('error');
});

module.exports = app;
I think the problem is actually in the view, which is basically just this (trimmed to keep the post short):
<body>
<!-- Placeholder element that is meant to receive the scraped job results. -->
<p id="results"></p>
<!-- NOTE(review): this asks the browser to download and run scrapers.js. That file ends with
     module.exports and is required by the index route, so it looks like server-side (Node) code;
     confirm whether it is actually meant to run in the browser. -->
<script src="/scrapers/scrapers.js" async defer></script>
</body>
Upvotes: 1
Views: 289
Reputation: 146
I think I understand what you're trying to accomplish. Remove the document.getElementById
line from scrapers.js and modify the others like so:
index route:
// GET home page: run the scraper on the server, then render its results into the view.
router.get('/', function(req, res, next) {
  // scrapers.js exports the searchJobs function itself (module.exports = searchJobs),
  // so the required module is called directly; scraper.searchJobs would be undefined.
  // The explicit empty suffix avoids appending "undefined" to the scrape URL.
  scraper('')
    .then(function(results) {
      // Render only once the scrape has resolved, so the view receives the jobs
      // rather than a pending Promise.
      res.render('index', { locals: { results: results } });
    })
    // Forward scrape or render failures to the Express error handler.
    .catch(next);
});
view:
<body>
<!-- The results local supplied by the route is interpolated here by the template engine.
     NOTE(review): searchJobs builds an array of objects, and interpolating such an array as-is
     prints "[object Object]" entries; confirm whether the jobs should be formatted
     (e.g. mapped to markup) before or during rendering. -->
<p id="results">${results}</p>
</body>
By doing this, you run the scraper on the server inside Node and then use the templating engine to render the results into the HTML before it is sent to the browser.
Upvotes: 1