Reputation: 45
I'm trying to scrape an HTML page and turn it into a JSON object.
This is the page:
<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C
Battery Volt = 3320 mV
Air Pressure = 1031.12 hPa
Air Temp = 22.66 C
Object Temp = 12.375 C
Ambient Temp = 23.062 C
Light = 0.00 lux
HDC Humidity = 43.93 %RH
HDC Temp = 23.03 C
Acc X = 0.02 G
Acc Y = 0.02 G
Acc Z = -1.10 G
Gyro X = -2.93 deg per sec
Gyro Y = -2.74 deg per sec
Gyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>
This is my attempt :
// Fetch the device's index page and try to wrap the response body in a JSON object.
var request = require('request');
// NOTE(review): cheerio is required here but never used below.
var cheerio = require('cheerio');
request('http://[aaaa::212:4b00:c2a:b704]/index.html', function(error, response,html){
// Only proceed when there was no error and the status code is 200.
if(!error && response.statusCode == 200){
//JSON.parse(html)
//console.log('--------------------------------------');
// temp is already a plain object; its "id" property holds the callback's html argument.
var temp = {"id":html}
// NOTE(review): JSON.parse expects JSON text. Given an object it parses the
// string "[object Object]" and throws a SyntaxError, so the log below is never reached.
var obj = JSON.parse(temp)
console.log(JSON.stringify(obj));
}
});
How can I put the contents of the Sensors div into an object that has the sensor names as keys and the sensor readings as values?
UPDATE :
Thanks to Rafal Wiliński's help I managed to get it mostly working, but the value of the last key still contains the trailing divs from the page.
new code :
// Fetch the index page and build an object from every "key = value" line of the response.
var request = require('request');
// NOTE(review): cheerio is required here but never used below.
var cheerio = require('cheerio');
request('http://[aaaa::212:4b00:c2a:b704]/index.html', function(error, response,html){
// Only proceed when there was no error and the status code is 200.
if(!error && response.statusCode == 200){
var obj = {};
// The html argument is split on newlines as-is, without first isolating the
// text inside the Sensors <pre>, so any markup sharing a line with a reading
// stays attached to that line's key or value.
html.split('\n').forEach((line) => {
// Text before the first ' = ' becomes the key (the whole line when there is no ' = ').
var key = line.split(' = ')[0];
// Text between the first and second ' = ' becomes the value
// (undefined when the line has no ' = ').
var value = line.split(' = ')[1];
obj[key] = value;
});
// Pretty-print the result using a single space per indent level.
console.log(JSON.stringify(obj,null,' '))
}});
but my output is
{
"Battery Temp": "22 C",
"Battery Volt": "3320 mV",
"Air Pressure": "1031.36 hPa",
"Air Temp": "26.09 C",
"Object Temp": "15.531 C",
"Ambient Temp": "26.312 C",
"Light": "0.08 lux",
"HDC Humidity": "34.73 %RH",
"HDC Temp": "26.38 C",
"Acc X": "0.02 G",
"Acc Y": "0.00 G",
"Acc Z": "-1.05 G",
"Gyro X": "-2.11 deg per sec",
"Gyro Y": "-1.10 deg per sec",
"Gyro Z": "3.64 deg per sec</pre></div><div>Page hits: 18<br>Uptime: 2968 secs<br></div></body></html>"
}
Upvotes: 0
Views: 3460
Reputation: 25820
I'd recommend that you use an HTML parser (I personally think jQuery is easy to use, but there are a LOT of options) to find and get the content from a specific element. Then you can run your parse logic on the result.
// Sample response body, standing in for what the device's index page returns.
const response = '<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C\nBattery Volt = 3320 mV\nAir Pressure = 1031.12 hPa\nAir Temp = 22.66 C\nObject Temp = 12.375 C\nAmbient Temp = 23.062 C\nLight = 0.00 lux\nHDC Humidity = 43.93 %RH\nHDC Temp = 23.03 C\nAcc X = 0.02 G\nAcc Y = 0.02 G\nAcc Z = -1.10 G\nGyro X = -2.93 deg per sec\nGyro Y = -2.74 deg per sec\nGyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>';

// Turn the result into an HTML DOM.
const responseDOM = $(response);

// Find the <pre> that holds the sensor readings and get its text content.
// .eq() is zero-based, so index 3 is the FOURTH <pre> in the page
// (after Neighbors, Default Route and Routes).
const preContent = $('pre', responseDOM).eq(3).text();

// Separator between a sensor name and its reading on each line.
const SEPARATOR = ' = ';

// Split the content into lines, cut each line at its first " = ", and merge
// the resulting key/value pairs into a single object.
const obj = preContent
  // Split content into lines (by "\n").
  .split('\n')
  // Fold each "key = value" line into the accumulator object.
  .reduce((acc, line) => {
    const at = line.indexOf(SEPARATOR);
    // Skip blank or malformed lines instead of storing an undefined value.
    if (at !== -1) {
      // Slice around the first separator so a value containing " = " stays intact.
      acc[line.slice(0, at)] = line.slice(at + SEPARATOR.length);
    }
    return acc;
  }, {});

// Finally, turn the object into a JSON string.
const json = JSON.stringify(obj);
console.log(json);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
Upvotes: 0
Reputation: 2390
You need to split each line at the = sign: the part before it is the key, and the part after it is the value.
The following function might resolve this issue:
/**
 * Parse newline-separated "key = value" text into a plain object.
 *
 * Each line is cut at its first " = ": the text before it becomes the
 * property name and everything after it becomes the (string) value. Lines
 * that do not contain the separator (blank lines, headings, stray markup)
 * are skipped rather than stored as keys with an undefined value.
 *
 * @param {string} str - Text containing one "key = value" pair per line.
 * @returns {Object<string, string>} Object mapping each key to its value;
 *   empty when no line contains the separator.
 */
function jsonify(str) {
  const SEPARATOR = ' = ';
  const obj = {};
  // Accept both "\n" and "\r\n" line endings so no stray "\r" ends up in a value.
  for (const line of str.split(/\r?\n/)) {
    const at = line.indexOf(SEPARATOR);
    if (at === -1) {
      continue;
    }
    // Slice around the first separator so a value containing " = " stays intact.
    obj[line.slice(0, at)] = line.slice(at + SEPARATOR.length);
  }
  return obj;
}
Upvotes: 2