LearningDev
LearningDev

Reputation: 45

Scraping an HTML page and turning it into a JSON object

I'm trying to scrape an HTML page and turn it into a JSON object.

This is the page:

<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C
Battery Volt = 3320 mV
Air Pressure = 1031.12 hPa
Air Temp = 22.66 C
Object Temp = 12.375 C
Ambient Temp = 23.062 C
Light = 0.00 lux
HDC Humidity = 43.93 %RH
HDC Temp = 23.03 C
Acc X = 0.02 G
Acc Y = 0.02 G
Acc Z = -1.10 G
Gyro X = -2.93 deg per sec
Gyro Y = -2.74 deg per sec
Gyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>

This is my attempt :

var request = require('request');
var cheerio = require('cheerio');





// Request the device's index page and try to turn the response body into a JSON object.
request('http://[aaaa::212:4b00:c2a:b704]/index.html', function(error, response,html){
        // Only continue when no error was reported and the status code is 200.
        if(!error && response.statusCode == 200){
          //JSON.parse(html)
          //console.log('--------------------------------------');
          // Wrap the response body (`html`) in an object under the "id" key.
          var temp = {"id":html}
          // NOTE(review): `temp` is already an object, not a JSON string. JSON.parse
          // converts its argument to a string first ("[object Object]"), which is not
          // valid JSON, so this call throws a SyntaxError instead of returning an object.
          var obj = JSON.parse(temp)
          console.log(JSON.stringify(obj));
        }

});

How can I put the contents of the Sensors div into an object that has the sensor names as keys and the sensor readings as values?

UPDATE :

Thanks to Rafal Wiliński's help I managed to get it mostly working, but the value of the last key also contains the HTML that follows the sensor data (the closing tags and the remaining divs).

new code :

var request = require('request');
var cheerio = require('cheerio');



 // Request the device's index page and build an object from its "key = value" lines.
 request('http://[aaaa::212:4b00:c2a:b704]/index.html', function(error, response,html){
        // Only continue when no error was reported and the status code is 200.
        if(!error && response.statusCode == 200){

          var obj = {};
          // NOTE(review): the entire response body is split into lines here, not just
          // the text inside the Sensors <pre>. Markup that shares a line with a sensor
          // reading therefore ends up inside that reading's key or value, and a line
          // without ' = ' produces a key whose value is undefined.
          html.split('\n').forEach((line) => {
             // Text before ' = ' becomes the key; text after it (up to any further ' = ') becomes the value.
             var key = line.split(' = ')[0];
             var value = line.split(' = ')[1];
             obj[key] = value;
          });
          // Pretty-print the result using a one-space indent.
          console.log(JSON.stringify(obj,null,' '))


        }});

but my output is

{
 "Battery Temp": "22 C",
 "Battery Volt": "3320 mV",
 "Air Pressure": "1031.36 hPa",
 "Air Temp": "26.09 C",
 "Object Temp": "15.531 C",
 "Ambient Temp": "26.312 C",
 "Light": "0.08 lux",
 "HDC Humidity": "34.73 %RH",
 "HDC Temp": "26.38 C",
 "Acc X": "0.02 G",
 "Acc Y": "0.00 G",
 "Acc Z": "-1.05 G",
 "Gyro X": "-2.11 deg per sec",
 "Gyro Y": "-1.10 deg per sec",
 "Gyro Z": "3.64 deg per sec</pre></div><div>Page hits: 18<br>Uptime: 2968 secs<br></div></body></html>"
}

Upvotes: 0

Views: 3460

Answers (2)

JDB
JDB

Reputation: 25820

I'd recommend that you use an HTML parser (I personally think jQuery is easy to use, but there are a LOT of options) to find and get the content from a specific element. Then you can run your parse logic on the result.

// Sample response body: the device's index page as a single string, with the sensor lines separated by "\n".
var response = '<html><head><title>Index</title><meta charset="UTF-8"></head><body><div><p>[ <a href="index.html">Index</a> ] | [ <a href="config.html">Device Config</a> ]</p></div><div>Neighbors<pre>fe80::212:4b00:8b8:6ecb REACHABLE</pre></div><div>Default Route<pre>fe80::212:4b00:8b8:6ecb</pre></div><div>Routes<pre></pre></div><div>Sensors<pre>Battery Temp = 19 C\nBattery Volt = 3320 mV\nAir Pressure = 1031.12 hPa\nAir Temp = 22.66 C\nObject Temp = 12.375 C\nAmbient Temp = 23.062 C\nLight = 0.00 lux\nHDC Humidity = 43.93 %RH\nHDC Temp = 23.03 C\nAcc X = 0.02 G\nAcc Y = 0.02 G\nAcc Z = -1.10 G\nGyro X = -2.93 deg per sec\nGyro Y = -2.74 deg per sec\nGyro Z = 5.18 deg per sec</pre></div><div>Page hits: 4<br>Uptime: 138 secs<br></div></body></html>';

// Turn the response string into jQuery-wrapped DOM elements.
var responseDOM = $(response);

// Find the specific element you want and get its text content. .eq() is zero-based,
// so .eq(3) selects the FOURTH <pre> in the sample above, i.e. the one under "Sensors".
var preContent = $('pre', responseDOM).eq(3).text();

// Now, split the content into lines, split again by " = ", and then merge the result back into a single object.
var obj = preContent
      // Split content into lines (by "\n")
      .split('\n')
      // Split each line into a [key, value] pair (by " = ")
      .map(line => line.split(' = '))
      // Fold all pairs into one object: kvp[0] is the property name, kvp[1] its value
      .reduce( (acc,kvp) => { acc[kvp[0]] = kvp[1]; return acc; }, {})


// Finally, turn the object into a JSON string.
var json = JSON.stringify(obj);

console.log(json);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>

Upvotes: 0

Rafal Wiliński
Rafal Wiliński

Reputation: 2390

You need to split each line on the " = " separator. The part before it is the key, and the part after it is the value.

The following function might resolve this issue:

/**
 * Convert newline-separated "key = value" text into a plain object.
 *
 * Each line is split on its FIRST " = " separator: the text before it becomes
 * the property name and everything after it becomes the (string) value, so a
 * value that itself contains " = " is kept intact. Lines that do not contain
 * the separator (blank lines, stray markup, a trailing newline) are skipped
 * instead of being stored as keys with an undefined value.
 *
 * Pass only the text that holds the key/value lines (for example the text of
 * the relevant <pre> element); markup sharing a line with a pair is not stripped.
 *
 * @param {string} str - Text containing one "key = value" pair per line.
 *   Both "\n" and "\r\n" line endings are accepted; null/undefined yields {}.
 * @returns {Object} Object mapping each key to its string value.
 */
function jsonify(str) {
   const separator = ' = ';
   const obj = {};
   if (str == null) {
      return obj;
   }
   String(str).split(/\r?\n/).forEach((line) => {
      const at = line.indexOf(separator);
      if (at === -1) {
         // Not a key/value line; skip it rather than adding an undefined entry.
         return;
      }
      obj[line.slice(0, at)] = line.slice(at + separator.length);
   });
   return obj;
}

Upvotes: 2

Related Questions