Nate
Nate

Reputation: 7876

DOMParser - children are not DOM objects

There is a strange behavior with DOMParser. When I pass "text/xml" as the MIME type, I get my document, and every related node I access from it (a child node, a parentNode) is itself a DOM object. However, when I pass "text/html" as the MIME type, the children are not DOM objects. Why is that, and how can I get DOM objects for all the children?

Here is what I do:

// Parse the HTML source and collect its <p> elements.
// Both names are declared explicitly: assigning to undeclared identifiers
// creates implicit globals and throws a ReferenceError in strict/module code.
var parser = new DOMParser();
// NOTE: despite its name, `doc` holds the collection returned by
// getElementsByTagName('p'), not the parsed document itself.
var doc = parser.parseFromString(stringContainingHTMLSource, "text/html").getElementsByTagName('p');

// Log the first child node of the first paragraph.
console.log(doc[0].childNodes[0]);

My childNode returns the element but not as a DOM object...

Edit: Here are my recursive functions:

        /**
         * Walks from `node` up through its ancestors and appends, for every node
         * visited, the entries that `parse` inspects afterwards:
         *   - <span>: the value of its first attribute (nothing when it has none);
         *   - <p> with attributes: the tag name followed by its first attribute value;
         *   - anything else: the node name.
         * The first attribute is presumably `style` - TODO confirm with the markup
         * producer.
         *
         * Node names are compared and stored in lower case, because documents
         * parsed as "text/html" report element names in upper case while
         * "text/xml" keeps them as written.
         *
         * The walk stops below the `#document` node, and also when a node has no
         * parent (detached subtree) instead of throwing.
         *
         * @param {Node} node - node to start from; it is included in the result.
         * @param {Array} parentNodes - accumulator, appended to in place.
         * @returns {Array} the same `parentNodes` array, ordered from `node` outwards.
         */
        var getParents = function(node, parentNodes){
            var current = node;
            while (current) {
                var tagName = String(current.nodeName).toLowerCase();
                // Text nodes have no `attributes` collection at all.
                var hasAttributes = !!current.attributes && current.attributes.length > 0;

                if (tagName === 'span') {
                    // A bare <span> carries no styling, so it contributes nothing.
                    if (hasAttributes) {
                        parentNodes.push(current.attributes[0].nodeValue);
                    }
                } else if (tagName === 'p' && hasAttributes) {
                    parentNodes.push(tagName);
                    parentNodes.push(current.attributes[0].nodeValue);
                } else {
                    parentNodes.push(tagName);
                }

                var parent = current.parentNode;
                if (!parent || parent.nodeName === '#document') {
                    break;
                }
                current = parent;
            }
            return parentNodes;
        };
        /**
         * Flattens the subtree below `node` into styled text runs and appends one
         * run per leaf node to `pdfContent[vertical][horizontal]`, creating that
         * slot when it does not exist yet.
         *
         * For every leaf, the entries returned by `getParents` are scanned in
         * order (later entries overwrite earlier ones):
         *   - an entry holding a `text-align` and/or `color` declaration sets
         *     `alignment` / `color` (value trimmed, without the trailing `;`);
         *   - `em` and `strong` mark the run italic / bold; a run that is both
         *     gets the single flag `bolditalics`, whatever the nesting order;
         *   - `u` and `s` set `decoration` to 'underline' / 'lineThrough'.
         * Tag names are matched case-insensitively.
         *
         * @param {Node} node - node whose leaves are collected.
         * @param {number} vertical - first index into `pdfContent`.
         * @param {number} horizontal - second index into `pdfContent`.
         * @param {number} paragraph - paragraph index; only forwarded to recursive calls.
         */
        var parse = function(node, vertical, horizontal, paragraph){
            var index;
            if(node.childNodes.length > 0){
                for(index = 0; index < node.childNodes.length; index++){
                    parse(node.childNodes[index], vertical, horizontal, paragraph);
                }
                return;
            }

            // Anchored at the start or after ';' so that e.g. `background-color`
            // is not mistaken for `color`; whitespace around ':' is optional.
            var alignmentRule = /(?:^|;)\s*text-align\s*:\s*([^;]+)/i;
            var colorRule = /(?:^|;)\s*color\s*:\s*([^;]+)/i;

            var object = {};
            var attributes = getParents(node, []);
            var isBold = false;
            var isItalic = false;

            for(index = 0; index < attributes.length; index++) {
                var entry = String(attributes[index]);
                var alignment = alignmentRule.exec(entry);
                var color = colorRule.exec(entry);

                if (alignment || color) {
                    if (alignment) {
                        object.alignment = alignment[1].trim();
                    }
                    if (color) {
                        object.color = color[1].trim();
                    }
                    continue;
                }

                switch (entry.toLowerCase()) {
                    case 'em':
                        isItalic = true;
                        break;
                    case 'strong':
                        isBold = true;
                        break;
                    case 'u':
                        object.decoration = 'underline';
                        break;
                    case 's':
                        object.decoration = 'lineThrough';
                        break;
                }
            }

            // Resolve emphasis once, so the result does not depend on whether
            // <em> or <strong> was met first.
            if (isBold && isItalic) {
                object.bolditalics = true;
            } else if (isBold) {
                object.bold = true;
            } else if (isItalic) {
                object.italics = true;
            }

            object.text = node.textContent;

            if (!pdfContent[vertical]) {
                pdfContent[vertical] = [];
            }
            if (!pdfContent[vertical][horizontal]) {
                pdfContent[vertical][horizontal] = [];
            }
            pdfContent[vertical][horizontal].push(object);
        };
        // Convert every cell of payment.htmlContent: parse its markup and feed
        // the children of each <p> element to parse().
        // One parser instance is enough; it keeps no state between calls.
        var parser = new DOMParser();
        for(var vertical = 0; vertical < payment.htmlContent.length; vertical++) {
            for(var horizontal = 0; horizontal < payment.htmlContent[vertical].length; horizontal++) {
                // An XML document may have only one root element, so a cell that
                // holds several sibling <p> elements would fail to parse as
                // "text/xml". Wrapping the cell in a neutral, attribute-less
                // element keeps such cells well-formed.
                // NOTE(review): assumes cells are markup fragments without an
                // XML declaration or doctype - confirm against the data source.
                var markup = '<div>' + payment.htmlContent[vertical][horizontal] + '</div>';
                var paragraphs = parser.parseFromString(markup, "text/xml").getElementsByTagName('p');
                for (var paragraph = 0; paragraph < paragraphs.length; paragraph++) {
                    for (var num = 0; num < paragraphs[paragraph].childNodes.length; num++) {
                        parse(paragraphs[paragraph].childNodes[num], vertical, horizontal, paragraph);
                    }
                }
            }
        }

Upvotes: 0

Views: 1706

Answers (1)

Kaiido
Kaiido

Reputation: 137131

I made a few assumptions about what the values are, and after adding a few checks such as if(node.attributes.length>0) to your code, it seems to work.

// Sample input: a two-dimensional grid of HTML fragments, indexed as
// htmlContent[vertical][horizontal].
var payment = {
  htmlContent: [
    [
      '<p>some<em>text</em></p>',
      '<p>some<span>text<strong>here</strong></span></p>'
    ],
    [
      '<p>some<s>text</s></p>',
      '<p>some<span style="color:#FF00FF">text</span></p>'
    ]
  ]
};

/**
 * Collects styling information for `node` and each of its ancestors, stopping
 * below the `#document` node.
 *
 * Per visited node: a `span` adds the value of its first attribute (if it has
 * any), a `p` with attributes adds its node name and the value of its first
 * attribute, and every other node adds just its node name.
 *
 * @param {Node} node - starting node; it is part of the result.
 * @param {Array} parentNodes - accumulator that receives the entries.
 * @returns {Array} `parentNodes`, filled from `node` outwards.
 */
var getParents = function(node, parentNodes){
  var cursor = node;
  var climbing = true;

  while (climbing) {
    var label = cursor.nodeName;

    if (label == 'span') {
      if (cursor.attributes.length > 0) {
        parentNodes.push(cursor.attributes[0].nodeValue);
      }
    } else if (label == 'p' && cursor.attributes.length > 0) {
      parentNodes.push(label);
      if (cursor.attributes.length > 0) {
        parentNodes.push(cursor.attributes[0].nodeValue);
      }
    } else {
      parentNodes.push(label);
    }

    // Continue with the parent unless it is the document itself.
    if (cursor.parentNode.nodeName != '#document') {
      cursor = cursor.parentNode;
    } else {
      climbing = false;
    }
  }

  return parentNodes;
};
        /**
         * Flattens the subtree below `node` into styled text runs and appends one
         * run per leaf node to `pdfContent[vertical][horizontal]`, creating that
         * slot on first use.
         *
         * For every leaf, the entries returned by `getParents` are scanned in
         * order (later entries overwrite earlier ones):
         *   - an entry holding a `text-align` and/or `color` declaration sets
         *     `alignment` / `color` (value trimmed, without the trailing `;`);
         *   - `em` and `strong` mark the run italic / bold; a run that is both
         *     gets the single flag `bolditalics`, whatever the nesting order;
         *   - `u` and `s` set `decoration` to 'underline' / 'lineThrough'.
         * Tag names are matched case-insensitively.
         *
         * @param {Node} node - node whose leaves are collected.
         * @param {number} vertical - first index into `pdfContent`.
         * @param {number} horizontal - second index into `pdfContent`.
         * @param {number} paragraph - paragraph index; only forwarded to recursive calls.
         */
        var parse = function(node, vertical, horizontal, paragraph){
            var position;
            if(node.childNodes.length > 0){
                for(position = 0; position < node.childNodes.length; position++){
                    parse(node.childNodes[position], vertical, horizontal, paragraph);
                }
                return;
            }

            // Anchored at the start or after ';' so that e.g. `background-color`
            // is not mistaken for `color`; whitespace around ':' is optional.
            var alignmentRule = /(?:^|;)\s*text-align\s*:\s*([^;]+)/i;
            var colorRule = /(?:^|;)\s*color\s*:\s*([^;]+)/i;

            var object = {};
            var attributes = getParents(node, []);
            // Debug aid kept from the original snippet.
            console.log(attributes);

            var bold = false;
            var italic = false;
            for(position = 0; position < attributes.length; position++) {
                var entry = String(attributes[position]);
                var alignmentMatch = alignmentRule.exec(entry);
                var colorMatch = colorRule.exec(entry);

                if (alignmentMatch) {
                    object.alignment = alignmentMatch[1].trim();
                }
                if (colorMatch) {
                    object.color = colorMatch[1].trim();
                }
                if (alignmentMatch || colorMatch) {
                    continue;
                }

                var tag = entry.toLowerCase();
                if (tag === 'em') {
                    italic = true;
                } else if (tag === 'strong') {
                    bold = true;
                } else if (tag === 'u') {
                    object.decoration = 'underline';
                } else if (tag === 's') {
                    object.decoration = 'lineThrough';
                }
            }

            // Resolve emphasis once, so the result does not depend on whether
            // <em> or <strong> was met first.
            if (bold && italic) {
                object.bolditalics = true;
            } else if (bold) {
                object.bold = true;
            } else if (italic) {
                object.italics = true;
            }

            object.text = node.textContent;

            if (!pdfContent[vertical]) {
                pdfContent[vertical] = [];
            }
            if (!pdfContent[vertical][horizontal]) {
                pdfContent[vertical][horizontal] = [];
            }
            pdfContent[vertical][horizontal].push(object);
        };
// Result grid filled by parse(): pdfContent[vertical][horizontal] is a list of
// styled text runs.
var pdfContent = [];
        // One parser instance is enough; it keeps no state between calls.
        var parser = new DOMParser();
        for(var vertical = 0; vertical < payment.htmlContent.length; vertical++) {
            for(var horizontal = 0; horizontal < payment.htmlContent[vertical].length; horizontal++) {
                // An XML document may have only one root element, so a cell that
                // holds several sibling <p> elements would fail to parse as
                // "text/xml". Wrapping the cell in a neutral, attribute-less
                // element keeps such cells well-formed.
                // NOTE(review): assumes cells are markup fragments without an
                // XML declaration or doctype - confirm against the data source.
                var markup = '<div>' + payment.htmlContent[vertical][horizontal] + '</div>';
                var paragraphs = parser.parseFromString(markup, "text/xml").getElementsByTagName('p');
                for (var paragraph = 0; paragraph < paragraphs.length; paragraph++) {
                    for (var num = 0; num < paragraphs[paragraph].childNodes.length; num++) {
                        parse(paragraphs[paragraph].childNodes[num], vertical, horizontal, paragraph);
                    }
                }
            }
        }
// Dump every cell of pdfContent into the #log element.
// JSON.stringify replaces Object.prototype.toSource(), which was a
// non-standard, Firefox-only method and is no longer available in browsers.
var logElement = document.querySelector('#log');
for(var i=0; i<pdfContent.length; i++){
   // Rows and cells are created lazily while parsing, so gaps are possible.
   var row = pdfContent[i] || [];
   for(var j=0; j<row.length; j++){
    if (row[j] !== undefined) {
      logElement.textContent += JSON.stringify(row[j]);
    }
    }
  }
<!-- Output target: the script selects this element with '#log' and appends the serialized results to its textContent. -->
<p id="log"></p>

Upvotes: 1

Related Questions