Reputation:
I'm working on a DOM traversal type of script and I'm almost finished with it. However, there is one problem that I've encountered and for the life of me, I can't figure out what to do to fix it. Pardon my ineptitude, as I'm brand new to JS/JQuery and I'm still learning the ropes.
Basically, I'm using JavaScript/jQuery to create an "outline" representing the structure of an HTML page, and appending the "outline" to the bottom of the webpage. For example, if the HTML is this...
<html>
<head>
</head>
<body>
<h1>Hello World</h1>
<script src="http://code.jquery.com/jquery-2.1.0.min.js" type="text/javascript">
</script>
<script src="outline.js" type="text/javascript"></script>
</body>
</html>
Then the output should be an unordered list like this:
Here's what I've got so far:
// Collects the HTML fragments that make up the outline, in the order nodes are visited.
var items=[];
// Once the DOM is ready: append an empty <ul id="list"> to <body>, walk the whole
// document with traverse(), then append the joined fragments to that list.
$(document).ready(function(){
$("<ul id = 'list'></ul>").appendTo("body");
traverse(document, function (node) {
if(node.nodeName.indexOf("#") <= -1){
// Node name without a "#": open a nested <ul> and an <li> labelled with the
// lower-cased node name. Neither tag is closed here.
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
}
else {
// Node name contains "#" (e.g. "#document", "#text").
// NOTE(review): "#comment" nodes would presumably land here too and be labelled text(...).
// NOTE(review): nodeValue is concatenated into markup without HTML escaping.
var x = "text("+node.nodeValue+")";
if(node.nodeValue == null) {
// A "#" node with no value is reported as the document itself.
items.push("<li> document");
}
else if(/[a-z0-9]/i.test(node.nodeValue) && node.nodeValue != null) {
// Value with at least one ASCII letter or digit: emit it as its own nested, closed list.
// The trailing != null test is redundant; the null case was handled by the branch above.
items.push("<ul><li>"+ x +"</ul>");
}
else {
// Any other value (no letter or digit, e.g. whitespace-only text such as a line
// break between two tags) closes one list level. This is the branch that fires
// for the newline between <head> and <title>.
items.push("</ul>");
}
}
});
// The fragments are joined with no separator and inserted as a single HTML string.
$('#list').append(items.join(''));
});
/**
 * Depth-first, pre-order walk over a node tree.
 *
 * Calls `func` on `node` first, then recurses into each child in order,
 * following the `firstChild` / `nextSibling` links. Siblings of the starting
 * node itself are not visited.
 *
 * @param {Node} node - Root of the subtree to visit.
 * @param {function(Node): void} func - Callback invoked once for every visited node.
 */
function traverse(node, func) {
  func(node);
  // The child links are read only after the callback / recursive call has run,
  // in the same order as a manual while-loop over nextSibling.
  for (var child = node.firstChild; child; child = child.nextSibling) {
    traverse(child, func);
  }
}
It works almost perfectly, except it seems to read a carriage return as a text node. For example, if there's
<head><title>
it reads that properly, adding head as an unordered list element, and then creating a new "unordered list" for title, which is nested inside the head. HOWEVER, if it's
<head>
<title>
It makes the new unordered list and its element, "head", but then jumps to the else statement that does items.push("</ul>"). How do I get it to ignore the carriage return? I tried testing to see if the nodeValue was equal to the carriage return, \r, but that didn't seem to do the trick.
Upvotes: 1
Views: 301
Reputation: 707326
I'm having a bit of a hard time understanding exactly which text nodes you want to skip. If you just want to skip a text node that is only whitespace, you can do that like this:
var onlyWhitespaceRegex = /^\s*$/;
traverse(document, function (node) {
if (node.nodeType === 3 && onlyWhitespaceRegex.test(node.nodeValue)) {
// skip text nodes that contain only whitespace
return;
}
else if (node.nodeName.indexOf("#") <= -1){
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
} else ...
Or maybe you just want to trim any run of leading or trailing whitespace in a text node (down to a single space) before displaying it, since the extra whitespace would not be displayed in HTML anyway.
var trimWhitespaceRegex = /^\s+|\s+$/g;
traverse(document, function (node) {
if(node.nodeName.indexOf("#") <= -1){
items.push("<ul>"+"<li>"+node.nodeName.toLowerCase());
} else {
var text = node.nodeValue;
if (node.nodeType === 3) {
text = text.replace(trimWhitespaceRegex, " ");
}
var x = "text("+text+")";
if(node.nodeValue == null) {
items.push("<li> document");
} ....
A further description of exactly what you're trying to achieve in the output for various forms of different text nodes would help us better understand your requirements.
Upvotes: 0