Reputation: 38203
I wrote the following function to linearize the text content of an arbitrary string of HTML:
html2text(html) {
function _html2text(element, accum) {
return Array.prototype.slice.call(element.childNodes).reduce((accum, node) => {
return (node.nodeType === 3)
? `${accum} ${node.textContent}`
: _html2text(node, accum);
}, accum);
}
const div = document.createElement('div');
div.innerHTML = html;
return _html2text(div, '');
}
But now I am not able to transform it into a trail-recusive style so that it can be optimized. My problem is that the recursion recurs within the reduction. I will write this out as loops, but it's been killing me that I've not been able tail recurse this.
Here is my loop version:
function html2text(html) {
var div = document.createElement('div');
div.innerHTML = html;
var accum = '';
var stack = [];
stack.push([div, 0]);
while (stack.length !== 0) {
var frame = stack.pop();
var el = frame[0];
var i = frame[1];
for (; i < el.childNodes.length; i++) {
var node = el.childNodes[i];
if (node.nodeType === Node.ELEMENT_NODE) {
stack.push([el, i+1]);
stack.push([node, 0]);
break;
} else if (node.nodeType === Node.TEXT_NODE) {
accum += ' ' + node.textContent;
}
}
}
return accum;
}
Here's the function above written with a recursive tail call, passing the stack:
function html2text(html) {
function recurse(stack, accum) {
if (!stack.length) {
return accum;
}
var frame = stack.pop();
var el = frame[0];
var i = frame[1];
for (; i < el.childNodes.length; i++) {
var node = el.childNodes[i];
if (node.nodeType === Node.ELEMENT_NODE) {
stack.push([el, i+1]);
stack.push([node, 0]);
break;
} else if (node.nodeType === Node.TEXT_NODE) {
accum += ' ' + node.textContent;
}
}
return recurse(stack, accum)
}
var div = document.createElement('div');
div.innerHTML = html;
var stack = [];
stack.push([div, 0]);
return recurse(stack, '');
}
The inner loop can be converted to another recursion, but without using immutable datastructures none of this really makes too much sense for me.
Upvotes: 2
Views: 225
Reputation: 1596
function html2text(current, text = "") {
"use strict";
if (current === null) return text;
var nextNode = current.nextSibling || current.parentNode.nextSibling,
more = current.nodeType === 3 ? current.textContent : "";
return html2text(nextNode, text + more);
}
You were right, you can do the concatenation before the tail call. I think this fits all the criteria, but ES6 is new, so please do double check.
Upvotes: 2