Dmitry Minkovsky
Dmitry Minkovsky

Reputation: 38203

How can I rewrite this function with tail recursion

I wrote the following function to linearize the text content of an arbitrary string of HTML:

html2text(html) {
    function _html2text(element, accum) {
        return Array.prototype.slice.call(element.childNodes).reduce((accum, node) => {
            return (node.nodeType === 3)
                ? `${accum} ${node.textContent}`
                : _html2text(node, accum);
        }, accum);
    }
    const div = document.createElement('div');
    div.innerHTML = html;
    return _html2text(div, '');
}

But now I am not able to transform it into a trail-recusive style so that it can be optimized. My problem is that the recursion recurs within the reduction. I will write this out as loops, but it's been killing me that I've not been able tail recurse this.


Here is my loop version:

function html2text(html) {
    var div = document.createElement('div');
    div.innerHTML = html;
    var accum = '';
    var stack = [];
    stack.push([div, 0]);
    while (stack.length !== 0) {
        var frame = stack.pop();
        var el = frame[0];
        var i = frame[1];
        for (; i < el.childNodes.length; i++) {
            var node = el.childNodes[i];
            if (node.nodeType === Node.ELEMENT_NODE) {
                stack.push([el, i+1]);
                stack.push([node, 0]);
                break;
            } else if (node.nodeType === Node.TEXT_NODE) {
                accum += ' ' + node.textContent;
            }
        }
    }
    return accum;
}

Here's the function above written with a recursive tail call, passing the stack:

function html2text(html) {                                                                                                                                                                              

    function recurse(stack, accum) {                                                                                                                                                                    
        if (!stack.length) {                                                                                                                                                                            
            return accum;                                                                                                                                                                               
        }                                                                                                                                                                                               
        var frame = stack.pop();                                                                                                                                                                        
        var el = frame[0];                                                                                                                                                                              
        var i = frame[1];                                                                                                                                                                               
        for (; i < el.childNodes.length; i++) {                                                                                                                                                         
            var node = el.childNodes[i];                                                                                                                                                                
            if (node.nodeType === Node.ELEMENT_NODE) {                                                                                                                                                  
                stack.push([el, i+1]);                                                                                                                                                                  
                stack.push([node, 0]);                                                                                                                                                                  
                break;                                                                                                                                                                                  
            } else if (node.nodeType === Node.TEXT_NODE) {                                                                                                                                              
                accum += ' ' + node.textContent;                                                                                                                                                        
            }                                                                                                                                                                                           
        }                                                                                                                                                                                               
        return recurse(stack, accum)                                                                                                                                                                    
    }                                                                                                                                                                                                   

    var div = document.createElement('div');                                                                                                                                                            
    div.innerHTML = html;                                                                                                                                                                               
    var stack = [];                                                                                                                                                                                     
    stack.push([div, 0]);                                                                                                                                                                               
    return recurse(stack, '');                                                                                                                                                                          
} 

The inner loop can be converted to another recursion, but without using immutable datastructures none of this really makes too much sense for me.

Upvotes: 2

Views: 225

Answers (1)

sqykly
sqykly

Reputation: 1596

function html2text(current, text = "") {
    "use strict";
    if (current === null) return text;
    var nextNode = current.nextSibling || current.parentNode.nextSibling,
        more = current.nodeType === 3 ? current.textContent : "";
    return html2text(nextNode, text + more);
}

You were right, you can do the concatenation before the tail call. I think this fits all the criteria, but ES6 is new, so please do double check.

Upvotes: 2

Related Questions