volume one
volume one

Reputation: 7563

How to make this string replacement code work with `<br/>` tags?

Following on from an earlier question about converting plain-text URLs within a string into hyperlinks, I want to make the code work when the link text is surrounded by <br/> tags.

This is the code I am using so far, which 'linkifies' any text within an element that looks like a URL:

/**
 * Wraps plain-text URLs found in a string in HTML anchor tags.
 *
 * Two passes are made over the text: the first handles URLs that start with
 * an explicit scheme (http://, https:// or ftp://), the second handles bare
 * "www." hosts, which are given an "http://" prefix in the generated href.
 *
 * @param {string} inputText - Text (which may itself contain HTML tags) to scan.
 * @returns {string} The text with every detected URL wrapped in an
 *     `<a ... target="_blank">` element.
 */
function linkify(inputText) {
    var replacedText, replacePattern1, replacePattern2;

    //URLs starting with http://, https://, or ftp://
    replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;
    replacedText = inputText.replace(replacePattern1, '<a href="$1" target="_blank">$1</a>');

    //URLs starting with "www." (without // before it, or it'd re-link the ones done above).
    //"<" and ">" are excluded so the match stops at an adjacent tag such as <br/>
    //instead of swallowing the tag and the text that follows it into the link.
    replacePattern2 = /(^|[^\/])(www\.[^\s<>]+(\b|$))/gim;
    replacedText = replacedText.replace(replacePattern2, '$1<a href="http://$2" target="_blank">$2</a>');

    return replacedText;
}

Of course the problem is that if the link text is like this:

<p>Is this:<br/><br/>http://www.google.com<br/><br/>THE best search engine around?</p>

Then the output I get ends up being this!

<p>Is this:<a href="http://www.google.com">http://www.google.comTHE</a> best search engine around</p>

So there are two problems: the <br/> tags are getting stripped out completely, and the text after the <br/> tags ('THE') is being treated as part of the hyperlink text.

How could I overcome this small but deadly issue?

Upvotes: 8

Views: 375

Answers (2)

adeneo
adeneo

Reputation: 318182

I would rely a lot more on the browser's built-in parsing abilities, and let the browser figure out what's valid HTML etc.

Something like this should work

/**
 * Linkifies URL-looking text inside an HTML fragment by letting the browser
 * parse the markup instead of running regular expressions over raw HTML.
 *
 * The fragment is parsed inside a wrapper <div>. Every text node that is a
 * direct child of an element inside that wrapper is collected. When a
 * collected text node contains a URL-like match, that node is replaced by an
 * <a> element whose href is the match and whose text is taken from the NEXT
 * collected text node, which is then removed. Elements such as <br> that sit
 * between the two text nodes are left in place, outside the anchor.
 *
 * NOTE(review): text that is a direct child of the wrapper itself (input with
 * no enclosing element) is not visited, because only the child nodes of the
 * elements returned by querySelectorAll('*') are inspected.
 *
 * @param {string} inputText - HTML fragment to process.
 * @returns {string} Serialized HTML of the processed fragment.
 */
function linkify(inputText) {

  var dom = new DOMParser(),
      doc = dom.parseFromString('<div id="wrap">'+ inputText +'</div>', 'text/html'),
      ref = doc.getElementById('wrap'),
      reg = /[-a-zA-Z0-9@:%_\+.~#?&//=]{2,256}\.[a-z]{2,4}\b(\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?/gi,
      arr = [];

  // Gather the text nodes (nodeType 3) up front so the DOM can be mutated
  // safely while walking the resulting array.
  Array.prototype.forEach.call(ref.querySelectorAll('*'), function(node) {
    Array.prototype.forEach.call(node.childNodes, function(innerNode) {
      if (innerNode.nodeType === 3) arr.push(innerNode);
    });
  });

  arr.forEach(function(node, index) {
    node.nodeValue = node.nodeValue.replace(reg, function(x) {
      var nxtNode = arr[index+1],
          anchor;

      // Both nodes must still be attached: an earlier match in this same text
      // node (or in the previous one) may already have removed them, and a
      // detached node has a null parentNode.
      if (nxtNode && nxtNode.parentNode && node.parentNode) {
        anchor = doc.createElement('a');
        anchor.href = x;
        // nodeValue is already-decoded text; assigning it via textContent
        // keeps it as text instead of re-parsing it as markup.
        anchor.textContent = nxtNode.nodeValue;
        nxtNode.parentNode.removeChild(nxtNode);
        node.parentNode.insertBefore(anchor, node);
        node.parentNode.removeChild(node);
      }

      // Return the match unchanged; a callback that returns nothing makes
      // replace() substitute the literal string "undefined".
      return x;
    });
  });

  return ref.innerHTML;
}

would return

<p>
    <br><br>
    <a href="http://www.google.com">THE best search engine around</a>
    <br><br>
</p>

keeping all the breaks, but placing them outside the anchor

FIDDLE

Upvotes: 3

gaetanoM
gaetanoM

Reputation: 42044

I propose adding another replacement to your function that strips the <br/> tags before the URLs are linkified:

/**
 * Removes every `<br/>` tag from the input and then wraps plain-text URLs in
 * HTML anchor tags.
 *
 * @param {string} inputText - Text (possibly containing `<br/>` tags) to scan.
 * @returns {string} The text without `<br/>` tags and with each detected URL
 *     wrapped in an `<a ... target="_blank">` element.
 */
function linkify(inputText) {
  // Matches URLs that begin with http://, https://, or ftp://
  var schemeUrlPattern = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;

  // Matches URLs that begin with "www." and are not preceded by a slash, so
  // that links produced by the scheme pass are not linked a second time.
  var wwwUrlPattern = /(^|[^\/])(www\.[\S]+(\b|$))/gim;

  // Strip the line-break tags first, then run the two linkification passes.
  var withoutBreaks = inputText.replace(/<br\/>/gi, '');
  var schemeLinked = withoutBreaks.replace(schemeUrlPattern, '<a href="$1" target="_blank">$1</a>');

  return schemeLinked.replace(wwwUrlPattern, '$1<a href="http://$2" target="_blank">$2</a>');
}

Upvotes: 1

Related Questions