Caleb Liu
Caleb Liu

Reputation: 757

How to get the content of an HTML comment using JavaScript

How can I get the content of an HTML comment using JavaScript? I tried this:

/**
 * Strips every HTML comment delimiter from the given string.
 *
 * All occurrences of "<!--" are removed first, then all occurrences of
 * "-->", so a "<!--" that appears inside the comment's content is removed
 * as well.
 *
 * @param {string} comment - Text containing HTML comment delimiters.
 * @returns {string} The text with every delimiter removed.
 */
function getContent(comment) {
  let stripped = comment;
  // Order matters: opening delimiters are removed before closing ones.
  for (const delimiter of ["<!--", "-->"]) {
    stripped = stripped.replaceAll(delimiter, "");
  }
  return stripped;
}

// Expected output: "hello world"
console.log(getContent("<!--hello world-->"));

// Actual output: "some text", but the real content of the comment is "some <!--text"
// (the inner "<!--" is stripped along with the outer delimiters).
console.log(getContent("<!--some <!--text-->"));

But in the edge case where the comment's content itself contains additional <!-- sequences, those get removed as well. How can I fix this?

Upvotes: 1

Views: 304

Answers (1)

CertainPerformance
CertainPerformance

Reputation: 370799

Instead of replacing every occurrence of each delimiter separately, use a regular expression that matches <!-- followed (eventually) by -->, so that only proper opening/closing pairs are removed, and are removed together.

/**
 * Unwraps every `<!-- ... -->` pair in the string, keeping the text that
 * was between the delimiters.
 *
 * Only matched pairs are touched: an opening delimiter is paired with the
 * nearest following closing delimiter, so a stray "<!--" inside the content
 * is preserved.
 *
 * @param {string} comment - Text containing one or more HTML comments.
 * @returns {string} The text with each comment replaced by its content.
 */
function getContent(comment) {
  return comment
    // `s` (dotAll) lets `.` match line breaks so comments spanning several
    // lines are unwrapped too; the lazy `*?` stops at the first "-->".
    .replace(/<!--(.*?)-->/gs, "$1");
}

// Expected output: "hello world"
console.log(getContent("<!--hello world-->"));

// Output: "some <!--text", which is the real content of the comment
// (the lazy match pairs the first "<!--" with the first "-->").
console.log(getContent("<!--some <!--text-->"));

Another option would be to use DOMParser to turn the text into a document, then iterate over only comments in the document and remove them, then return the content that remains in the document.

/**
 * Parses the given markup as an HTML document, removes every comment node
 * found under its <body>, and returns the body markup that remains.
 *
 * Requires a browser-like environment providing `DOMParser` and `NodeFilter`.
 *
 * @param {string} comment - Markup that may contain HTML comments.
 * @returns {string} `innerHTML` of the parsed body after the comments are removed.
 */
function getContent(comment) {
  const doc = new DOMParser().parseFromString(comment, 'text/html');
  // Build the walker from the parsed document itself rather than the page's
  // global `document`, so the function only depends on what it parsed.
  const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_COMMENT);

  // Collect first and remove afterwards so the traversal never runs over a
  // tree that is being mutated underneath it.
  const commentNodes = [];
  // Declared locally: assigning to an undeclared `node` would create an
  // implicit global and throws in strict mode / ES modules.
  let node = walker.nextNode();
  while (node) {
    commentNodes.push(node);
    node = walker.nextNode();
  }

  for (const commentNode of commentNodes) {
    commentNode.remove();
  }
  return doc.body.innerHTML;
}

// Run each sample input through getContent and log the result, in order.
const sampleInputs = [
  "<!--hello world-->",
  "<!--some <!--text-->",
  "foobar <!--some <!--text--> barbaz <!-- another comment -->",
];
for (const sample of sampleInputs) {
  console.log(getContent(sample));
}

Upvotes: 1

Related Questions