Reputation: 757
How can I get the content of an HTML comment using JavaScript? I tried this:
/**
 * Strips every "<!--" and every "-->" marker from the given string.
 *
 * Markers are removed wherever they appear, so an extra "<!--" that is
 * part of the comment's own content is dropped as well.
 *
 * @param {string} comment - HTML comment text, delimiters included.
 * @returns {string} The input with all opening and closing markers deleted.
 */
function getContent(comment) {
  // Splitting on a marker and re-joining with "" deletes all of its occurrences.
  const withoutOpeners = comment.split("<!--").join("");
  const withoutClosers = withoutOpeners.split("-->").join("");
  return withoutClosers;
}
// Expected output: "hello world"
console.log(getContent("<!--hello world-->"));
// Actual output: "some text", but the real content of the comment is "some <!--text"
console.log(getContent("<!--some <!--text-->"));
But in the edge case where the comment itself contains an additional <!--, that additional <!-- gets removed as well. How can I fix this?
Upvotes: 1
Views: 304
Reputation: 370799
Instead of replacing every occurrence of each marker independently, use a regular expression that matches <!-- eventually followed by -->, so that only proper pairs get removed, together.
/**
 * Unwraps HTML comments: each "<!--" ... "-->" pair is replaced by the text
 * between the two delimiters, and text outside any comment is left untouched.
 *
 * Only matched pairs are removed, so an extra "<!--" inside a comment is
 * kept as part of that comment's content.
 *
 * @param {string} comment - Text containing zero or more HTML comments.
 * @returns {string} The text with comment delimiters removed pairwise.
 */
function getContent(comment) {
  // The `s` (dotAll) flag lets `.` match line breaks, so comments spanning
  // several lines are handled too. The lazy `.*?` stops at the first "-->".
  return comment.replace(/<!--(.*?)-->/gs, "$1");
}
// Expected output: "hello world"
console.log(getContent("<!--hello world-->"));
// Expected output: "some <!--text", which is the real content of the comment (the inner "<!--" is kept)
console.log(getContent("<!--some <!--text-->"));
Another option would be to use DOMParser to turn the text into a document, then iterate over only comments in the document and remove them, then return the content that remains in the document.
/**
 * Parses the given markup as an HTML document, removes every comment node
 * found under <body>, and returns the body's remaining markup.
 *
 * Requires a browser-like environment that provides DOMParser and NodeFilter.
 *
 * @param {string} comment - HTML text that may contain comments.
 * @returns {string} The innerHTML of the parsed body after comment removal.
 */
function getContent(comment) {
  const doc = new DOMParser().parseFromString(comment, 'text/html');
  // Create the walker from the parsed document itself rather than the page's
  // global `document`; only comment nodes are visited.
  const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_COMMENT);

  // Collect first, remove afterwards, so the tree is not mutated while the
  // walker is still traversing it.
  const commentNodes = [];
  let current = walker.nextNode();
  while (current !== null) {
    commentNodes.push(current);
    current = walker.nextNode();
  }

  for (const commentNode of commentNodes) {
    commentNode.remove();
  }
  return doc.body.innerHTML;
}
// NOTE(review): these two inputs consist of a single comment only, so the
// remaining body markup is presumably an empty string; verify in a browser.
console.log(getContent("<!--hello world-->"));
console.log(getContent("<!--some <!--text-->"));
// Presumably prints "foobar  barbaz " (both comments removed, surrounding text kept); verify in a browser.
console.log(getContent("foobar <!--some <!--text--> barbaz <!-- another comment -->"));
Upvotes: 1