Reputation: 491
I have a string variable containing HTML entities:
// Sample input: the ampersand is written as the HTML entity `&amp;`, which is what needs decoding.
var str = 'Some text &amp; text';
I want to convert (decode) it to original characters:
Some text & text
.
JavaScript doesn't have a built-in function to achieve the desired result. I can't use jQuery or DOM objects because I need it to work in Google Apps Script.
How can I do that in a simple way?
Upvotes: 13
Views: 8289
Reputation: 16780
In 2024 none of those solutions (`XML`, `XmlService`) worked 😟 for me, so I did it 💪 manually.
Following is my hard work solution.
To use it just call const fixedString = htmlEntitiesDecode(crazyEncodedString);
/**
 * Decodes HTML entities in a string using the module-level `entities` table
 * (an array of `{ entity, character }` pairs), plus numeric character
 * references such as `&#39;` and `&#x27;`.
 *
 * The whole input is decoded in a single pass, so text produced by one
 * replacement is never decoded a second time (e.g. `&amp;lt;` becomes the
 * literal text `&lt;`, not `<`).
 *
 * @param {string} input - Text that may contain HTML entities.
 * @returns {string} The decoded text. Non-string input is returned unchanged.
 */
function htmlEntitiesDecode(input) {
  if (typeof input !== 'string') {
    return input;
  }

  // Build the lookup on each call: `entities` is declared after this function,
  // so it must not be read before the module has finished loading.
  const namedReplacements = new Map();
  const namedPatterns = [];
  entities.forEach(({ entity, character }) => {
    // Skip empty keys (they would match everywhere) and duplicates (first row wins).
    if (entity === '' || namedReplacements.has(entity)) {
      return;
    }
    namedReplacements.set(entity, character);
    // Escape regex metacharacters so the entity is always matched literally.
    namedPatterns.push(entity.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  });

  // Table entries come first so an explicit table mapping overrides the
  // generic numeric-reference handling.
  const pattern = new RegExp(
    [...namedPatterns, '&#(\\d+);', '&#[xX]([0-9a-fA-F]+);'].join('|'),
    'g'
  );

  // A replacer function (rather than a replacement string) keeps `$` sequences
  // in the table's characters from being interpreted as replacement patterns.
  return input.replace(pattern, (match, decimal, hex) => {
    if (decimal === undefined && hex === undefined) {
      return namedReplacements.get(match);
    }
    const codePoint =
      decimal !== undefined
        ? Number.parseInt(decimal, 10)
        : Number.parseInt(hex, 16);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    // Leave invalid references untouched instead of throwing a RangeError.
    return isValid ? String.fromCodePoint(codePoint) : match;
  });
}
/**
 * Lookup table of named HTML entities and the character each one decodes to.
 *
 * Each row is `{ entity, character }`, where `entity` is the encoded form
 * (e.g. `&eacute;`) and `character` is its replacement.
 *
 * Notes:
 * - `&nbsp;` is mapped to a plain space and `&ndash;` to a plain hyphen.
 * - Each entity appears exactly once.
 * - `&amp;` is deliberately the LAST row: a decoder that applies the rows one
 *   after another must decode the ampersand last, otherwise input such as
 *   `&amp;ldquo;` would be decoded twice.
 */
const entities = [
  { entity: "&Agrave;", character: "À" },
  { entity: "&Aacute;", character: "Á" },
  { entity: "&Acirc;", character: "Â" },
  { entity: "&Atilde;", character: "Ã" },
  { entity: "&Auml;", character: "Ä" },
  { entity: "&Aring;", character: "Å" },
  { entity: "&agrave;", character: "à" },
  { entity: "&aacute;", character: "á" },
  { entity: "&acirc;", character: "â" },
  { entity: "&atilde;", character: "ã" },
  { entity: "&auml;", character: "ä" },
  { entity: "&aring;", character: "å" },
  { entity: "&AElig;", character: "Æ" },
  { entity: "&aelig;", character: "æ" },
  { entity: "&szlig;", character: "ß" },
  { entity: "&Ccedil;", character: "Ç" },
  { entity: "&ccedil;", character: "ç" },
  { entity: "&Egrave;", character: "È" },
  { entity: "&Eacute;", character: "É" },
  { entity: "&Ecirc;", character: "Ê" },
  { entity: "&Euml;", character: "Ë" },
  { entity: "&egrave;", character: "è" },
  { entity: "&eacute;", character: "é" },
  { entity: "&ecirc;", character: "ê" },
  { entity: "&euml;", character: "ë" },
  { entity: "&fnof;", character: "ƒ" },
  { entity: "&Igrave;", character: "Ì" },
  { entity: "&Iacute;", character: "Í" },
  { entity: "&Icirc;", character: "Î" },
  { entity: "&Iuml;", character: "Ï" },
  { entity: "&igrave;", character: "ì" },
  { entity: "&iacute;", character: "í" },
  { entity: "&icirc;", character: "î" },
  { entity: "&iuml;", character: "ï" },
  { entity: "&Ntilde;", character: "Ñ" },
  { entity: "&ntilde;", character: "ñ" },
  { entity: "&Ograve;", character: "Ò" },
  { entity: "&Oacute;", character: "Ó" },
  { entity: "&Ocirc;", character: "Ô" },
  { entity: "&Otilde;", character: "Õ" },
  { entity: "&Ouml;", character: "Ö" },
  { entity: "&ograve;", character: "ò" },
  { entity: "&oacute;", character: "ó" },
  { entity: "&ocirc;", character: "ô" },
  { entity: "&otilde;", character: "õ" },
  { entity: "&ouml;", character: "ö" },
  { entity: "&Oslash;", character: "Ø" },
  { entity: "&oslash;", character: "ø" },
  { entity: "&OElig;", character: "Œ" },
  { entity: "&oelig;", character: "œ" },
  { entity: "&Scaron;", character: "Š" },
  { entity: "&scaron;", character: "š" },
  { entity: "&Ugrave;", character: "Ù" },
  { entity: "&Uacute;", character: "Ú" },
  { entity: "&Ucirc;", character: "Û" },
  { entity: "&Uuml;", character: "Ü" },
  { entity: "&ugrave;", character: "ù" },
  { entity: "&uacute;", character: "ú" },
  { entity: "&ucirc;", character: "û" },
  { entity: "&uuml;", character: "ü" },
  { entity: "&micro;", character: "µ" },
  { entity: "&times;", character: "×" },
  { entity: "&Yacute;", character: "Ý" },
  { entity: "&Yuml;", character: "Ÿ" },
  { entity: "&yacute;", character: "ý" },
  { entity: "&yuml;", character: "ÿ" },
  { entity: "&deg;", character: "°" },
  { entity: "&dagger;", character: "†" },
  { entity: "&Dagger;", character: "‡" },
  { entity: "&lt;", character: "<" },
  { entity: "&gt;", character: ">" },
  { entity: "&plusmn;", character: "±" },
  { entity: "&laquo;", character: "«" },
  { entity: "&raquo;", character: "»" },
  { entity: "&iquest;", character: "¿" },
  { entity: "&iexcl;", character: "¡" },
  { entity: "&middot;", character: "·" },
  { entity: "&bull;", character: "•" },
  { entity: "&trade;", character: "™" },
  { entity: "&copy;", character: "©" },
  { entity: "&reg;", character: "®" },
  { entity: "&sect;", character: "§" },
  { entity: "&para;", character: "¶" },
  { entity: "&quot;", character: "\"" },
  { entity: "&nbsp;", character: " " },
  { entity: "&ndash;", character: "-" },
  { entity: "&ldquo;", character: "“" },
  { entity: "&rdquo;", character: "”" },
  { entity: "&ordf;", character: "ª" },
  { entity: "&ordm;", character: "º" },
  // Keep `&amp;` last — see the note in the header comment.
  { entity: "&amp;", character: "&" },
];
Here is the gist with the code.
If I missed some symbol, please, comment here or there.
Upvotes: 2
Reputation: 868
You can use the Drive API Advanced Service for this. First you need to enable it. Then, when you insert (create) a new Google Doc file with data from an HTML blob, it automatically renders the HTML in your Doc. After that you can get the text of your Doc with the following code:
/**
 * Converts HTML to plain text by round-tripping it through a temporary
 * Google Doc: the HTML blob is inserted as a Google Docs file, the document
 * body text is read back, and the temporary file is removed.
 *
 * Uses the `Drive` advanced service, which must be enabled for the project.
 *
 * @param {string} html - HTML markup to convert (passed to `Utilities.newBlob`).
 * @returns {string} The text of the rendered document body.
 */
function htmltotext(html) {
  const tempFileId = Drive.Files.insert(
    { title: 'temp', mimeType: MimeType.GOOGLE_DOCS },
    Utilities.newBlob(html, MimeType.HTML)
  ).id;

  try {
    const doc = DocumentApp.openById(tempFileId);
    const text = doc.getBody().getText();
    doc.saveAndClose();
    return text;
  } finally {
    // Always delete the temporary file, even if opening or reading it failed,
    // so failed conversions do not leave stray "temp" documents in Drive.
    // Drive.Files.remove deletes the file outright instead of trashing it.
    Drive.Files.remove(tempFileId);
  }
}
Thanks @tanaike for suggestion
Upvotes: 0
Reputation: 491
You can use built-in Xml Services (reference):
// The input must contain the entity form (`&amp;`); a bare `&` is not well-formed XML.
var str = 'Some text &amp; text';
// Wrap the text in a dummy <d> root element so it parses as an XML document.
var decode = XmlService.parse('<d>' + str + '</d>');
// Reading the element text yields the decoded characters.
// NOTE(review): presumably only XML's predefined entities and numeric references are
// understood here; HTML-only names such as &nbsp; would likely fail to parse. Confirm.
var strDecoded = decode.getRootElement().getText();
or you can use built-in E4X XML class.
// NOTE: the global `XML` class is E4X, a legacy extension of the older Rhino runtime;
// it is not available on the V8 runtime (see the Apps Script V8 migration guide).
// The input must contain the entity form (`&amp;`); a bare `&` is not well-formed XML.
var str = 'Some text &amp; text';
// Wrap the text in a dummy <d> root element so it parses as an XML value.
var decode = new XML('<d>' + str + '</d>');
var strDecoded = decode.toString();
Upvotes: 21