Reputation: 77
I have an XML string that contains some special characters (<, >, &) in its node values and therefore cannot be parsed with jQuery's $.parseXML.
This is the sample XML string
<?xml version="1.0" encoding="UTF-8"?>
<BackgroundCheck userId="{Username}" password="{Password}">
<BackgroundSearchPackage action="submit" type="{PackageName}">
<ReferenceId>ab<</ReferenceId>
<UserArea>
<PositionDetail>
<EmploymentState>{StateJob}</EmploymentState>
<ProposedSalary>{AnnualSalary}</ProposedSalary>
</PositionDetail>
</UserArea>
<PersonalData>
<PersonName>
<GivenName>{FirstName}</GivenName>
<MiddleName>{MiddleName}</MiddleName>
<FamilyName>{LastName}</FamilyName>
<Affix>{Generation}</Affix>
</PersonName>
<EmailAddress>{Email}</EmailAddress>
<DemographicDetail>
<GovernmentId countryCode="US" issuingAuthority="SSN">{SSN}</GovernmentId>
<DateOfBirth>{DateOfBirth}</DateOfBirth>
</DemographicDetail>
{Aliases}
{PostalAddress}
</PersonalData>
<Screenings useConfigurationDefaults="no">
{Screenings}
<AdditionalItems type="x:interface">
<Text>{Search&Type}</Text>
</AdditionalItems>
<AdditionalItems type="x:return_xml_results">
<Text>yes</Text>
</AdditionalItems>
<AdditionalItems type="x:embed_credentials">
<Text>true</Text>
</AdditionalItems>
<AdditionalItems type="x:integration_type">
<Text>Sample XML</Text>
</AdditionalItems>
<AdditionalItems type="x:postback_url">
<Text>{CallbackURL}</Text>
</AdditionalItems>
{AdditionalItems}
</Screenings>
{Documentation}
</BackgroundSearchPackage>
</BackgroundCheck>
Note the value of tag ReferenceId on 4th line, it contains special character and hence this string can not be parsed to XML.
What I need is to replace those special characters with their escape sequences (&lt;, &gt;, &amp;). The closest I came across is this
how to escape xml entities in javascript?
But this answer assumes that we have XML node values already with us.
My requirement is different: I have the complete XML as a string and I want to replace only the node values without touching the tag names (tags also contain <, >).
This is what i tried using jQuery
// Re-serialize every top-level node jQuery parsed from `xml` by wrapping it in
// a temporary <p> and reading that wrapper's inner HTML.
$(xml).each(function() {
  var t = $(this).wrap('<p/>').parent().html();
  // NOTE(review): String.prototype.replace returns a new string and the result
  // of this chain is discarded, so `xml` below receives `t` exactly as .html()
  // returned it. Assigning the result would also escape the tag delimiters.
  t.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
  xml = t;
});
This is working fine; the only problem with this code is that it converts the XML tag names to lower case. I think this is because of jQuery's behavior.
Please suggest a fix/solution for this. Thanks.
Upvotes: 0
Views: 6598
Reputation: 77
I finally achieved what i needed. Thanks to @mplungjan. As he also pointed out, an XML file must be valid to get parsed and making it valid should be done where the XML is created.
My scenario was somewhat different. I HAD TO fix the invalid XML string in javascript only, before i could parse it.
I had to apply a dirty hack to achieve it. While parsing an XML string in JavaScript (or any other programming language), we get an error specifying what's wrong AND the line number. I used that line number to locate the offending text, escape it, and parse again until no error was left.
Here's what i did
// Shared DOMParser instance; process() below reuses it for every parse attempt.
var oParser = new DOMParser();
/**
 * Backslash-escapes every regular-expression metacharacter in `str` so the
 * result can be embedded in a RegExp source and match the text literally.
 *
 * @param {string} str - text to be matched literally
 * @returns {string} `str` with each metacharacter prefixed by a backslash
 */
function escapeRegExp(str) {
  var metaChars = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
  return str.replace(metaChars, function (ch) {
    return "\\" + ch;
  });
}
/**
 * Escapes the XML-significant characters found in the text content of one
 * line of an XML string and returns the repaired string.
 *
 * The text is taken to be everything between the first `>` and the last `<`
 * on the line, so the function only suits sources with one element per line.
 *
 * @param {string} str - the complete XML source
 * @param {number|string} line - 1-based line number of the offending line
 * @returns {string|null} the source with that line's text escaped, or `null`
 *   when the line does not exist, has no `>text<` span, or its text needs no
 *   escaping (i.e. there is nothing this function can repair)
 */
function remove_error(str, line) {
  var allTexts = str.split("\n");
  var index = Number(line) - 1;
  var illegal = allTexts[index];
  if (typeof illegal !== "string") {
    return null;
  }

  // Greedy on purpose: a stray '<' inside the text must stay part of the capture.
  var found = illegal.match(/>(.*)</);
  if (!found) {
    return null;
  }

  var extract = found[1];
  var fix_extract = extract
    // Leave predefined entities and character references alone so that
    // already-escaped text is not escaped a second time.
    .replace(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");

  if (fix_extract === extract) {
    // Nothing to escape: the parse error on this line has another cause.
    return null;
  }

  // Splice the escaped text back into the reported line itself instead of
  // searching the whole document, so an identical span elsewhere is untouched.
  var start = found.index + 1;
  allTexts[index] =
    illegal.slice(0, start) + fix_extract + illegal.slice(start + extract.length);
  return allTexts.join("\n");
}
// Result holders kept at file scope for compatibility with code that reads
// them directly; process() resets both at the start of every call.
var fixed = "", final_fixed = "";

/**
 * Parses `orig_str` as XML and, while the parser reports an error, asks
 * remove_error() to escape the text on the reported line and tries again.
 *
 * Side effects: updates the file-level `fixed` / `final_fixed` variables,
 * logs each parser message, and on failure either raises an alert (no parser
 * message available) or writes an "Invalid XML" heading into the page.
 *
 * @param {string} orig_str - XML source, possibly not well-formed
 * @returns {string|undefined} the well-formed (possibly repaired) source, or
 *   `undefined` when it could not be repaired
 */
function process(orig_str) {
  // Start clean so a failed run can never hand back an earlier call's result.
  fixed = "";
  final_fixed = "";

  var current = orig_str;
  while (true) {
    var newDOM = oParser.parseFromString(current, "text/xml");
    var error = newDOM.getElementsByTagName("parsererror");
    if (!error || !error.length) {
      final_fixed = current;
      return final_fixed;
    }

    // Read the message as plain text so the lookup does not depend on one
    // browser's parsererror markup.
    var err_log = error[0].textContent;
    if (!err_log) {
      alert('XML could not be parsed');
      return;
    }
    console.log(err_log);

    // Matches both "error on line 4 at column 17" and "Line Number 4, Column 17".
    var lineMatch = /line(?:\s+number)?\s+(\d+)/i.exec(err_log);
    fixed = lineMatch ? remove_error(current, lineMatch[1]) : "";

    // Stop when no repair was produced or the repair changed nothing;
    // retrying the same input would never terminate.
    if (!fixed || fixed === current) {
      $('.welcome-page section.welcome .inner').html("<h3 class='text-center'>Invalid XML</h3>");
      return;
    }
    current = fixed;
  }
}
// Repair the raw XML held in res[0][0] (res is defined outside this snippet).
// NOTE: despite its name, newDOM receives process()'s return value, which is
// the repaired XML *string* (or undefined on failure), not a DOM document.
var newDOM = process(res[0][0]);
if (!newDOM) {
alert('XML could not be parsed');
// Presumably this fragment lives inside a function that is not shown here;
// a bare `return` is only valid in that context.
return;
}
I know what i did is just a hack. But i didn't have any other options.
PS- Any edits to this answer are welcome.
Upvotes: 1
Reputation: 178094
// Shared DOMParser instance used by replaceIllegalXML() and by the
// verification parse at the end of this snippet.
var oParser = new DOMParser();
/**
 * Escapes XML-significant characters inside the text nodes of an XML string
 * that is not well-formed, leaving the tags themselves untouched.
 *
 * The string is parsed with the HTML parser only to obtain its text content,
 * line by line. Each text line that needs escaping is then located in the
 * original source as `>text<` and replaced by its escaped form.
 *
 * @param {string} t - XML source whose node values may contain raw <, >, &, " or '
 * @returns {string|undefined} the source with those node values escaped, or
 *   `undefined` if the parser handed back an error document
 */
function replaceIllegalXML(t) {
  var oDOM = oParser.parseFromString(t, "text/html");
  // Defensive check kept from the original code.
  var nok = oDOM.documentElement.nodeName === "parsererror";
  if (nok) {
    console.log("Could not parse the string");
    return;
  }

  var allTexts = oDOM.documentElement.textContent.split("\n");
  for (var i = 0; i < allTexts.length; i++) {
    var text = allTexts[i].trim();
    if (!text) {
      continue;
    }
    var repl = text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;");
    if (repl === text) {
      continue;
    }
    // Build the replacement once per line; it is returned from a function
    // below so that '$' sequences in the text are not treated as patterns.
    var escapedSpan = ">" + repl + "<";
    // Search for the literal text rather than a RegExp built from it, so
    // characters such as ( ) + ? $ in a node value cannot break the search.
    t = t.replace(">" + text + "<", function () {
      return escapedSpan;
    });
  }
  return t;
}
// Sample input: an XML template that is not well-formed as written, because
// the ReferenceId value contains a raw '<' and one Text value a raw '&'.
var t = `<?xml version="1.0" encoding="UTF-8"?>
<BackgroundCheck userId="{Username}" password="{Password}">
<BackgroundSearchPackage action="submit" type="{PackageName}">
<ReferenceId>ab<</ReferenceId>
<UserArea>
<PositionDetail>
<EmploymentState>{StateJob}</EmploymentState>
<ProposedSalary>{AnnualSalary}</ProposedSalary>
</PositionDetail>
</UserArea>
<PersonalData>
<PersonName>
<GivenName>{FirstName}</GivenName>
<MiddleName>{MiddleName}</MiddleName>
<FamilyName>{LastName}</FamilyName>
<Affix>{Generation}</Affix>
</PersonName>
<EmailAddress>{Email}</EmailAddress>
<DemographicDetail>
<GovernmentId countryCode="US" issuingAuthority="SSN">{SSN}</GovernmentId>
<DateOfBirth>{DateOfBirth}</DateOfBirth>
</DemographicDetail>
{Aliases}
{PostalAddress}
</PersonalData>
<Screenings useConfigurationDefaults="no">
{Screenings}
<AdditionalItems type="x:interface">
<Text>{Search&Type}</Text>
</AdditionalItems>
<AdditionalItems type="x:return_xml_results">
<Text>yes</Text>
</AdditionalItems>
<AdditionalItems type="x:embed_credentials">
<Text>true</Text>
</AdditionalItems>
<AdditionalItems type="x:integration_type">
<Text>Sample XML</Text>
</AdditionalItems>
<AdditionalItems type="x:postback_url">
<Text>{CallbackURL}</Text>
</AdditionalItems>
{AdditionalItems}
</Screenings>
{Documentation}
</BackgroundSearchPackage>
</BackgroundCheck>`
// Repair the sample, then confirm that the result parses as well-formed XML.
t = replaceIllegalXML(t);
var newDOM = oParser.parseFromString(t, "text/xml");
// Browsers differ in where they place the <parsererror> element (as the
// document element or nested inside the document), so search the whole
// document instead of inspecting only the root node's name.
var nok = newDOM.getElementsByTagName("parsererror").length > 0;
if (nok) console.log("xml parsing failed");
else console.log(newDOM.getElementsByTagName("ReferenceId")[0].textContent);
Upvotes: 2