Reputation: 976
I made the following function to parse all child elements from a class:
/**
 * Builds an HTML string from the direct child elements of `.mw-content-ltr`.
 *
 * Each <h1>, <h2>, <h3> and <p> child is re-emitted with its text content;
 * any other element is skipped. Only element children are visited
 * (jQuery `.children()`), so bare text sitting directly inside the container
 * is not included in the result.
 *
 * NOTE(review): the text is concatenated into markup without HTML-escaping;
 * escape it first if `data` can come from an untrusted source.
 *
 * @param {*} data - Anything `$()` accepts that contains a `.mw-content-ltr` element.
 * @returns {string} Concatenated markup for the recognised children ("" if none).
 */
function getBodyHTML(data) {
  var elements = "";
  $(data).find(".mw-content-ltr").children().each(function (i, row) {
    var $row = $(row);
    if ($row.is("h2")) {
      // Single-quoted literals keep the attribute's double quotes intact.
      elements += '<h2 class="header2">Header 2 content is: ' + $row.text() + "</h2>";
    } else if ($row.is("h1")) {
      elements += '<h1 class="header1">Header 1 content is: ' + $row.text() + "</h1>";
    } else if ($row.is("h3")) {
      elements += "<h3>" + $row.text() + "</h3>";
    } else if ($row.is("p")) {
      elements += "<p>" + $row.text() + "</p>";
    }
  });
  return elements;
}
That function does what I want for this kind of html:
<h2 class="main-header">Some Text</h2>
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing elit,
sed diam nonummy nibh. </p>
<p>euismod tincidunt ut laoreet dolore magna aliquam erat
volutpat. Ut wisi enim</p>
<h1>Another text</h1>
<p>euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim</p>
But I can't get the full text when the HTML comes this way:
<h2 class="main-header">Some Text</h2>
Lorem ipsum dolor sit amet, consectetuer adipiscing elit,
sed diam nonummy nibh.
<h1>Another text</h1>
<p>euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim</p>
As you can see, the first text after the h2 is not wrapped in any tag.
What condition should I add to my function to select that kind of untagged text too?
Thanks in advance!
Upvotes: 1
Views: 404
Reputation: 27012
You can use contents() instead of children() to get text nodes as well as elements.
Text nodes have a nodeType of 3.
Here are a few examples:
Here's how you could change your code:
/**
 * Builds an HTML string from every child node of `.mw-content-ltr`,
 * including text that is not wrapped in a tag.
 *
 * Uses jQuery `.contents()` so text nodes are visited as well as elements:
 *  - non-blank text nodes (nodeType 3) are appended as "text content: <trimmed text>";
 *  - <h1>, <h2>, <h3> and <p> elements are re-emitted with their text content;
 *  - every other node is skipped.
 *
 * NOTE(review): the text is concatenated into markup without HTML-escaping;
 * escape it first if `data` can come from an untrusted source.
 *
 * @param {*} data - Anything `$()` accepts that contains a `.mw-content-ltr` element.
 * @returns {string} Concatenated output for the recognised nodes ("" if none).
 */
function getBodyHTML(data) {
  var elements = "";
  $(data).find(".mw-content-ltr").contents().each(function (i, row) {
    if (row.nodeType === 3) {
      // Text node: skip whitespace-only runs (e.g. the newlines between tags).
      var text = $.trim(row.textContent);
      if (text.length > 0) {
        elements += "text content: " + text;
      }
      return;
    }

    var $row = $(row);
    if ($row.is("h2")) {
      // Single-quoted literals keep the attribute's double quotes intact.
      elements += '<h2 class="header2">Header 2 content is: ' + $row.text() + "</h2>";
    } else if ($row.is("h1")) {
      elements += '<h1 class="header1">Header 1 content is: ' + $row.text() + "</h1>";
    } else if ($row.is("h3")) {
      elements += "<h3>" + $row.text() + "</h3>";
    } else if ($row.is("p")) {
      elements += "<p>" + $row.text() + "</p>";
    }
  });
  return elements;
}
Upvotes: 3
Reputation: 144659
You can use the .contents() method instead of .children() and check the nodeType of each node:
$(data).find(".mw-content-ltr").contents().each(function(_, row) {
// ... (the existing if / else-if chain is elided here) ...
} else if ( row.nodeType === 3 && $.trim(row.nodeValue).length ) {
// Text node (nodeType 3) whose trimmed value is non-empty: keep the trimmed text.
var textNodeValue = $.trim(row.nodeValue);
}
});
Upvotes: 2
Reputation: 7449
Add an else branch to your if block:
// Fallback for a row not matched by the preceding branches: append its inner HTML as-is.
else{
elements = elements.concat($(row).html());
}
Something like:
/**
 * Builds an HTML string from the direct child elements of `.mw-content-ltr`.
 *
 * <h1>, <h2>, <h3> and <p> children are re-emitted with their text content;
 * any other child element contributes its inner HTML unchanged.
 *
 * NOTE(review): `.children()` yields element nodes only, so text placed
 * directly in the container (not wrapped in a tag) is never passed to the
 * callback and is not reached by the fallback branch either.
 * NOTE(review): text and inner HTML are concatenated without escaping;
 * sanitise first if `data` can come from an untrusted source.
 *
 * @param {*} data - Anything `$()` accepts that contains a `.mw-content-ltr` element.
 * @returns {string} Concatenated markup for the child elements ("" if none).
 */
function getBodyHTML(data) {
  var elements = "";
  $(data).find(".mw-content-ltr").children().each(function (i, row) {
    var $row = $(row);
    if ($row.is("h2")) {
      // Single-quoted literals keep the attribute's double quotes intact.
      elements += '<h2 class="header2">Header 2 content is: ' + $row.text() + "</h2>";
    } else if ($row.is("h1")) {
      elements += '<h1 class="header1">Header 1 content is: ' + $row.text() + "</h1>";
    } else if ($row.is("h3")) {
      elements += "<h3>" + $row.text() + "</h3>";
    } else if ($row.is("p")) {
      elements += "<p>" + $row.text() + "</p>";
    } else {
      // Unrecognised element: pass its inner HTML through untouched.
      elements += $row.html();
    }
  });
  return elements;
}
Upvotes: 1