Reputation: 6334
I need help parsing the href
attributes of <a> tags. Currently everything is being parsed as text; however, I need to capture the links as well so that I can send them to a PHP page later using AJAX.
My HTML looks like this:
<div id="word_content">
<br>Testing Time: 2015-10-29 17:57:11<br>
Total Age: 19<br>
Total Friemd: 9<br>
Total Family: 10<br>
<br>
Here are the suggestions - Him_530037_: <a href="www.mytarget.com="_blank">93358546</a>
<h3>Overview</h3><br>
<ul>
<li>(The overlap provided is not good)</li>
</ul>
<h3>Structure</h3><br>
<h4>Target:</h4><br>
<ul>
<li>Audience.</li>
<li>Lookalike</li>
<li>Overlap of Audience</li>
<a href="https://www.myPage.com/lolPagess/?id=06" target="_blank">06<font name="names" hidden="" style="display: inline;"> - Page Likes</font></a>
</ul>
My jQuery code is something like this:
// Collect every descendant of #word_content whose tag name starts with "h" (case-insensitive).
var headTags = $("div#word_content").find("*").filter(function(){
return /^h/i.test(this.nodeName);
});
// Result map: heading text -> array holding the text of each <li> in the list that follows the heading.
var output = {};
$(headTags).each(function(){
var currentHead = $(this);
// Look two siblings past the heading; in the sample markup a <br> sits between a heading and its <ul>.
var nextNextElem = currentHead.next().next();
var innerText = [];
if(nextNextElem.prop("tagName") == "UL")
{
nextNextElem.find("li").each(function(){
// .text() yields only the text content, so the href of any link is not captured here.
innerText.push($(this).text());
});
}
// A heading whose second-next sibling is not a <ul> is stored with an empty array.
output[currentHead.text()] = innerText;
});
Currently, the jQuery code fetches the data, but it captures only the text and not the links. I need to parse the links as well, so that they can be used in further pages. Can someone please help?
Upvotes: 1
Views: 106
Reputation: 11859
use this:
// For every link in the list, record its visible text together with its href.
nextNextElem.find("a").each(function(index, link){
    var anchor = $(link);
    innerText.push(anchor.text()+" & href is:"+anchor.attr("href"));
});
// Collect the heading elements (h1-h6) inside #word_content.
// The pattern is anchored so tags such as <hr> or <header> are not mistaken for headings.
var headTags = $("div#word_content").find("*").filter(function(){
    return /^h[1-6]$/i.test(this.nodeName);
});

// Maps each heading's text to the ordered pieces found in the list that follows it:
// plain text fragments and rebuilt "<a href='...'>text</a>" strings.
var output = {};

headTags.each(function(){
    var currentHead = $(this);
    // The sample markup places a <br> between a heading and its <ul>, hence two next() hops.
    var nextNextElem = currentHead.next().next();
    var innerText1 = [];

    if(nextNextElem.prop("tagName") === "UL")
    {
        nextNextElem.find("li").each(function(){
            // Leading text of the <li>. Skip it when the item is empty or starts with an
            // element, otherwise firstChild.data would throw or yield undefined.
            var leading = this.firstChild;
            if(leading && leading.nodeType === 3){
                innerText1.push(leading.data);
            }

            // Only direct <a> children are links; other child elements have no href.
            $(this).children("a").each(function(){
                // innerText (rather than .text()) is used on purpose here: it is what the
                // original snippet read for the link label.
                innerText1.push("<a href='"+$(this).attr("href")+"'>"+this.innerText+"</a>");

                // Keep the text that trails the link (e.g. " (MM, SG),"); ignore a sibling
                // that is an element, whose nodeValue would be null.
                var trailing = this.nextSibling;
                if(trailing && trailing.nodeType === 3){
                    innerText1.push(trailing.nodeValue);
                }
            });
        });
    }

    // Headings that are not followed by a <ul> are stored with an empty array.
    output[currentHead.text()] = innerText1;
});

console.log(output);
// Rendered with .html(), so the rebuilt <a> strings inside the JSON are interpreted as markup.
$("#data").html(JSON.stringify(output));
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="word_content">
<br>Testing Time: 2015-10-29 17:57:11<br>
Total Age: 19<br>
Total Friemd: 9<br>
Total Family: 10<br>
<br>
Here are the suggestions - Him_530037_: <a href="www.mytarget.com="_blank">93358546</a>
<h3>Overview</h3><br>
<ul>
<li>Multiple Countries
<a href="https://www.myTarget.com/ads/?id=603" target="_blank">603<font name="names" hidden="" style="display: none;"> - Post: "သင့္ရဲ့ Data အသံုးျပဳ မွုကို အေၾကာင္းၾကားေပးေသာ..."</font></a> (MM, SG),
<a href="https://www.myTarget.com/ads/?id=602" target="_blank">602<font name="names" hidden="" style="display: none;"> - Post: "Mynamar pics."</font></a></li>
</ul>
</div>
<span>OUTPUT AREA:</span>
<div id="data"></div>
Upvotes: 1
Reputation: 9381
Check the href
of every <a> element:
// Run the validator over the href of every anchor on the page.
$("a").each(function (index, anchor) {
    var href = $(anchor).attr("href");
    isUrlValid(href);
});
The validator below is borrowed from "Validating url with jQuery without the validate-plugin?":
/**
 * Tests whether a string looks like an absolute URL.
 *
 * The pattern is case-insensitive and anchored at both ends. It requires an
 * http, https, ftp or sftp scheme followed by "://", then accepts an optional
 * userinfo part ending in "@", a host given either as a dotted IPv4 address or
 * as a dotted domain name, and optional port, path, query and fragment parts.
 * A value without a scheme (for example "www.example.com") does not match.
 *
 * @param {string} url - The value to test; RegExp.prototype.test coerces non-strings to a string.
 * @returns {boolean} true when the whole value matches the pattern, otherwise false.
 */
function isUrlValid(url) {
return /^(https?|s?ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i.test(url);
}
This regex tests whether a string is a valid URL.
Upvotes: 0
Reputation: 8276
You can use something like this to parse links in the site:
// Log the 1-based position, visible text and href of every anchor on the page.
$("a").each(function (index, anchor) {
    var link = $(anchor);
    console.log("Link: " + (index + 1));
    console.log(" Text is: " + link.text());
    console.log(" Link is: " + link.attr('href'));
});
Result:
www.mytarget.com=
https://www.myPage.com/lolPagess/?id=06
Upvotes: 0