Reputation: 3205
Suppose I have this HTML in a string:
<meta http-equiv="Set-Cookie" content="COOKIE1_VALUE_HERE">
<meta http-equiv="Set-Cookie" content="COOKIE2_VALUE_HERE">
<meta http-equiv="Set-Cookie" content="COOKIE3_VALUE_HERE">
And I have this regular expression, to get the values inside the content
attributes:
/<meta http-equiv=[\"']?set-cookie[\"']? content=[\"'](.*)[\"'].*>/ig
How do I, in JavaScript, get all three content
values?
I've tried:
// Regex literal with the i (case-insensitive) and g (global) flags; its single
// capture group holds the text between the quotes of the content attribute.
var setCookieMetaRegExp = /<meta http-equiv=[\"']?set-cookie[\"']? content=[\"'](.*)[\"'].*>/ig;
// One exec() call yields at most one match (or null), so only a single tag's
// value is available in `match` here.
// NOTE(review): htmlstring is not defined in this snippet — presumably it holds the HTML shown above.
var match = setCookieMetaRegExp.exec(htmlstring);
but match
doesn't contain the values I need. Help?
Note: the regular expression is already correct (see here). I just need to match it to the string. Note: I'm using NodeJS
Upvotes: 7
Views: 14310
Reputation: 805
Try this
(?:class|href)([\s='"./]+)([\w-./?=&\\#"]+)((['#\\&?=/".\w\d]+|[\w)('-."\s]+)['"]|)
example :
/**
 * Extracts the text of an attribute's value from an HTML tag string.
 *
 * `attribute` is interpolated into the patterns unescaped, so it is treated as
 * regular-expression source (e.g. 'class|href' is an alternation).
 *
 * @param {string} tag - The tag markup to search.
 * @param {string} attribute - Attribute name (regex source) to look for.
 * @returns {string} The first matched `name = "value"` text with the leading
 *   name/equals/quote part and any trailing quotes or whitespace removed, or
 *   '' when the attribute pattern does not match.
 */
function getTagAttribute(tag, attribute){
    // Matches the attribute name followed by its raw, still-quoted value.
    var attributePattern = '(?:' + attribute + ')([\\s=\'"./]+)([\\w-./?=\\#"]+)(([\'#\\&?=/".\\w\\d]+|[\\w)(\'-."\\s]+)[\'"]|)';
    var found = new RegExp(attributePattern, 'g').exec(tag);

    // Nothing matched: report an empty value.
    if (!found || found.length <= 0) {
        return '';
    }

    // Matches the leading `name = "` part and any quotes/whitespace at the very end.
    var trimPattern = '(?:(' + attribute + ')+(|\\s)+([=])+(|\\s|[\'"])+)|(?:([\\s\'"]+)$)';
    return found[0].replace(new RegExp(trimPattern, 'g'), '');
}
// Example: pull the href value out of an anchor tag.
getTagAttribute('<a href = "./test.html#bir/deneme/?k=1&v=1" class= "xyz_bir-ahmet abc">','href');
// returns "./test.html#bir/deneme/?k=1&v=1"
Upvotes: 1
Reputation: 423
Try this:
// Single-tag example: exec() returns a match array whose index 1 is the first
// capture group, i.e. the text inside the content attribute.
var myString = '<meta http-equiv="Set-Cookie" content="COOKIE2_VALUE_HERE">';
var myRegexp = /<meta http-equiv=[\"']?set-cookie[\"']? content=[\"'](.*)[\"'].*>/ig;
var match = myRegexp.exec(myString);
// exec() returns null when nothing matches, so guard before indexing.
// console.log is used because alert() does not exist in NodeJS.
console.log(match ? match[1] : null); // prints COOKIE2_VALUE_HERE
Upvotes: 0
Reputation: 5607
You were so close! All that needs to be done now is a simple loop:
// Sample input: three Set-Cookie meta tags, one per line.
var htmlString = '<meta http-equiv="Set-Cookie" content="COOKIE1_VALUE_HERE">\n'+
'<meta http-equiv="Set-Cookie" content="COOKIE2_VALUE_HERE">\n'+
'<meta http-equiv="Set-Cookie" content="COOKIE3_VALUE_HERE">\n';
var setCookieMetaRegExp = /<meta http-equiv=[\"']?set-cookie[\"']? content=[\"'](.*)[\"'].*>/ig;
var matches = [];
var match;
// With the g flag each exec() call resumes from lastIndex and returns the next
// match, or null once the string is exhausted. Read the capture group from the
// returned array rather than the deprecated legacy static RegExp.$1.
while ((match = setCookieMetaRegExp.exec(htmlString)) !== null) {
    matches.push(match[1]);
}
//contains all cookie values
console.log(matches);
JSBIN: http://jsbin.com/OpepUjeW/1/edit?js,console
Upvotes: 3
Reputation: 42746
There is no need for regular expressions; just do some DOM work:
// Let the HTML parser do the work: load the markup into a detached <head> element.
// NOTE: this relies on the global `document`, so it needs a browser (or a DOM
// implementation); `document` is not available in plain NodeJS.
var head = document.createElement("head");
head.innerHTML = '<meta http-equiv="Set-Cookie" content="COOKIE1_VALUE_HERE"><meta http-equiv="Set-Cookie" content="COOKIE2_VALUE_HERE"><meta http-equiv="Set-Cookie" content="COOKIE3_VALUE_HERE">';
// Select only <meta> elements. childNodes would also contain text nodes for any
// whitespace between tags, and text nodes have no `attributes` to read.
var metaNodes = head.getElementsByTagName("meta");
var contentValues = [];
for(var i=0; i<metaNodes.length; i++){
    // getAttribute returns null (rather than throwing) when the attribute is absent.
    var contentValue = metaNodes[i].getAttribute("content");
    if (contentValue !== null) {
        contentValues.push(contentValue);
    }
}
// contentValues now holds every content value, in document order.
Since you are using NodeJS, and BlackSheep mentions cheerio,
you could use its syntax instead if you wish to use that library:
// Assumes htmlString already holds the HTML markup.
var cheerio = require('cheerio');
var $ = cheerio.load(htmlString);

var values = [];
// Store each <meta> element's content attribute at its index in the selection.
$("meta").each(function(index, element) {
    values[index] = $(element).attr("content");
});
Upvotes: 1
Reputation: 46435
Keep it simple:
/content=\"(.*?)\">/gi
demo: http://regex101.com/r/dF9cD8
Update (based on your comment):
/<meta http-equiv=\"Set-Cookie\" content=\"(.*?)\">/gi
runs only on this exact string. Demo: http://regex101.com/r/pT0fC2
You really need the (.*?)
with the question mark, or the regex will keep going until the last >
it finds (or newline). The ?
makes the search stop at the first "
(you can change this to [\"']
if you want to match either single or double quote).
Upvotes: 2
Reputation: 423
try this:
// Use a regex literal. Wrapped in quotes this would be a plain string, which
// match() converts to a RegExp whose pattern contains the literal "/" and "/ig"
// characters and has no flags — so it would never match.
var setCookieMetaRegExp = /<meta http-equiv=[\"']?set-cookie[\"']? content=[\"'](.*)[\"'].*>/ig;
// With the g flag, match() returns an array of the full matched tags (or null
// when nothing matches), not the capture groups.
var match = stringToFindPartFrom.match(setCookieMetaRegExp);
Upvotes: 0