Eduardo SR
Eduardo SR

Reputation: 57

Search on string and capture value of specific HTML attribute

I need a regex that extracts the src value from each of the following strings, whether the attribute value is double-quoted, single-quoted, or unquoted.

With Double Quotes:

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

With Single Quotes:

var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

Without Quotes:

var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

Expected match/return:

anyFile.js

I have this poor-looking solution that uses split:

// Extract the file name from the src attribute of the tag string `a`
// using only indexOf/split. Three delimiters are tried in turn:
// src=" (double quotes), src=' (single quotes), then bare src= (no quotes).
// `file` ends up holding the extracted name, or "no match".
var file = "";
var match = 'src="';
// indexOf finds the delimiter anywhere in the string, so a longer
// attribute name that ends in the same text (e.g. data-src=") also passes.
if(a.indexOf(match)>=0){
  // Keep what follows the first delimiter, cut it at the first `.js"`,
  // then put back the ".js" that the second split removed.
  file = a.split(match);
  file = file[1];
  file = file.split('.js"');
  file = file[0] + ".js";
}
else{
  // Same steps for a single-quoted value.
  match = "src='";
  if(a.indexOf(match)>=0){
    file = a.split(match);
    file = file[1];
    file = file.split(".js'");
    file = file[0] + ".js";
  }
  else{
    // Same steps for an unquoted value; the value is cut at the first ".js".
    match = "src=";
    if(a.indexOf(match)>=0){
      file = a.split(match);
      file = file[1];
      file = file.split('.js');
      file = file[0] + ".js";
    }
    else {
    // None of the three delimiters occurs in `a`.
    file = "no match";
    }
  }
}

But it can match the wrong src attributes, so I need a regex.

Any help would be greatly appreciated.

Solution (special thanks to Rajesh)

var a = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";
var l= "<script src=anyFile.js>";

/**
 * Logs the value of the `src` attribute found in an HTML tag string.
 *
 * Double-quoted, single-quoted and unquoted values are supported.
 * "no match" is logged when the string contains no `src` attribute.
 *
 * @param {string} str - Markup to search, e.g. '<script src="anyFile.js">'.
 * @returns {void}
 */
function getSrc(str){
  // `src` has to start the string or follow whitespace; otherwise an
  // attribute that merely ends in "src" (such as data-src) would be picked
  // up instead of the real one. Group 1 is the value itself: it ends at the
  // first quote, whitespace character or `>`. The `i` flag accepts SRC= too.
  const regex = /(?:^|\s)src=["']?([^"'\s>]+)/i;
  const found = str.match(regex);
  const match = found !== null ? found[1] : "no match";
  console.log(match);
}

[a,b,c,d,e,f,g,h,i,j,k,l].forEach(getSrc)

Upvotes: 1

Views: 95

Answers (3)

revo
revo

Reputation: 48721

I prefer going the right way, using DOM:

// Log the src attribute of every <script> element in the current document.
var scripts = document.getElementsByTagName('script');
// getAttribute('src') returns the value exactly as written in the markup
// (e.g. "anyFile.js"), whereas the `src` property returns the URL resolved
// against the document's base. Scripts without a src attribute log null.
Array.prototype.forEach.call(scripts, (script) => console.log(script.getAttribute('src')));

Full javascript code:

// Sample tags covering double-quoted, single-quoted and unquoted src values.
var a = '<script src="anyFile1.js"><\/script>';
var b = '<script src=\'anyFile2.js\'><\/script>';
var c = '<script src=anyFile3.js><\/script>';

// Let the browser parse the markup inside a div that is never attached to
// the page, then query the resulting elements.
var html = document.createElement('div');
html.innerHTML = a + b + c;

var scripts = html.getElementsByTagName('script');
// getAttribute('src') returns the value exactly as written in the markup
// ("anyFile1.js", ...), whereas the `src` property returns the URL resolved
// against the document's base.
[].forEach.call(scripts, (script) => console.log(script.getAttribute('src')));

The Regex way

<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?

Live demo

// Visit every <script ... src=...> tag in `str` and collect its src value in
// `array`. replace() serves only as an iterator here; its result is discarded.
str.replace(
  /<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?/g,
  (wholeTag, quote, srcValue) => {
    // quote    -> capture group 1: the opening quote character, or ''
    // srcValue -> capture group 2: the attribute value without quotes
    if (srcValue) {
      return array.push(srcValue);
    }
    return '';
  }
);

Full JS code:

// Sample input: twelve <script> tags with double-quoted, single-quoted and
// unquoted attribute values. Every tag must be closed with `>`; an unclosed
// tag makes the pattern below run on into the next tag and swallow it.
const str = `<script class="anyClass" src="anyFile.js" id="anyID">
<script class="anyClass" src="anyFile.js">
<script src="anyFile.js" id="anyID">
<script src="anyFile.js">


<script class='anyClass' src='anyFile.js' id='anyID'>
<script class='anyClass' src='anyFile.js'>
<script src='anyFile.js' id='anyID'>
<script src='anyFile.js'>

<script class=anyClass src=anyFile.js id=anyID>
<script class=anyClass src=anyFile.js>
<script src=anyFile.js id=anyID>
<script src=anyFile.js >`;

// src values found in `str`, in order of appearance.
const array = [];

// Group 1 is the optional opening quote (re-matched by \1 after the value),
// group 2 is the src value, group 3 an optional closing </script>.
const scriptSrcPattern = /<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?/g;

// exec() with the g flag resumes after the previous match on each call and
// returns null once there are no more matches.
let found;
while ((found = scriptSrcPattern.exec(str)) !== null) {
  array.push(found[2]);
}

console.log(array);

Upvotes: 1

Rajesh
Rajesh

Reputation: 24925

You can try something like this:

Logic:

  • Search for the pattern src= followed by optional single/double quotes, followed by one or more characters that are not a quote, a space, or >.
  • This will yield something like src="anyfile.js.
  • Now strip the leading part, i.e. src= followed by the optional quote.

Regex version

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';
var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

/**
 * Logs the value of the first `src=` attribute found in a tag string.
 *
 * Works in two steps: match `src=` plus the optional quote and the value,
 * then strip the `src=` prefix and quote from that match.
 * Logs "no match" when the string contains no `src=` at all.
 *
 * @param {string} str - Markup to search, e.g. '<script src="anyFile.js">'.
 * @returns {void}
 */
function getSrc(str){
  // Step 1: src=, optional quotes, then the value up to the next quote,
  // space or `>`.
  var regex = /src=["']*[^"' >]+/;
  var found = str.match(regex);
  // match() returns null when nothing matches; indexing it would throw.
  if (found === null) {
    console.log("no match");
    return;
  }
  // Step 2: drop the leading src= and the optional quote.
  var replaceRegex = /src=["' ]*/;
  var match = found[0].replace(replaceRegex, "");
  console.log(match);
}

[a,b,c,d,i,j,k,l].forEach(getSrc)

Upvotes: 1

ThePerplexedOne
ThePerplexedOne

Reputation: 2950

Here's what I came up with to achieve this. Any improvements are welcome:

script src=["']?(\w+\.js)

https://regex101.com/r/6NCj94/2

or for cases where your script's name is something like jquery.min.js:

script src=["']?(.*\.js)

https://regex101.com/r/6NCj94/3

Upvotes: 1

Related Questions