KiRa
KiRa

Reputation: 924

Split html element and get text

I have two code samples below. Sample 2 works for splitting the elements, but the problem is that my real elements carry a style attribute that colors the text inside them, as you can see in Sample 1.

Desired Output for sample 1

A2 Award Notice

or

A2 Lease Contract

Sample 1

// Markup under test: three anchors, each with a style attribute, inside one div.
var str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// The pattern requires a bare `<a>` opening tag. Every anchor above is written
// as `<a style="...">`, so nothing matches and match() returns null.
console.log(str.match(/<a>.*?<\/a>/g));
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Sample 2

// Markup under test: attribute-less anchors mixed with a <p> element.
var str = '<div><a>A</a><a>B</a><p>Foobar</p><a>C</a></div>';
// The lazy `.*?` stops at the first `</a>`, so each bare `<a>…</a>` pair is
// returned separately: ["<a>A</a>", "<a>B</a>", "<a>C</a>"].
console.log(str.match(/<a>.*?<\/a>/g));
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>

TIA.

Upvotes: 0

Views: 2494

Answers (3)

AuxTaco
AuxTaco

Reputation: 5171

Don't use regular expressions to parse HTML. jQuery can parse it for you:

const str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// Let jQuery build the elements from the string, then select every <a> inside.
const $anchors = $(str).find('a');

// Map each anchor to its text; .get() converts the jQuery result to a plain array.
const anchorTexts = $anchors
  .map((_, anchor) => $(anchor).text())
  .get();

console.log(anchorTexts);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

So can vanilla JavaScript:

const str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// Parse the string as an HTML document so the anchors can be queried as elements.
const doc = new DOMParser().parseFromString(str, 'text/html');

// Collect the text of every <a> in document order.
const anchorTexts = Array.from(
  doc.querySelectorAll('a'),
  (anchor) => anchor.textContent
);

console.log(anchorTexts);

Upvotes: 1

EmandM
EmandM

Reputation: 960

You are attempting to match directly for <a>...</a>. Just remove the check for the end of the opening anchor tag so you capture any attributes placed on it (check for <a...</a> instead).

Sample 1

const str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// `<a` followed by a lazy wildcard up to the first `</a>`: the wildcard also
// consumes the opening tag's attributes, so styled anchors are matched too.
const anchorPattern = /<a.*?<\/a>/g;
const anchorTags = str.match(anchorPattern);

console.log(anchorTags);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

The above method isn't a great or foolproof way of finding the inner text of anchor tags. Instead, you probably want to use jQuery to parse the actual DOM structure of the given HTML:

const str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// Hand the markup to jQuery's parser; the result is what gets walked below.
const html = $.parseHTML(str);

/**
 * Walks a list of DOM nodes depth-first and logs the text of every <a> element.
 *
 * @param {ArrayLike<Node>|undefined} node - Nodes to inspect, e.g. the array
 *   returned by $.parseHTML or an element's `children` collection. A missing
 *   list is treated as empty.
 */
function findAnchorText(node) {
  // Text and comment nodes have no `children` property, so the recursive call
  // below can receive undefined; there is nothing to walk in that case.
  if (!node) {
    return;
  }

  $.each(node, function(index, element) {
    if (element.nodeName === 'A') {
      // textContent yields the anchor's text; innerHTML would return markup
      // (nested tags, escaped entities) instead.
      console.log(element.textContent);
    }
    // Use recursion to access all elements on the DOM tree
    findAnchorText(element.children);
  });
}

// Start the walk from the top-level nodes produced by $.parseHTML.
findAnchorText(html)
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Upvotes: 1

G. Mayer
G. Mayer

Reputation: 26

Combine another trick (extracting the text from each HTML tag) with a for loop and you'll get it:

/**
 * Returns the text of an HTML fragment by letting the browser parse it.
 *
 * @param {string} s - HTML markup.
 * @returns {string} The holder element's textContent, or its innerText when
 *   textContent is falsy.
 */
function extractContent(s) {
  // NOTE(review): assigning to innerHTML parses `s` as markup — confirm the
  // input is trusted before reusing this on external data.
  const holder = document.createElement('span');
  holder.innerHTML = s;

  const text = holder.textContent;
  return text ? text : holder.innerText;
}

const str = '<div class="col-md-10"> <a style="color:green">A2 Award Notice   </a> <a style="color:black">or</a> <a style="color:orange">A2 Lease Contract  </a> </div>';

// Grab each `<a ...>...</a>` chunk as a raw string first.
const anchors = str.match(/<a.*?<\/a>/g);

// Replace every matched chunk, in place, with just its text.
for (const [position, anchorMarkup] of anchors.entries()) {
  anchors[position] = extractContent(anchorMarkup);
}

console.log(anchors);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Upvotes: 1

Related Questions