Reputation: 3390
What I am doing:
b
p
& a
).View more
anchor tag. This links it to the website's post page, which contains the complete post. Something like:
Hey this is a sample post text
<b>message</b>
. Lorem ipsum dolor sit amit...<a href="someurl">
View more</a>
The problem:
During word counting and truncation, I may cut the string in the middle of an HTML tag, because I simply count words by splitting on spaces. Something like:
I am sharing a link with you.
<a style="color:
...<a href="someurl">
View more</a>
Now this will break the html.
Possible solution:
indexOf()
(or some other method) to find the starting and ending indices of each tag.
Question:
Is there a better way to do this? I don't know what search terms to use on Google to get help with this.
P.S. The code is flexible and I can change the flow if there is a significantly better solution. Also, I am not good with post titles. If you can, please modify it to something that reflects the question.
EDIT:
This is what I came up with after Alex's answer. Hope it helps someone else:
/**
 * Counter: Takes a string and returns words and characters count.
 *
 * A "word" is a maximal run of non-whitespace characters, so empty and
 * whitespace-only input contain zero words. Non-string input (including
 * null/undefined) is treated as empty and yields all-zero counts.
 *
 * @param value {string} - Text to measure
 * @returns obj: {
 *   'wordCount': (int) number of whitespace-delimited words,
 *   'totalChars': (int) length of the raw input,
 *   'charCount': (int) length after trimming leading/trailing whitespace,
 *   'charCountNoSpace': (int) length with all whitespace removed
 * }
 */
var counter = function(value){
    if (typeof value !== 'string' || value.length === 0) {
        return {
            wordCount: 0,
            totalChars: 0,
            charCount: 0,
            charCountNoSpace: 0
        };
    }

    var trimmed = value.trim();

    return {
        // Splitting an empty string yields [""], which would be miscounted
        // as one word, so whitespace-only input is handled explicitly.
        wordCount: trimmed.length === 0 ? 0 : trimmed.split(/\s+/).length,
        totalChars: value.length,
        charCount: trimmed.length,
        charCountNoSpace: value.replace(/\s+/g, '').length
    };
};
/**
* htmlSubString - Creates excerpt from markup(or even plain text) without creating malformed HTML tags
* @param markup {string} - Markup/text to take excerpt out of
* @param limit {int} - Total word count of excerpt. Note that only text (not the html tag) counts as a valid word.
* @returns {string} - Excerpt
*/
var htmlSubString = function(markup, limit){
var htmlParser = require("htmlparser2");
var tagCount = 0;
var wordCount = 0;
var excerpt = '';
function addToExcerpt(type, text, attribs) {
if ((wordCount >= limit && tagCount == 0) || (tagCount === 1 && type === 'tagOpen' && wordCount >= limit)) {
return false;
}
else if (wordCount < limit || tagCount) {
if (type === 'text') {
var wordCountSubString = $scope.counter(text).wordCount;
if (wordCountSubString + wordCount > limit && tagCount === 0) {
var length = limit - wordCount;
var wordList = text.trim().split(' ');
for (var i = 0; i < length; i++) {
excerpt += ' ' + wordList[i];
wordCount++;
}
} else {
wordCount += wordCountSubString;
excerpt += text;
}
} else if (type === 'tagOpen') {
excerpt += '<' + text;
for (var prop in attribs) {
excerpt += ' ' + prop + '="' + attribs[prop] + '"';
}
excerpt += '>';
} else if (type === 'tagClose') {
excerpt += '</' + text + '>';
}
}
return true;
}
var parser = new htmlParser.Parser({
onopentag: function (name, attribs) {
if(wordCount < limit){
++tagCount;
addToExcerpt('tagOpen', name, attribs);
}
},
ontext: function (text) {
if(wordCount < limit){
addToExcerpt('text', text);
}
},
onclosetag: function (tagName) {
if(wordCount < limit || tagCount > 0){
addToExcerpt('tagClose', tagName);
--tagCount;
}
}
});
parser.write(markup);
parser.end();
return excerpt;
}
Usage:
// Maximum number of words to keep in the excerpt.
var wordCountLimit = 20;
// Placeholder input; replace with the markup or plain text to shorten.
var markup = "/* some markup/text */";
// Excerpt of `markup` produced by htmlSubString with the limit above.
var excerpt = htmlSubString(markup, wordCountLimit);
Upvotes: 1
Views: 422
Reputation: 59203
Now, you'll definitely be able to find some HTML tag matching regular expressions. That said, I don't recommend it. At first you'll be all happy and everything will work just fine. Then tomorrow you'll find a small edge-case. "No worries!" You'll say, as you modify the expression to account for the discrepancy. Then the next day, a new tweak, and a new one, and yet another, etc etc until you can't take it anymore.
I highly recommend you find an already established HTML parsing library. There appear to be quite a few on npm. This one seems to be fairly popular.
PS - You did fine with your question. I wish more questions took as much time and provided as much detail :)
Upvotes: 1