Reputation: 66488
I have this code that breaks longer lines into an array of equal-length strings while keeping words intact. It also takes into account formatting like [[u;#fff;]some text], and it splits the text so that each string can be converted to HTML independently:
// Matches a whole formatting sequence: `[[`, a run of style flags from the set
// `!gbiuso`, semicolon-separated fields, `]`, the formatted text and an optional
// closing `]`. Group 1 captures the format specification, group 2 the text.
// Global and case-insensitive.
var format_re = /\[\[([!gbiuso]*;[^;\]]*;[^;\]]*(?:;|[^\]()]*);?[^\]]*)\]([^\]]*\\\][^\]]*|[^\]]*|[^\[]*\[[^\]]*)\]?/gi;
// Captures only the opening marker of a formatting sequence, i.e. `[[...;...;...]`.
var format_begin_re = /(\[\[[!gbiuso]*;[^;]*;[^\]]*\])/i;
// Matches an opening marker (its `]` being optional) located at the very end of
// a string.
var format_last_re = /\[\[[!gbiuso]*;[^;]*;[^\]]*\]?$/i;
/**
 * Split `str` into an array of strings, emitting a chunk each time `length`
 * characters have been counted, while carrying terminal formatting
 * (`[[format]text]`) over from one chunk to the next.
 *
 * The input is first split on newlines; an empty input line produces an empty
 * string in the result. Characters of the format specification itself are not
 * counted, and an HTML entity is skipped as a unit.
 *
 * Relies on `format_re`, `format_begin_re`, `format_last_re` and on `$` from
 * the enclosing scope.
 *
 * @param {string} str - text to split.
 * @param {number} length - number of counted characters per chunk.
 * @param {boolean} words - when truthy, try to cut at the last recorded space
 *     and trim whitespace from both ends of every chunk.
 * @returns {Array} the produced chunks.
 * @throws {Error} when an `&` is not followed by a `;`-terminated entity.
 */
$.terminal.split_equal = function(str, length, words) {
// true from the moment `[[` is seen until the sequence is closed
var formatting = false;
// true while inside the text part of a formatting sequence
var in_text = false;
// opening format marker that has to be re-applied to the next chunk
var prev_format = '';
var result = [];
// add format text as 5th parameter to formatting it's used for
// data attribute in format function
var array = str.replace(format_re, function(_, format, text) {
var semicolons = format.match(/;/g).length;
// missing semicolons
if (semicolons == 2) {
semicolons = ';;';
} else if (semicolons == 3) {
semicolons = ';';
} else {
semicolons = '';
}
// return '[[' + format + ']' + text + ']';
// closing bracket will break formatting so we need to escape
// those using html entity equivalent
return '[[' + format + semicolons +
text.replace(/\\\]/g, ']').replace(/\n/g, '\\n') + ']' +
text + ']';
}).split(/\n/g);
// process every input line separately
for (var i = 0, len = array.length; i < len; ++i) {
if (array[i] === '') {
result.push('');
continue;
}
var line = array[i];
// index in `line` where the current chunk starts
var first_index = 0;
// number of characters counted for the current chunk
var count = 0;
// index of the last position recorded as following a space, -1 when none
var space = -1;
for (var j=0, jlen=line.length; j<jlen; ++j) {
// State tracking: `[[` starts a formatting sequence; while formatting, the
// first `]` switches to the text part and the next `]` ends the sequence.
// NOTE(review): `in_text` is not reset here when a new `[[` is seen - confirm
// that this is intended.
if (line[j] === '[' && line[j+1] === '[') {
formatting = true;
} else if (formatting && line[j] === ']') {
if (in_text) {
formatting = false;
in_text = false;
} else {
in_text = true;
}
} else if ((formatting && in_text) || !formatting) {
if (line[j] === '&') { // treat entity as one character
var m = line.substring(j).match(/^(&[^;]+;)/);
if (!m) {
// should never happen if used by terminal,
// because it always calls $.terminal.encode
// before this function
throw new Error("Unclosed html entity in line " +
(i+1) + ' at char ' + (j+1));
}
j+=m[1].length-2; // because continue adds 1 to j
// if entity is at the end there is no next loop
// issue #77
// NOTE(review): `output` is declared with `var` further down, so at this
// point it holds the value assigned for a previous chunk (or undefined when
// no chunk has been produced yet) - verify this is intended.
if (j === jlen-1) {
result.push(output + m[1]);
}
continue;
} else if (line[j] === ']' && line[j-1] === '\\') {
// escape \] counts as one character
--count;
} else {
++count;
}
}
// Reports whether the text right before position `j` is a space.
// NOTE(review): the first comparison takes up to six characters but compares
// them with a one-character string - presumably a six-character HTML entity
// literal was intended; verify against the original source.
function is_space() {
return line.substring(j-6, j) == ' ' ||
line.substring(j-1, j) == ' ';
}
// remember the position as a candidate break point, but only outside of a
// format specification
if (is_space() && ((formatting && in_text) || !formatting)) {
space = j;
}
// emit a chunk when enough characters were counted or the line ends, again
// only outside of a format specification
if ((count === length || j === jlen-1) &&
((formatting && in_text) || !formatting)) {
var output;
// text from the last recorded space up to `length` characters ahead,
// reduced to plain text through a temporary <span> element
var after = line.substring(space, j+length+1);
var text = $('<span>' + after + '</span>').text();
var can_break = text.match(/\s/);
if (words && space != -1 && j !== jlen-1 && can_break) {
// get text to last space
output = line.substring(first_index, space);
// rewind so that scanning continues from the recorded space
j = space-1;
space = -1;
} else {
output = line.substring(first_index, j+1);
}
if (words) {
output = output.replace(/^( |\s)+|( |\s)+$/g, '');
}
first_index = j+1;
count = 0;
// re-apply the format carried over from the previous chunk; it stops being
// carried once the chunk contains a `]`
if (prev_format) {
output = prev_format + output;
if (output.match(']')) {
prev_format = '';
}
}
// Fix output if formatting not closed
var matched = output.match(format_re);
if (matched) {
var last = matched[matched.length-1];
if (last[last.length-1] !== ']') {
// last sequence is still open: close it here and carry its opening marker
prev_format = last.match(format_begin_re)[1];
output += ']';
} else if (output.match(format_last_re)) {
// chunk ends with a bare opening marker: move it to the next chunk
// NOTE(review): `line_len` is only referenced by the commented-out line below.
var line_len = output.length;
// why this line ???
//var f_len = line_len-last[last.length-1].length;
output = output.replace(format_last_re, '');
prev_format = last.match(format_begin_re)[1];
}
}
result.push(output);
}
}
}
return result;
};
It works almost right, but some lines are shorter than they should be, like:
is cracker.The term
in this FIDDLE. It works correctly when you strip the formatting by checking the checkbox. I have worked on this for a couple of hours and have no clue why that line is shorter; any help will be much appreciated.
Upvotes: 7
Views: 2263
Reputation: 16233
The npm package paragraph-builder splits continuous text into so-called paragraphs that are evenly distributed and all approximately the same size in number of words. This concept of a paragraph seems to be what you are looking for.
You can define the number of words for the paragraphs. You can extend the principle of paragraphs to pages, considering that a page has on average approximately the same number of characters, space included.
This paragraph builder node script generates paragraphs from continuous text. It outputs a text wherein the size of each paragraph is approximately the same, providing an even distribution of paragraphs within the text. It doesn't split the text on numbers such as "1.2".
There is an option to define the break character between paragraphs, or you can fetch the paragraphs into an array of strings to which you can apply the HTML tag <p>. Check its documentation for further clarification.
Upvotes: 1
Reputation: 20125
Here's how to fix the original code:
Add the following after line 40:
in_text = false;
The in_text flag is used by the code to determine if the current position is in regular text. However, the code was not clearing the flag when it entered a region of formatting markup. This was the cause of the main issue described in the question with the ultra-short line.
Change the if statement at line 76/77 to:
if (is_space() && ((formatting && in_text) || !formatting || (line[j] === '[' && line[j+1] === '['))) {
This takes care of a lesser problem where line breaks were not happening on spaces between regular text and formatted text.
Working fiddle here: https://jsfiddle.net/2w10xp3m/1/
Upvotes: 5
Reputation: 28091
I think I've solved the problem using a much simpler approach. First break up all words, then re-assemble the lines while keeping track of the current format. See JsFiddle.
JavaScript
/**
 * Break `str` into lines of at most `length` visible characters, cutting only
 * between space-separated words. Terminal formatting (`[[format]text]`) is
 * re-applied to every word, so each returned line is self-contained and can be
 * rendered independently.
 *
 * Characters that belong to formatting markup are not counted towards the line
 * length. A single word longer than `length` is never split; it is placed on a
 * line of its own and that line exceeds `length`.
 *
 * @param {string} str - text to split; words are separated by spaces.
 * @param {number} length - maximum number of visible characters per line.
 * @param {boolean} words - accepted for interface compatibility only; it is not
 *     read, lines are always broken between words.
 * @returns {Array} the assembled lines; lines without visible characters are
 *     not emitted.
 */
$.terminal.split_equal = function(str, length, words) {
    var result = [],
        currentFormat = null,
        currentLine = '',
        currentLineLengthWithoutFormatting = 0;

    // 1. Split words on spaces. A dedicated local is used so that the `words`
    //    argument is not overwritten.
    var tokens = str.split(/ /g);

    // 2. Re-assemble lines while keeping track of current formats
    tokens.forEach(function(word) {
        // Keep track of current format: a word that starts with `[[...]` opens
        // a new format which stays active until a word containing `]` is seen.
        var format = word.match(/^\[\[([^\]]+)\]/g),
            wordWithFormatting, wordLength, separatorLength;

        if (format !== null && format[0]) {
            currentFormat = format[0];
            word = word.slice(format[0].length);
        }

        // Apply current format to each word separately
        wordLength = word.length;
        wordWithFormatting = (currentFormat || '') + word;

        if (currentFormat) {
            if (word.indexOf(']') !== -1) {
                // the closing bracket is markup, not a visible character
                wordLength--;
                currentFormat = null;
            } else {
                // format continues in the next word: close it for this one
                wordWithFormatting += ']';
            }
        }

        // Assemble line. The space that joins the word to a non-empty line
        // counts towards the limit, otherwise a line could end up one
        // character longer than `length`.
        separatorLength = currentLineLengthWithoutFormatting > 0 ? 1 : 0;

        if (currentLineLengthWithoutFormatting + separatorLength + wordLength <= length) {
            // Word still fits on current line
            if (separatorLength > 0) {
                currentLine += ' ';
                currentLineLengthWithoutFormatting += separatorLength;
            }
        } else {
            // Need to start new line. Nothing is emitted when the current line
            // has no visible characters (e.g. the very first word is already
            // longer than `length`), which avoids a spurious empty line.
            if (currentLineLengthWithoutFormatting > 0) {
                result.push(currentLine);
            }
            currentLine = '';
            currentLineLengthWithoutFormatting = 0;
        }

        currentLine += wordWithFormatting;
        currentLineLengthWithoutFormatting += wordLength;
    });

    // flush the last line if it has any visible characters
    if (currentLineLengthWithoutFormatting > 0) {
        result.push(currentLine);
    }

    return result;
};
Upvotes: 4