Reputation: 92417
I have following input string
Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia ...
Splitting rules by example
[
"Lorem ipsum dolor", // A: Tree words <6 letters
"sit amet", // B: Two words <6 letters if next word >6 letters
"consectetur", // C: One word >=6 letters if next word >=6 letters
"adipiscing elit", // D: Two words: first >=6, second <6 letters
"sed doeiusmod", // E: Two words: firs<6, second >=6 letters
"tempor" // rule C
"incididunt ut" // rule D
"Duis aute irure" // rule A
"dolor in" // rule B
"reprehenderit in" // rule D
"esse cillum" // rule E
"dolor eu fugia" // rule D
...
]
So as you can see, a string in the array can have a minimum of one and a maximum of three words. I tried to do it as follows, but it doesn't work - how can I do it?
// Asker's attempt (reported in the question as not working): tries to pack
// up to three words into each entry of `a`.
let s="Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
// `a` starts with one empty line; nothing in this snippet ever appends another entry.
let a=[""];
s.split(' ').map(w=> {
// `line` is a local copy of the last entry; changes to it are not stored
// unless it is written back explicitly below.
let line=a[a.length-1];
let n= line=="" ? 0 : line.match(/ /g).length // num of words in line
if(n<3) line+=w+' ';
n++;
// The extended line is only written back once it would hold three words.
if(n>=3) a[a.length-1]=line
});
console.log(a);
UPDATE
Boundary conditions: if the last word(s) do not match any rule, then just add them as the last array element (but two long words can never be in one string)
SUMMARY AND INTERESTING CONCLUSIONS
We got 8 nice answers to this question, and some of them included discussion about self-describing (or self-explanatory) code. Self-describing code is code for which a person who has not read the question can easily say exactly what it does at first glance. Sadly, none of the answers presents such code - so this question is an example which shows that self-describing code is probably a myth
Upvotes: 7
Views: 2274
Reputation: 1787
I saw very clever solutions here, thank you all!
However I think there is room here for a solution optimized for "self documenting". Note that my goal was that -- self documentation -- so this solution is surely not the shortest code nor the fastest nor the least memory hungry.
"use strict;"
console.log(splitTextIntoWordGroups("Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia"));
/**
 * Splits a space-separated text into word groups by repeatedly applying the
 * first matching rule to the front of the remaining words.
 *
 * @param {string} text - Sentence whose words are separated by single spaces.
 * @returns {string[]} The word groups, each joined with a single space.
 */
function splitTextIntoWordGroups(text) {
  const words = text.split(' ');
  const wordGroups = [];

  // Ordered [test, action] pairs: the first test that passes decides how many
  // words are moved from the front of `words` into the next group.
  const rules = [
    [next3WordsAreAllShorterThan6Chars, moveNext3WordsToWordGroup],
    [next2WordsAreAllShorterThan6CharsAndSubsequentWordIsLongerThan6Chars, moveNext2WordsToWordGroup],
    [nextWordIsLongerOrEqualThan6CharsAndSubsequentWordIsLongerOrEqualThan6Chars, moveNextWordToWordGroup],
    [nextWordIsLongerOrEqualThan6CharsAndSubsequentWordIsShorterThan6Chars, moveNext2WordsToWordGroup],
    [nextWordIsShorterThan6CharsAndSubsequentWordIsLongerOrEqualThan6Chars, moveNext2WordsToWordGroup],
  ];

  for (;;) {
    const matchedRule = rules.find(([applies]) => applies(words));

    if (matchedRule === undefined) {
      // No rule applies: whatever is left becomes the final group (if any).
      const remainingWordGroup = moveRemainingWordsToWordGroup(words);
      if (remainingWordGroup) {
        wordGroups.push(remainingWordGroup);
      }
      return wordGroups;
    }

    const [, moveWords] = matchedRule;
    wordGroups.push(moveWords(words));
  }
}
/**
 * Rule A test: are there at least three words left, and are the first three
 * all shorter than 6 characters?
 *
 * @param {string[]} words - Remaining words, next word first.
 * @returns {boolean} True when the first three words are all short.
 */
function next3WordsAreAllShorterThan6Chars(words) {
  return words.length >= 3 && words.slice(0, 3).every((word) => word.length < 6);
}
/**
 * Rule B test: are the next two words short (fewer than 6 characters) and is
 * the word after them long?
 *
 * A third word of exactly 6 characters counts as long, matching the `>= 6`
 * threshold used by the other rule predicates. With a strict `> 6` the
 * sequence "short short six-letter" satisfied no rule at all.
 *
 * @param {string[]} words - Remaining words, next word first.
 * @returns {boolean} True when the pattern short, short, long starts the list.
 */
function next2WordsAreAllShorterThan6CharsAndSubsequentWordIsLongerThan6Chars(words) {
  if (words.length < 3) {
    return false;
  }
  const [first, second, third] = words;
  return first.length < 6 && second.length < 6 && third.length >= 6;
}
/**
 * Rule C test: are the next two words both at least 6 characters long?
 *
 * @param {string[]} words - Remaining words, next word first.
 * @returns {boolean} True for the pattern long, long; false with fewer than two words.
 */
function nextWordIsLongerOrEqualThan6CharsAndSubsequentWordIsLongerOrEqualThan6Chars(words) {
  if (words.length < 2) {
    return false;
  }
  const [current, following] = words;
  return current.length >= 6 && following.length >= 6;
}
/**
 * Rule D test: is the next word at least 6 characters long and the word after
 * it shorter than 6 characters?
 *
 * @param {string[]} words - Remaining words, next word first.
 * @returns {boolean} True for the pattern long, short; false with fewer than two words.
 */
function nextWordIsLongerOrEqualThan6CharsAndSubsequentWordIsShorterThan6Chars(words) {
  if (words.length < 2) {
    return false;
  }
  const [current, following] = words;
  return current.length >= 6 && following.length < 6;
}
/**
 * Rule E test: is the next word shorter than 6 characters and the word after
 * it at least 6 characters long?
 *
 * @param {string[]} words - Remaining words, next word first.
 * @returns {boolean} True for the pattern short, long; false with fewer than two words.
 */
function nextWordIsShorterThan6CharsAndSubsequentWordIsLongerOrEqualThan6Chars(words) {
  if (words.length < 2) {
    return false;
  }
  const [current, following] = words;
  return current.length < 6 && following.length >= 6;
}
/**
 * Moves the next three words out of `words` into one word group.
 *
 * @param {string[]} words - Remaining words; handed to moveNextNWordsToWordGroup.
 * @param {*} [results] - Not used by this function.
 * @returns {string} Whatever moveNextNWordsToWordGroup returns for a count of 3.
 */
function moveNext3WordsToWordGroup(words, results) {
  const wordCount = 3;
  return moveNextNWordsToWordGroup(words, wordCount);
}
/**
 * Moves the next two words out of `words` into one word group.
 *
 * @param {string[]} words - Remaining words; handed to moveNextNWordsToWordGroup.
 * @param {*} [results] - Not used by this function.
 * @returns {string} Whatever moveNextNWordsToWordGroup returns for a count of 2.
 */
function moveNext2WordsToWordGroup(words, results) {
  const wordCount = 2;
  return moveNextNWordsToWordGroup(words, wordCount);
}
/**
 * Moves the next single word out of `words` into a word group of its own.
 *
 * @param {string[]} words - Remaining words; handed to moveNextNWordsToWordGroup.
 * @param {*} [results] - Not used by this function.
 * @returns {string} Whatever moveNextNWordsToWordGroup returns for a count of 1.
 */
function moveNextWordToWordGroup(words, results) {
  const wordCount = 1;
  return moveNextNWordsToWordGroup(words, wordCount);
}
/**
 * Removes up to `n` words from the front of `words` and joins them into one
 * word group.
 *
 * The group is built from a local value; the previous version assigned to an
 * undeclared `wordGroup`, which creates a global in sloppy mode and throws a
 * ReferenceError in strict mode or ES modules.
 *
 * @param {string[]} words - Remaining words; mutated: the taken words are removed.
 * @param {number} n - Number of words to take. If fewer are available, only
 *   those are taken (no placeholder entries are added).
 * @returns {string} The taken words joined with single spaces.
 */
function moveNextNWordsToWordGroup(words, n) {
  return words.splice(0, n).join(' ');
}
/**
 * Builds the final word group from all words that are still left.
 *
 * Despite the name, `words` itself is left unmodified; only a joined string is
 * produced. The previous version assigned to an undeclared `wordGroup`, which
 * creates a global in sloppy mode and throws in strict mode or ES modules.
 *
 * @param {string[]} words - Remaining words.
 * @returns {string|undefined} The words joined with single spaces, or
 *   `undefined` when no words are left.
 */
function moveRemainingWordsToWordGroup(words) {
  if (words.length === 0) {
    return undefined;
  }
  return words.join(' ');
}
Upvotes: 1
Reputation: 92417
I wrote a shorter and faster version (in terms of time complexity: I do not calculate the sum with reduce in each loop iteration) of the idea proposed in BoltKey's answer (if you want to vote up, please do it on his answer).
Main idea
- `ws` is the word size, which takes only two values: 1 (short word) and 2 (long word)
- `s` is the current line size in the loop (we iterate over each word size)
- we add the current word to the line `l`, and its size to the line size `s`
- if the word would not fit (`s + ws > 3`), we first push the line `l` to the output array `r` and clean `l` and `s`
- after the loop, we add the last line `l` to the result if `l` is not empty

let s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusd tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
/**
 * Packs the words of `str` into lines whose total "size" never exceeds 3,
 * where a word shorter than `n` characters has size 1 and any other word has
 * size 2.
 *
 * @param {number} n - Length from which a word counts as long.
 * @param {string} str - Text whose words are separated by single spaces.
 * @returns {string[]} The lines, each joined with a single space.
 */
function split(n, str) {
  const output = [];
  let line = [];
  let lineSize = 0;

  for (const word of str.split(' ')) {
    const wordSize = word.length < n ? 1 : 2;

    // The word does not fit any more: flush the current line and start over.
    if (lineSize + wordSize > 3) {
      output.push(line.join(' '));
      line = [];
      lineSize = 0;
    }

    line.push(word);
    lineSize += wordSize;
  }

  return line.length ? output.concat(line.join(' ')) : output;
}
// Demo: split the sample sentence `s`, treating words of 6 or more characters as long.
console.log( split(6,s) );
Upvotes: -1
Reputation: 50797
(Updated to incorporate suggestion from user633183.)
I found this an interesting problem. I wanted to write a more generic version immediately, and I settled on one that accepted a list of rules, each of which described the number of words that it would gather and a test for each of those words. So with `lt6` being essentially `(str) => str.length < 6`, the first rule (A) would look like this:
[3, lt6, lt6, lt6],
This, it turns out, is quite similar to the solution from CertainPerformance; that answer uses strings to represent two different behaviors; this one uses actual functions. But they are quite similar. The implementation, though is fairly different.
/**
 * Checks a list of values position by position against a list of tests:
 * `fns[0]` is applied to `xs[0]`, `fns[1]` to `xs[1]`, and so on.
 *
 * When there are fewer values than tests the result is `false` without
 * calling any test. Previously the surplus tests were called with
 * `undefined`, so a test that reads `.length` threw a TypeError near the end
 * of the input.
 *
 * @param {Array<function(*): boolean>} fns - Tests, one per leading position.
 * @param {Array<*>} xs - Values; may be longer than `fns`.
 * @returns {boolean} True when every test accepts the value at its position.
 */
const allMatch = (fns, xs) =>
  fns.length <= xs.length && fns.every((fn, i) => fn(xs[i]));
/**
 * Builds a splitter from an ordered list of rules of the form
 * `[count, ...tests]`. The first rule whose `count` does not exceed the number
 * of remaining words and whose tests all match (via `allMatch`) decides how
 * many words form the next group; when no rule matches, one word is taken.
 *
 * @param {Array<Array>} rules - Rules in priority order.
 * @returns {function(string): string[]} Function that splits a string on
 *   whitespace and returns the groups, each joined with a single space.
 */
const splitByRules = (rules) => {
  // Number of words the first matching rule wants to take (1 if none matches).
  const countFor = (xs) => {
    const matched = rules.find(
      ([count, ...fns]) => count <= xs.length && allMatch(fns, xs)
    );
    const [count] = matched || [1];
    return count;
  };

  // Array of words in, array of arrays of words out.
  const run = (xs) => {
    const groups = [];
    let rest = xs;
    while (rest.length !== 0) {
      const count = countFor(rest);
      groups.push(rest.slice(0, count));
      rest = rest.slice(count);
    }
    return groups;
  };

  return (str) => run(str.split(/\s+/)).map((ss) => ss.join(' '));
};
/**
 * Creates a test that accepts values whose `length` is below `n`.
 *
 * @param {number} n - Exclusive upper bound for the length.
 * @returns {function(string): boolean} The length test.
 */
const shorterThan = (n) => {
  const isShorter = (s) => n > s.length;
  return isShorter;
};
/**
 * Creates a test that accepts values whose `length` is `n` or more.
 *
 * @param {number} n - Inclusive lower bound for the length.
 * @returns {function(string): boolean} The length test.
 */
const atLeast = (n) => {
  const isLongEnough = (s) => n <= s.length;
  return isLongEnough;
};
// Word-length tests used by the rule table below.
const lt6 = shorterThan (6)
const gte6 = atLeast (6)
// Rule table handed to splitByRules; rules are listed in priority order.
const rules = [
// Each rule has the form [count, ...tests]:
//   count - number of words to select in the next block
//   tests - functions to test against each word, by position
[3, lt6, lt6, lt6], // A
[2, lt6, lt6, gte6], // B
[1, gte6, gte6], // C
[2, gte6, lt6], // D
[2, lt6, gte6], // E
]
// Sample sentence from the question, including the trailing "..." token.
const words = 'Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia ...';
// Demo: build a splitter from the rules and print the groups for the sample sentence.
console .log (
splitByRules (rules) (words)
)
This uses a recursive function that bottoms out when the remaining list of words is empty and otherwise searches for the first rule that matches (with, again like CertainPerformance, a default rule that simply takes the next word) and selects the corresponding number of words, recurring on the remaining words.
For simplicity, the recursive function accepts an array of words and returns an array of arrays of words. A wrapper function handles converting these to and from strings.
The only other function of substance in here is the helper function `allMatch`. It is essentially `([f1, f2, ... fn], [x1, x2, ..., xn, ...]) => f1(x1) && f2(x2) && ... && fn(xn)`.
Of course the currying means that splitByRules (myRules)
returns a function you can store and run against different strings.
The order of the rules might be important. If two rules could overlap, you need to put the preferred match ahead of the other.
This added generality may or may not be of interest to you, but I think this technique has a significant advantage: it's much easier to modify if the rules ever change. Say you now also want to include four words, if they all are fewer than five characters long. Then we would just write const lt5 = shorterThan(5)
and include the rule
[4, lt5, lt5, lt5, lt5]
at the beginning of the list.
To me that's a big win.
Upvotes: 2
Reputation: 875
This sounds like a problem you would get during a job interview or on a test. The right way to approach this problem is to think about how to simplify the problem into something that we can understand and write legible code for.
We know that there are two conditions: smaller than six or not. We can represent each word in the string as a binary digit: 0 (shorter than 6 letters) or 1 (6 letters or more).
Turning the string of words into a string of binary will make it easier to process and understand:
const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
// Encode every word as one character: "1" for 6 or more letters, "0" otherwise.
const b = s
  .split(' ')
  .map((word) => (word.length >= 6 ? "1" : "0"))
  .join("");
console.log(b);
Next we need to simplify the rules. Each rule can be thought of as a string of binary(a set of words). Since some rules are more complicated than others, adding the next word we will think of as part of the string:
For a string of numbers remaining, whichever rule fits at the beginning will be the next set of strings. This is a pretty simple logical operation:
const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";

// Encode every word as one character: "1" for 6 or more letters, "0" otherwise.
let b = s.split(' ').reduce((array, word) => {
  return array + (word.length >= 6 ? "1" : "0");
}, "");

// `a` collects, as one digit per group, how many words each group takes.
let a = '';
while (b !== "") {
  let taken;
  if (b.startsWith('000')) {
    taken = 3; // three short words
  } else if (b.startsWith('001')) {
    taken = 2; // two short words followed by a long one
  } else if (b.startsWith('10') || b.startsWith('01')) {
    taken = 2; // one long and one short word, in either order
  } else if (b.startsWith('11')) {
    taken = 1; // a long word followed by another long word
  } else {
    // Leftover "0", "1" or "00" at the very end matches no rule. Take what is
    // left as the last group; without this branch the loop never terminates.
    taken = b.length;
  }
  a += String(taken);
  b = b.substring(taken);
}
console.log(a);

// Go through the string of group sizes and cut the word list into groups.
const acc = [];
const words = s.split(' ');
for (const count of a) {
  acc.push(words.splice(0, Number(count)).join(' '));
}
console.log(acc);
YAY! We successfully converted a complex problem into something easy to understand. While this is not the shortest solution, it is very elegant, and there is still room for improvement without sacrificing readability(compared to some other solutions).
This way of conceptualizing the problem opens doors for more rules or even more complex states(0,1,2).
Upvotes: 1
Reputation: 2047
No tricks needed. This code traverses the array of words, and check the rules for each sequence of 3. The rules are applied trying to do less loops and creating less intermediary objects possible, resulting in a good performance and memory usage.
/**
 * Decides which words, starting at `stack_i`, form the next chunk.
 *
 * Looks at up to three words and counts the short ones (fewer than 6
 * characters): two long words in a row yield the first word alone, three short
 * words yield all three, any other mix yields two words. When fewer than three
 * words are left and no rule has fired yet, the rest is returned as is.
 *
 * End of input is detected with an index check. The former truthiness check
 * mistook an empty-string word (e.g. from a double space) for the end of the
 * list and returned every remaining word as one chunk; an empty word is now
 * simply counted as a short word.
 *
 * @param {string[]} stack - All words of the text.
 * @param {number} stack_i - Index of the first word not yet consumed.
 * @returns {string[]} The words of the next chunk.
 */
function apply_rules(stack, stack_i) {
  let small_word_cnt = 0;
  for (let i = 0; i <= 2; i++) {
    const index = stack_i + i;
    // Not enough elements to trigger a rule
    if (index >= stack.length) {
      return stack.slice(stack_i);
    }
    // Increment the small word counter
    if (stack[index].length < 6) {
      small_word_cnt += 1;
    }
    // 2 big words
    if (i === 1 && small_word_cnt === 0) {
      return [stack[stack_i]];
    }
    // 3 small words
    if (small_word_cnt === 3) {
      return stack.slice(stack_i, stack_i + 3);
    }
  }
  // mixed small and big words
  return stack.slice(stack_i, stack_i + 2);
}
/**
 * Splits a space-separated text into chunks by asking `apply_rules` for the
 * next chunk at each position.
 *
 * @param {string} text - Text whose words are separated by single spaces.
 * @returns {string[]} The chunks, each joined with a single space.
 */
function split_text(text) {
  const words = text.split(' ');
  const results = [];
  // `i` always points at the first word that is not part of a chunk yet.
  for (let i = 0; i < words.length; ) {
    const chunk = apply_rules(words, i);
    results.push(chunk.join(' '));
    i += chunk.length;
  }
  return results;
}
// Demo: print the chunks computed for the sample sentence from the question.
console.log(split_text("Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia"));
Upvotes: 1
Reputation: 135227
I also found this problem very interesting. This is a long-format answer which shows the process of how I arrived at the final program. There are several code blocks labeled sketch
along the way. I hope for this approach to be helpful to beginners in functional style.
Using the data.maybe module, I started out with -
// sketch 1
// First draft: recursively turns `words` into lines. While words remain,
// ruleA..ruleE and finally defaultRule are tried in that order on the same
// word list, and the chained callback recurses with the words left over
// (`next`) and the lines collected so far plus the new `line`.
// NOTE(review): presumably `orElse` supplies the alternative only when the
// previous result is Nothing, and `chain` runs only on a Just - confirm
// against the data.maybe documentation.
const wordsToLines = (words = [], r = []) =>
words.length === 0
? Just (r)
: ruleA (words)
.orElse (_ => ruleB (words))
.orElse (_ => ruleC (words))
.orElse (_ => ruleD (words))
.orElse (_ => ruleE (words))
.orElse (_ => defaultRule (words))
.chain (({ line, next }) =>
wordsToLines (next, [...r, line ])
)
Then I started writing some of the rules ...
// sketch 2
// First-draft rule helpers: a rule either succeeds with the line it produced
// plus the words still to be processed, or yields Nothing.

// Wraps a produced line and the remaining words in a Just.
const success = (line, next) =>
Just ({ line, next })
// Fallback rule: the first word becomes a line of its own, the rest stays pending.
const defaultRule = ([ line, ...next ]) =>
success (line, next)
// Rule A: three small words are consumed together.
// NOTE(review): the `line (...)` helper called by ruleA and ruleB is not
// defined anywhere in this sketch.
const ruleA = ([ a, b, c, ...more ]) =>
small (a) && small (b) && small(c)
? success (line (a, b, c), more)
: Nothing ()
// Rule B: two small words followed by a large one; only the two small words
// are consumed and `c` is handed back with the remaining words.
const ruleB = ([ a, b, c, ...more ]) =>
small (a) && small (b) && large (c)
? success (line (a, b), [c, ...more])
: Nothing ()
// ...
Way too messy and repetitive, I thought. As the author of these functions, it's my job to make them work for me! So I restarted this time designing the rules to do the hard work -
// sketch 3
// Second draft: a rule is declared as a list of per-word guards plus the
// number of words to take. `rule` is deliberately left unimplemented here, so
// this sketch is not runnable as written.
const rule = (guards = [], take = 0) =>
// TODO: implement me...
const ruleA =
rule
( [ small, small, small ] // pattern to match
, 3 // words to consume
)
const ruleB =
rule ([ small, small, large ], 2)
// ruleC, ruleD, ruleE, ...
// Fallback: a single guard built with always (true), taking one word.
const defaultRule =
rule ([ always (true) ], 1)
These rules are much simpler. Next, I wanted to clean up wordsToLines
a bit -
// sketch 4
// Third draft of wordsToLines: the chain of orElse calls from sketch 1 is
// replaced by a single oneOf (...) call that receives the rules in priority
// order; the recursion over the remaining words is unchanged.
const wordsToLines = (words = [], r = []) =>
words.length === 0
? Just (r)
: oneOf (ruleA, ruleB, ruleC, ruleD, ruleE, defaultRule)
(words)
.chain (({ line, next }) =>
wordsToLines (next, [...r, line ])
)
In our initial sketch, the rules constructed a {line, next}
object, but a higher-order rule
means we can hide even more complexity away. And the oneOf
helper makes it easy to move our rules inline -
// final revision
// Recursively converts `words` into lines. `r` carries the lines built so
// far; each step hands the pair [ words, r ] to the first rule that matches
// and recurses on the pair that rule produces.
const wordsToLines = (words = [], r = []) => {
  if (words.length === 0) {
    return Just (r)
  }

  const firstMatchingRule = oneOf
    ( rule ([ small, small, small ], 3) // A
    , rule ([ small, small, large ], 2) // B
    , rule ([ large, large ], 1) // C
    , rule ([ large, small ], 2) // D
    , rule ([ small, large ], 2) // E
    , rule ([ always (true) ], 1) // default
    )

  return firstMatchingRule ([ words, r ])
    .chain (apply (wordsToLines))
}
Finally, we can write our main function, formatSentence
-
// Entry point: splits `sentence` on single spaces, runs wordsToLines on the
// words and unwraps the result, falling back to an empty array.
const formatSentence = (sentence = "") => {
  const words = sentence .split (" ")
  return wordsToLines (words) .getOrElse ([])
}
The wires are mostly untangled now. We just have to supply the remaining dependencies -
const { Just, Nothing } =
require ("data.maybe")
// `small` and `large` are the pair of predicates that `dual` builds from one
// test: the test here accepts words shorter than 6 characters (an omitted
// word defaults to "").
const [ small, large ] =
dual ((word = "") => word.length < 6)
// Combines alternatives into one function: `init` is applied to `x` first,
// then every further alternative is attached through the result's `orElse`,
// in the order given.
const oneOf = (init, ...more) => x => {
  let result = init (x)
  for (const alternative of more) {
    result = result .orElse (_ => alternative (x))
  }
  return result
}
// Builds a rule from per-position guards and a word count. The rule receives
// the pair [ words, r ]; when every guard accepts the word at its position it
// returns a Just holding the words after the first `take` and `r` extended by
// those `take` words joined with a space, otherwise Nothing.
const rule = (guards = [], take = 0) =>
  ([ words = [], r = [] ]) => {
    const matches = guards .every ((guard, i) => guard (words[i]))
    if (!matches) {
      return Nothing ()
    }
    const remaining = words .slice (take)
    const line = words .slice (0, take) .join (" ")
    return Just ([ remaining, [ ...r, line ] ])
  }
And some functional primitives -
// Returns its argument unchanged.
const identity = (value) => {
  return value
}
// Creates a function that ignores its argument and returns `x`.
const always = (x) => {
  const constant = (_) => x
  return constant
}
// Adapts `f` so that it takes its arguments as one array: the returned
// function spreads `args` (an empty list when omitted) into a call of `f`.
const apply = (f = identity) => {
  const callWithSpread = (args = []) => {
    return f (...args)
  }
  return callWithSpread
}
// Turns one test into a pair of boolean predicates: the first reports whether
// `f` accepts a value, the second reports the opposite.
const dual = f => {
  const holds = x => Boolean (f (x))
  const fails = x => ! holds (x)
  return [ holds, fails ]
}
Let's run the program -
// Demo call; the comment that follows lists the result given in the answer
// (the trailing "..." token ends up as an entry of its own).
formatSentence ("Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia ...")
// [ 'Lorem ipsum dolor'
// , 'sit amet'
// , 'consectetur'
// , 'adipiscing elit'
// , 'sed doeiusmod'
// , 'tempor'
// , 'incididunt ut'
// , 'Duis aute irure'
// , 'dolor in'
// , 'reprehenderit in'
// , 'esse cillum'
// , 'dolor eu fugia'
// , '...'
// ]
View the complete program on repl.it and run it to see the results -
Upvotes: 3
Reputation: 214969
You can express your rules as abbreviated regular expressions, build a real regex from them and apply it to your input:
// Sample input, including punctuation between the words.
const text = "Lorem ipsum, dolor. sit amet? consectetur, adipiscing, elit! sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia bla?";

// Abbreviated rules in priority order: S stands for a short word (1-5 word
// characters), L for a long word (6 or more); a lookahead (?=L) requires a
// long word to follow without consuming it. The final '(.+)' takes whatever
// is left when no other rule matches.
const rules = ['(SSS)', '(SS(?=L))', '(L(?=L))', '(SL)', '(LS)', '(.+)'];

// Expand the abbreviations into one real global regex. Every word pattern also
// consumes the non-word characters that follow the word.
// The variables are declared with `const`: assigning to undeclared names
// throws a ReferenceError in strict mode and in ES modules.
const regex = new RegExp(
  rules
    .join('|')
    .replace(/S/g, '\\w{1,5}\\W+')
    .replace(/L/g, '\\w{6,}\\W+'),
  'g'
);

console.log(text.match(regex));
If the rules don't change, the regex construction part is only needed once.
Note that this also handles punctuation in a reasonable way.
Upvotes: 6
Reputation: 370769
One option is to first create an array of rules, like:
// Rules in priority order; 'less' stands for a word length <6 and 'eqmore'
// for a word length >=6.
const rules = [
// [# of words to splice if all conditions met, condition for word1, condition for word2, condition for word3...]
[3, 'less', 'less', 'less'],
// the above means: splice 3 words if the next 3 words' lengths are <6, <6, <6
[2, 'less', 'less', 'eqmore'],
// the above means: splice 2 words if the next 3 words' lengths are <6, <6, >=6
[1, 'eqmore', 'eqmore'],
// the above means: splice 1 word if the next 2 words' lengths are >=6, >=6
[2, 'eqmore', 'less'],
// the above means: splice 2 words if the next 2 words' lengths are >=6, <6
[2, 'less', 'eqmore']
// the above means: splice 2 words if the next 2 words' lengths are <6, >=6
];
Then iterate through the array of rules, finding the rule that matches, extracting the appropriate number of words to splice from the matching rule, and push to the output array:
// Each rule: [number of words to splice, condition for word 1, condition for word 2, ...]
const rules = [
  [3, 'less', 'less', 'less'],
  [2, 'less', 'less', 'eqmore'],
  [1, 'eqmore', 'eqmore'],
  [2, 'eqmore', 'less'],
  [2, 'less', 'eqmore']
];
const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(' ');
const output = [];

// 'less' accepts words shorter than 6 characters; any other condition accepts
// words of 6 or more characters.
const verify = (cond, word) => {
  if (cond === 'less') {
    return word.length < 6;
  }
  return word.length >= 6;
};

while (words.length) {
  // This version relies on some rule matching at every position.
  const matchingRule = rules.find((rule) => {
    const conds = rule.slice(1);
    return conds.every((cond, i) => verify(cond, words[i]));
  });
  const [wordCount] = matchingRule;
  output.push(words.splice(0, wordCount).join(' '));
}
console.log(output);
Of course, the .find
assumes that every input string will always have a matching rule for each position spliced.
For the additional rule that any words not matched by the previous rules just be added to the output, put [1]
into the bottom of the rules
array:
// Each rule: [number of words to splice, condition for word 1, condition for word 2, ...]
// The final [1] has no conditions, so it matches whenever nothing else does.
const rules = [
  [3, 'less', 'less', 'less'],
  [2, 'less', 'less', 'eqmore'],
  [1, 'eqmore', 'eqmore'],
  [2, 'eqmore', 'less'],
  [2, 'less', 'eqmore'],
  [1]
];
const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusmod tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(' ');
const output = [];

// 'less' accepts words shorter than 6 characters; any other condition accepts
// words of 6 or more characters.
const verify = (cond, word) => {
  if (cond === 'less') {
    return word.length < 6;
  }
  return word.length >= 6;
};

while (words.length) {
  const matchingRule = rules.find((rule) => {
    const conds = rule.slice(1);
    // A missing word fails the condition instead of being measured.
    return conds.every((cond, i) => words[i] && verify(cond, words[i]));
  });
  const [wordCount] = matchingRule;
  output.push(words.splice(0, wordCount).join(' '));
}
console.log(output);
Upvotes: 5
Reputation: 2198
If we define words with length <6 to have size 1 and >=6 to have size 2, we can rewrite the rules to "if the next word would make the total size of the current row >= 4, start next line".
/**
 * Size of a word for row packing: 1 for words shorter than 6 characters,
 * 2 for all others.
 *
 * @param {string} word - The word to measure.
 * @returns {number} 1 or 2.
 */
function wordSize(word) {
  return word.length < 6 ? 1 : 2;
}
const s = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed doeiusd tempor incididunt ut Duis aute irure dolor in reprehenderit in esse cillum dolor eu fugia";
const words = s.split(" ");

// Rows of words; a row is closed as soon as the next word would bring its
// total size (per wordSize) to 4 or more.
const rows = [];
let row = [];
// Running size of `row`, kept up to date instead of re-summing the row for
// every word.
let rowSize = 0;
for (const word of words) {
  const size = wordSize(word);
  if (rowSize + size >= 4) {
    rows.push(row);
    row = [];
    rowSize = 0;
  }
  row.push(word);
  rowSize += size;
}
// The row still being filled when the words run out is the last row.
rows.push(row);

const result = rows.map((a) => a.join(" "));
console.log(result);
Upvotes: 5