Reputation:
My function goes through the phrases in the text and displays every phrase that appears at least once. I haven't found a way to hide the phrases that appear only once in the text.
/**
 * Counts every run of `wordCount` consecutive words (a "phrase") in `text`.
 *
 * A word is a maximal run of characters matched by the pattern below
 * (`\w` plus the U+0402–U+045F range); everything else is a separator.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Number of words per phrase.
 * @returns {Map<string, number>} Each phrase (its words joined by a single
 *   space) mapped to how many times it occurs, in order of first appearance.
 *   The map is empty when the text holds fewer than `wordCount` words or when
 *   `wordCount` is less than 1.
 */
function toPhrases(text, wordCount) {
  const phrases = new Map();
  // match() returns null (not an empty array) when the text has no words.
  const words = text.match(/[\w\u0402-\u045f]+/ig) || [];
  if (wordCount < 1) {
    return phrases;
  }
  // Only visit start positions where a full-length window still fits, so a
  // text shorter than wordCount never produces a truncated phrase.
  for (let i = 0; i + wordCount <= words.length; i++) {
    const phrase = words.slice(i, i + wordCount).join(' ');
    phrases.set(phrase, (phrases.get(phrase) || 0) + 1);
  }
  return phrases;
}
/**
 * Reads the current value of the #textarea element and, for phrase lengths
 * 2, 3 and 4, writes the JSON-serialized result of toPhrases into the
 * matching #output-2, #output-3 and #output-4 element.
 */
function createPhrases() {
  const text = document.getElementById('textarea').value;
  for (const wordCount of [2, 3, 4]) {
    const entries = [...toPhrases(text, wordCount)];
    // The result is plain text, so it is assigned via textContent rather than
    // innerHTML to keep it from ever being parsed as markup.
    document.getElementById(`output-${wordCount}`).textContent = JSON.stringify(entries);
  }
}
Upvotes: 0
Views: 63
Reputation: 4226
You can filter your results by phrase-count like this:
// Demo: run toPhrases on a sample sentence with two-word phrases and log the result.
const thing = "do the thing that is the thing that you do";
const pairResult = toPhrases(thing, 2);
console.log([...pairResult]);
/**
 * Finds the phrases of `wordCount` consecutive words that occur more than
 * once in `text`.
 *
 * A word is a maximal run of characters matched by the pattern below
 * (`\w` plus the U+0402–U+045F range); everything else is a separator.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Number of words per phrase.
 * @returns {Array<[string, number]>} `[phrase, count]` pairs for every phrase
 *   whose count is greater than 1, in order of first appearance. Empty when
 *   nothing repeats, when the text has no words, or when `wordCount` is less
 *   than 1.
 */
function toPhrases(text, wordCount) {
  // match() returns null (not an empty array) when the text has no words.
  const words = text.match(/[\w\u0402-\u045f]+/ig) || [];
  if (wordCount < 1) {
    return [];
  }
  const phrases = new Map();
  // Only full-length windows are counted.
  for (let i = 0; i + wordCount <= words.length; i++) {
    const phrase = words.slice(i, i + wordCount).join(' ');
    phrases.set(phrase, (phrases.get(phrase) || 0) + 1);
  }
  // Spreading a Map yields two-element [phrase, count] arrays, so the count
  // is the second item of each entry.
  return [...phrases].filter(([, count]) => count > 1);
}
Upvotes: 0
Reputation: 5773
This should work:
/**
 * Returns the words that occur exactly once in `text`.
 *
 * A word is a maximal run of characters matched by the pattern below
 * (`\w` plus the U+0402–U+045F range); everything else is a separator.
 *
 * NOTE(review): `wordCount` is accepted but not used — only single words are
 * tallied, never multi-word phrases. Confirm whether that is intended.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Currently ignored.
 * @returns {string[]} Words with exactly one occurrence, in order of first
 *   appearance. Empty when the text has no words.
 */
function toPhrases(text, wordCount) {
  // match() returns null (not an empty array) when the text has no words.
  const words = text.match(/[\w\u0402-\u045f]+/ig) || [];
  // A Map is used for the tally so that words such as "constructor" or
  // "__proto__" cannot collide with properties inherited by a plain object.
  const counts = new Map();
  for (const word of words) {
    counts.set(word, (counts.get(word) || 0) + 1);
  }
  // Keep only the words that were seen a single time.
  return [...counts.keys()].filter((word) => counts.get(word) === 1);
}
Upvotes: 1