user12016595
user12016595

Reputation:

How not to display a phrase that only appears once in a string?

My function goes through the text and collects every phrase that appears in it, together with how many times it appears. I haven't found a way to leave out the phrases that appear only once in the text.

/**
 * Counts every run of `wordCount` consecutive words in `text`.
 *
 * A "word" is a maximal run of `\w` characters or characters in the
 * U+0402..U+045F range. Phrases are the matched words joined by a single
 * space, so the original whitespace and punctuation are not preserved.
 *
 * If the text has fewer than `wordCount` words, the one shorter phrase made
 * of all the words is counted once. Text without any words yields an empty Map.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Number of words per phrase.
 * @returns {Map<string, number>} Phrase -> number of occurrences, in order of
 *   first appearance.
 */
function toPhrases(text, wordCount) {
    // match() returns null (not an empty array) when nothing matches.
    const words = text.match(/[\w\u0402-\u045f]+/ig) || [];
    const phrases = new Map();
    for (let i = 0; i < words.length; i++) {
        const phrase = words.slice(i, i + wordCount).join(' ');
        phrases.set(phrase, (phrases.get(phrase) || 0) + 1);
        // Stop once the current window already reaches the last word, so no
        // truncated trailing windows are counted.
        if (i + wordCount >= words.length) {
            break;
        }
    }
    return phrases;
}

/**
 * Reads the value of the element with id `textarea` and, for phrase lengths
 * 2, 3 and 4, writes the JSON-serialised result of `toPhrases` into the
 * elements `output-2`, `output-3` and `output-4` respectively.
 *
 * NOTE(review): assumes the `textarea` element exposes a string `value`
 * (e.g. a <textarea> or <input>) — confirm against the page markup.
 */
function createPhrases() {
    const text = document.getElementById('textarea').value;
    for (const wordCount of [2, 3, 4]) {
        // The output is plain text derived from user input, so assign it via
        // textContent instead of having it parsed as HTML through innerHTML.
        document.getElementById(`output-${wordCount}`).textContent =
            JSON.stringify([...toPhrases(text, wordCount)]);
    }
}

Upvotes: 0

Views: 63

Answers (2)

Cat
Cat

Reputation: 4226

You can filter your results by phrase-count like this:

// Sample input: the two-word phrases "the thing" and "thing that" each occur twice in it.
const thing = "do the thing that is the thing that you do";
console.log(Array.from(toPhrases(thing, 2)));

/**
 * Finds the phrases of `wordCount` consecutive words that occur more than
 * once in `text`.
 *
 * A "word" is a maximal run of `\w` characters or characters in the
 * U+0402..U+045F range; phrases are the matched words joined by one space.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Number of words per phrase.
 * @returns {Array<[string, number]>} `[phrase, count]` pairs with `count > 1`,
 *   in order of first appearance. Empty when the text contains no words.
 */
function toPhrases(text, wordCount) {
  // match() returns null (not an empty array) when nothing matches.
  const words = text.match(/[\w\u0402-\u045f]+/ig) || [];
  const phrases = new Map();
  for (let i = 0; i < words.length; i++) {
    const phrase = words.slice(i, i + wordCount).join(' ');
    phrases.set(phrase, (phrases.get(phrase) || 0) + 1);
    // Stop once the current window already reaches the last word.
    if (i + wordCount >= words.length) { break; }
  }
  // Spreading a Map yields [phrase, count] pairs; keep only the repeated ones.
  return [...phrases].filter(([, count]) => count > 1);
}

Upvotes: 0

ema
ema

Reputation: 5773

This should work:

/**
 * Returns the words that occur exactly once in `text`.
 *
 * A "word" is a maximal run of `\w` characters or characters in the
 * U+0402..U+045F range.
 *
 * NOTE(review): `wordCount` is accepted but never used — only single words
 * are counted, and the result keeps the words seen once rather than dropping
 * them. Confirm whether that is the intended behaviour.
 *
 * @param {string} text - Text to scan.
 * @param {number} wordCount - Currently unused.
 * @returns {string[]} Words whose occurrence count is 1. Empty when the text
 *   contains no words.
 */
function toPhrases(text, wordCount){
  // match() returns null (not an empty array) when nothing matches.
  const words = text.match(/[\w\u0402-\u045f]+/ig) || [];

  // Use a prototype-less object as the dictionary: with a plain {} the words
  // "constructor", "toString", "__proto__", ... would hit inherited members
  // and be miscounted.
  const groups = words.reduce((acc, w) => {
    acc[w] = (acc[w] || 0) + 1;
    return acc;
  }, Object.create(null));

  // groups maps every distinct word to its number of occurrences;
  // keep the words that have exactly one occurrence.
  return Object.keys(groups).filter(k => groups[k] === 1);
}

Upvotes: 1

Related Questions