Avisek Das
Avisek Das

Reputation: 23

Find once and return different strings replaced with different words

const str = 'i love code. i write code everyday.';
const regex = /code/g;

// Each replacement word triggers its own full scan of `str` with the same
// regex: three words, three searches.
const [outputJs, outputCss, outputHtml] = ['js', 'css', 'html'].map(
  (word) => str.replace(regex, word),
);

Here I am searching for /code/g three times and replacing with different strings.

Is there any way to optimize this code?

If the regex gets complicated and str is very long, the JavaScript engine will spend time searching for the same matches again and again, just to replace them with a different word.

I hope you understand what I'm trying to do.

EDIT: A workaround would be:

  1. Generate a set of template strings using the regex.exec(str) and some string manipulation.
  2. Use those template strings to generate output by concatenating replacement strings in-between them.

Upvotes: 1

Views: 122

Answers (4)

phatfingers
phatfingers

Reputation: 10250

If performance is critical, there may be an optimization you can perform using string templates, provided you can constrain your inputs to a finite set of predictable patterns. For example, in your sample code, you are replacing a string literal that occurs twice in your text, but could occur an unknown number of times in other text. If you can be sure that literal occurs, say, 0 to 10 times, then you can write 11 templates and select the appropriate one.

    /**
     * Rebuilds a string from `list`, placing `word` between adjacent fragments.
     * Lists of one, two or three fragments use a fixed template literal; any
     * other length is handled by Array.prototype.join.
     *
     * @param {string[]} list - Fragments, e.g. the result of String.prototype.split.
     * @param {string} word - Text inserted between each pair of fragments.
     * @returns {string} The fragments joined by `word`.
     */
    function interleave(list, word) {
      const count = list.length;
      if (count === 1) {
        return `${list[0]}`;
      }
      if (count === 2) {
        return `${list[0]}${word}${list[1]}`;
      }
      if (count === 3) {
        return `${list[0]}${word}${list[1]}${word}${list[2]}`;
      }
      // add as many fixed-length templates as you need above this fallback
      return list.join(word);
    }

    // Benchmark driver: split the text once, then rebuild it for each
    // replacement word 1,000,000 times and print the elapsed time.
    let outputJs;
    let outputCss;
    let outputHtml;
    const orig = 'i love code. i write code everyday.';

    const startTime = Date.now();

    // The only regex search; the loop below works on the fragments alone.
    const fragments = orig.split(/\bcode\b/);
    for (let run = 0; run < 1000000; run += 1) {
      outputJs = interleave(fragments, 'js');
      outputCss = interleave(fragments, 'css');
      outputHtml = interleave(fragments, 'html');
    }

    const endTime = Date.now();
    console.log(`outputJs:   ${outputJs}`);
    console.log(`outputCss:  ${outputCss}`);
    console.log(`outputHtml: ${outputHtml}`);
    console.log(`time: ${endTime - startTime} ms`);

Adding the improvement AvisekDas mentioned in the comments below (to allow people to run and verify the performance gain).

/**
 * Rebuilds a string from `list`, placing `word` between adjacent fragments,
 * using plain string concatenation.
 *
 * @param {string[]} list - Fragments, e.g. the result of String.prototype.split.
 * @param {string} word - Text inserted between each pair of fragments.
 * @returns {string} The fragments joined by `word`; '' when `list` is empty.
 */
function interleave(list, word) {
  // An empty list has no first fragment: return '' (the same result
  // list.join(word) gives) instead of leaking `undefined` to the caller.
  if (list.length === 0) {
    return '';
  }
  let result = list[0];
  for (let i = 1; i < list.length; i++) {
    result += word + list[i];
  }
  return result;
}

// Benchmark driver for the concatenating interleave: one split up front,
// then 1,000,000 rebuilds per replacement word.
let outputJs;
let outputCss;
let outputHtml;
const orig = 'i love code. i write code everyday.';

const startTime = Date.now();

// Search for the word once; only the fragments are reused afterwards.
const fragments = orig.split(/\bcode\b/);
for (let pass = 0; pass < 1000000; pass += 1) {
  outputJs = interleave(fragments, 'js');
  outputCss = interleave(fragments, 'css');
  outputHtml = interleave(fragments, 'html');
}

const endTime = Date.now();
console.log(`outputJs:   ${outputJs}`);
console.log(`outputCss:  ${outputCss}`);
console.log(`outputHtml: ${outputHtml}`);
console.log(`time: ${endTime - startTime} ms`);

Upvotes: 2

phatfingers
phatfingers

Reputation: 10250

If performance is critical, there may be an optimization you can perform using string templates, provided you can constrain your inputs to a finite set of predictable patterns. For example, in your sample code, you are replacing a string literal that occurs twice in your text, but could occur an unknown number of times in other text. If you can be sure that literal occurs, say, 0 to 10 times, then you can write 11 templates and select the appropriate one.

    /**
     * Rebuilds a string from `list`, placing `word` between adjacent fragments.
     * Lists of one, two or three fragments use a fixed template literal.
     *
     * @param {string[]} list - Fragments, e.g. the result of String.prototype.split.
     * @param {string} word - Text inserted between each pair of fragments.
     * @returns {string} The fragments joined by `word`.
     */
    function interleave(list,word) {
      switch (list.length) {
       case 1:
        return `${list[0]}`; 
       case 2:
        return `${list[0]}${word}${list[1]}`; 
       case 3:
        return `${list[0]}${word}${list[1]}${word}${list[2]}`; 
        // add as many cases as you need
       default:
        // No template for this length: fall back to the general join rather
        // than returning an error message that callers would mistake for
        // real output.
        return list.join(word);
      }
    }

    // Benchmark driver: the text is split a single time, then each of the
    // three outputs is rebuilt from the fragments 1,000,000 times.
    let outputJs;
    let outputCss;
    let outputHtml;
    const orig = 'i love code. i write code everyday.';

    const startTime = Date.now();

    // Single regex search; the timed loop never touches the regex again.
    const fragments = orig.split(/\bcode\b/);
    for (let round = 0; round < 1000000; round += 1) {
      outputJs = interleave(fragments, 'js');
      outputCss = interleave(fragments, 'css');
      outputHtml = interleave(fragments, 'html');
    }

    const endTime = Date.now();
    console.log(`outputJs:   ${outputJs}`);
    console.log(`outputCss:  ${outputCss}`);
    console.log(`outputHtml: ${outputHtml}`);
    console.log(`time: ${endTime - startTime} ms`);

Upvotes: 0

Peter Thoeny
Peter Thoeny

Reputation: 7616

You can use a split, and work on the array to do the replace:

// Benchmark: split on the word (keeping it via the capture group), then build
// all three outputs from the same pieces; repeated 1,000,000 times.
const str = 'i love code. i write code everyday.';
let outputJs;
let outputCss;
let outputHtml;
const startTime = new Date();
for (let run = 0; run < 1000000; run += 1) {
  const jsParts = [];
  const cssParts = [];
  const htmlParts = [];
  // The capture group makes split() include each matched 'code' as its own
  // piece, so it can be swapped for a different word per output.
  str.split(/\b(code)\b/).forEach((piece) => {
    const isMatch = piece === 'code';
    jsParts.push(isMatch ? 'js' : piece);
    cssParts.push(isMatch ? 'css' : piece);
    htmlParts.push(isMatch ? 'html' : piece);
  });
  outputJs = jsParts.join('');
  outputCss = cssParts.join('');
  outputHtml = htmlParts.join('');
}
console.log(`outputJs:   ${outputJs}`);
console.log(`outputCss:  ${outputCss}`);
console.log(`outputHtml: ${outputHtml}`);
console.log(`time: ${new Date() - startTime} ms`);
Output:

outputJs:   i love js. i write js everyday.
outputCss:  i love css. i write css everyday.
outputHtml: i love html. i write html everyday.
time: 1652 ms

In my test 1,000,000 runs took 1652 ms.

Now let's compare that to distinct replaces:

// Benchmark: three independent regex replaces per iteration, 1,000,000 times.
const str = 'i love code. i write code everyday.';
let outputJs, outputCss, outputHtml;
const startTime = new Date();
for (let run = 0; run < 1000000; run += 1) {
  // Each call searches `str` from scratch with its own regex literal.
  outputJs = str.replace(/\b(code)\b/g, 'js');
  outputCss = str.replace(/\b(code)\b/g, 'css');
  outputHtml = str.replace(/\b(code)\b/g, 'html');
}
console.log(`outputJs:   ${outputJs}`);
console.log(`outputCss:  ${outputCss}`);
console.log(`outputHtml: ${outputHtml}`);
console.log(`time: ${new Date() - startTime} ms`);
Output:

outputJs:   i love js. i write js everyday.
outputCss:  i love css. i write css everyday.
outputHtml: i love html. i write html everyday.
time: 494 ms

In my test 1,000,000 runs took 494 ms. As you can see, this is more than 3 times faster, i.e. the split method makes it slower. Regular expressions are highly optimized; only in corner cases is it worth looking for alternatives.

Note that I added the word boundary \b to the regex on purpose, to avoid false positives in your input string such as codex, isocodeine, postcode, or sarcoderm.

Upvotes: 1

Bergi
Bergi

Reputation: 665362

Here's a fairer benchmark:

  • Baseline

    // Baseline benchmark: three separate regex replaces per iteration.
    const str = 'i love code. i write code everyday.';
    let outputJs, outputCss, outputHtml;
    const startTime = Date.now();
    for (let run = 0; run < 1000000; run += 1) {
      // Every call performs its own search of `str`.
      outputJs = str.replace(/\bcode\b/g, 'js');
      outputCss = str.replace(/\bcode\b/g, 'css');
      outputHtml = str.replace(/\bcode\b/g, 'html');
    }
    const endTime = Date.now();
    const elapsedMs = endTime - startTime;
    console.log(`outputJs:   ${outputJs}`);
    console.log(`outputCss:  ${outputCss}`);
    console.log(`outputHtml: ${outputHtml}`);
    console.log(`time: ${elapsedMs} ms`);

  • split once, join thrice:

    // Benchmark: one split per iteration, then three joins on the same pieces.
    const str = 'i love code. i write code everyday.';
    let outputJs, outputCss, outputHtml;
    const startTime = Date.now();
    for (let run = 0; run < 1000000; run += 1) {
      // The search happens once here; each join only glues the pieces back.
      const pieces = str.split(/\bcode\b/g);
      outputJs = pieces.join('js');
      outputCss = pieces.join('css');
      outputHtml = pieces.join('html');
    }
    const endTime = Date.now();
    const elapsedMs = endTime - startTime;
    console.log(`outputJs:   ${outputJs}`);
    console.log(`outputCss:  ${outputCss}`);
    console.log(`outputHtml: ${outputHtml}`);
    console.log(`time: ${elapsedMs} ms`);

  • split once, use forEach to replace, join by empty string (from @PeterThoeny's answer)

    // Benchmark: split with a capture group, substitute per output inside
    // forEach, then join each list of pieces with the empty string.
    const str = 'i love code. i write code everyday.';
    let outputJs, outputCss, outputHtml;
    const startTime = Date.now();
    for (let run = 0; run < 1000000; run += 1) {
      const jsPieces = [];
      const cssPieces = [];
      const htmlPieces = [];
      // The capture group keeps every matched 'code' in the split result.
      str.split(/\b(code)\b/).forEach((piece) => {
        const isMatch = piece === 'code';
        jsPieces.push(isMatch ? 'js' : piece);
        cssPieces.push(isMatch ? 'css' : piece);
        htmlPieces.push(isMatch ? 'html' : piece);
      });
      outputJs = jsPieces.join('');
      outputCss = cssPieces.join('');
      outputHtml = htmlPieces.join('');
    }
    const endTime = Date.now();
    const elapsedMs = endTime - startTime;
    console.log(`outputJs:   ${outputJs}`);
    console.log(`outputCss:  ${outputCss}`);
    console.log(`outputHtml: ${outputHtml}`);
    console.log(`time: ${elapsedMs} ms`);

Actually the original, simple code is the fastest. Regex engines are well-optimised.

Upvotes: 0

Related Questions