Check if image A exists in image B

Question

I need to check whether an image exists inside another image using JavaScript. I would like to know the best approaches (algorithms) and solutions (e.g. libraries) for doing this.

I explained what I need to do in this image:

Blindman67 · Accepted Answer

Using the GPU to help in image processing.

Using the 2D API and some simple tricks you can exploit the GPUs power to speed up Javascript.

Difference

To find an image you need to compare the pixels you are looking for (A) against the pixels in the image (B). If the absolute difference Math.abs(A-B) === 0 then the pixels are the same.

A function to do this may look like the following

/**
 * Sums the absolute red, green and blue differences between every pixel of
 * the source image and the same-sized region of the destination image whose
 * top-left corner is at (xx, yy). The alpha channel is not compared.
 *
 * The region is not bounds-checked: the caller must make sure the source
 * fits inside the destination at (xx, yy).
 *
 * @param {ImageData} imageDataSource - pixels of the image being looked for
 * @param {ImageData} imageDataDest - pixels of the image being searched
 * @param {number} xx - x offset of the region in the destination
 * @param {number} yy - y offset of the region in the destination
 * @returns {number} 0 when the region is identical, larger the more it differs
 */
function findDif(imageDataSource, imageDataDest, xx,yy) {
    const ds = imageDataSource.data;
    const dd = imageDataDest.data;
    const w = imageDataSource.width;
    const h = imageDataSource.height;
    const destWidth = imageDataDest.width;
    let dif = 0;
    for (let y = 0; y < h; y += 1) {
        for (let x = 0; x < w; x += 1) {
            // 4 bytes per pixel (r, g, b, a)
            const indexS = (x + y * w) * 4;
            const indexD = (x + xx + (y + yy) * destWidth) * 4;
            dif += Math.abs(ds[indexS] - dd[indexD]);
            dif += Math.abs(ds[indexS + 1] - dd[indexD + 1]);
            dif += Math.abs(ds[indexS + 2] - dd[indexD + 2]);
        }
    }
    return dif;
}

// Raw pixels of the image we are looking for (source) and of the image we search in (dest).
var source = sourceCanvas.getContext("2d").getImageData(0,0,sourceCanvas.width,sourceCanvas.height);
var dest = destinationCanvas.getContext("2d").getImageData(0,0,destinationCanvas.width,destinationCanvas.height);

// findDif returns 0 for an exact match and grows as the images differ, so a
// match is a result UNDER a tolerance, not a truthy result.
// Here: an average error of less than 5 per colour channel; tune as needed.
var threshold = source.width * source.height * 3 * 5;

if(findDif(source,dest,100,100) < threshold){ // is the image at 100,100?
     // Yes image is very similar
}

Where the source is the image we are looking for and the dest is the image we want to find it in. We run the function for every location where the image may be, and if the result is under a threshold then there is a good chance we have found it.

But this is very very slow in JS. This is where the GPU can help. Using the ctx.globalCompositeOperation = "difference"; operation we can speed up the process as it will do the difference calculation for us

When you render with the comp operation "difference" the resulting pixels are the difference between the pixels you are drawing and those that are already on the canvas. Thus if you draw on something that is the same the result is all pixels are black (no difference)

To find a similar image in the image you render the image you are testing for at each location on the canvas that you want to test for. Then you get the sum of all the pixels you just rendered on, if the result is under a threshold that you have set then the image under that area is very similar to the image you are testing for.

But we still need to count all the pixels one by one.

A GPU mean function

The comp op "difference" already does the pixel difference calculation for you, but to get the sum you can use the inbuilt image smoothing.

After you have rendered to find the difference, you take that area and render it at a smaller scale with ctx.imageSmoothingEnabled = true (the default setting). The GPU will do something similar to an average and can reduce the amount of work JS has to do by several orders of magnitude.

Now instead of 100s or 1000s of pixels you can reduce it down to as few as 4 or 16, depending on the accuracy you need.

An example.

Using these methods you can get a near realtime image in image search with just the basic numerical analysis.

Click to start a test. Results are shown plus the time it took. The image that is being searched for is in the top right.

//------------------------------------------------------------------------
// Some helper functions 
var imageTools = (function () {
    // Stores a 2D rendering context on the canvas as `.ctx` and hands the canvas back.
    const withContext = (canvasElement) => {
        canvasElement.ctx = canvasElement.getContext("2d");
        return canvasElement;
    };

    return {
        // A blank canvas element of the requested size (no `.ctx` attached).
        canvas(width, height) {
            const element = document.createElement("canvas");
            element.width = width;
            element.height = height;
            return element;
        },
        // A blank canvas with its 2D context available as `.ctx`.
        createImage(width, height) {
            return withContext(this.canvas(width, height));
        },
        // A new canvas (with `.ctx`) the size of `img`, with `img` drawn at 0,0.
        image2Canvas(img) {
            const copy = withContext(this.canvas(img.width, img.height));
            copy.ctx.drawImage(img, 0, 0);
            return copy;
        },
        // Alias of image2Canvas under a more descriptive name.
        copyImage(img) {
            return this.image2Canvas(img);
        },
    };
})();
const U = undefined;
// Calls callback(0), callback(1), ... up to count - 1; stops early as soon as
// the callback returns exactly `true`.
const doFor = (count, callback) => {
    for (let index = 0; index < count; index += 1) {
        if (callback(index) === true) {
            break;
        }
    }
};
// Builds an array from the results of callback(0) ... callback(count - 1).
const setOf = (count, callback) => {
    const items = [];
    for (let index = 0; index < count; index += 1) {
        items.push(callback(index));
    }
    return items;
};
// Random helpers. Each accepts (min, max), or a single argument that is then
// treated as the upper bound with a lower bound of 0.

// Random integer: the scaled value truncated with `| 0`.
const randI = (min, max) => {
    const [low, high] = max === undefined ? [0, min] : [min, max];
    return (Math.random() * (high - low) + low) | 0;
};
// Random float in the range.
const rand = (min, max) => {
    const [low, high] = max === undefined ? [0, min] : [min, max];
    return Math.random() * (high - low) + low;
};
// Random item of an array.
const randA = (array) => {
    const index = (Math.random() * array.length) | 0;
    return array[index];
};
// Random float in the range, built from the product of four random numbers
// and therefore skewed towards the lower bound.
const randG = (min, max) => {
    const [low, high] = max === undefined ? [0, min] : [min, max];
    return Math.random() * Math.random() * Math.random() * Math.random() * (high - low) + low;
};

// end of helper functions
//------------------------------------------------------------------------


/**
 * Runs one round of the demo: clears the page, fills a 6 x 6 grid of 64px
 * cells with randomly chosen and randomly rotated characters, then searches
 * the grid for one target character in each of the four orientations using
 * the "difference" composite operation plus a down-scaled draw to average
 * the differences. Matching cells are tinted red and the number of matches
 * and the elapsed time are shown in a div added to the page.
 */
function doit(){
  document.body.innerHTML = ""; // clear the page;
  var canvas = document.createElement("canvas");
  document.body.appendChild(canvas);
  var ctx = canvas.getContext("2d");
  // a grid of 36 images
  canvas.width = 6 * 64;
  canvas.height = 6 * 64;
  console.log("test");

  // get a random character to look for
  const digit = String.fromCharCode("A".charCodeAt(0) + randI(26));
  // get some characters we dont want
  const randomDigits = setOf(6,i=>{
      return String.fromCharCode("A".charCodeAt(0) + randI(26));
  })
  randomDigits.push(digit); // add the image we are looking for
  
  var w = canvas.width;
  var h = canvas.height;
  
  // create a canvas for the image we are looking for
  const imageToFind = imageTools.createImage(64,64);
  
  // and a larger one to cover pixels on the sides
  const imageToFindExtend = imageTools.createImage(128,128);
  
  // Draw the character onto the image with a white background and scaled to fit
  imageToFindExtend.ctx.fillStyle = imageToFind.ctx.fillStyle = "White";
  imageToFind.ctx.fillRect(0,0,64,64);
  imageToFindExtend.ctx.fillRect(0,0,128,128);
  ctx.font = imageToFind.ctx.font = "64px arial black";
  ctx.textAlign = imageToFind.ctx.textAlign = "center";
  ctx.textBaseline = imageToFind.ctx.textBaseline = "middle";
  // measured width plus 8px; used to shrink characters wider than a cell
  const digWidth = imageToFind.ctx.measureText(digit).width+8;
  const scale = Math.min(1,64/digWidth);
  imageToFind.ctx.fillStyle = "black";
  imageToFind.ctx.setTransform(scale,0,0,scale,32,32);
  imageToFind.ctx.fillText(digit,0,0);
  imageToFind.ctx.setTransform(1,0,0,1,0,0);
  // centre the 64x64 target inside the 128x128 white image
  imageToFindExtend.ctx.drawImage(imageToFind,32,32);
  imageToFind.extendedImage = imageToFindExtend;
  
  // Now fill the canvas with images of other characters 
  ctx.fillStyle = "white";
  ctx.setTransform(1,0,0,1,0,0);
  ctx.fillRect(0,0,w,h);
  ctx.fillStyle = "black";
  ctx.strokeStyle = "white";
  ctx.lineJoin = "round";
  ctx.lineWidth = 12;
  
  // some characters will be rotated 90,180,-90 deg
  // (only the first four entries of each matrix are used below)
  const dirs = [
      [1,0,0,1,0,0],
      [0,1,-1,0,1,0],
      [-1,0,0,-1,1,1],
      [0,-1,1,0,0,1],

  ]
  // draw random characters at random directions
  doFor(h / 64, y => {
      doFor(w / 64, x => {
          const dir = randA(dirs)
          ctx.setTransform(dir[0] * scale,dir[1] * scale,dir[2] * scale,dir[3] * scale,x * 64 + 32, y * 64 + 32);
          const d = randA(randomDigits);
          ctx.strokeText(d,0,0);
          ctx.fillText(d,0,0);
      });
  });  
  ctx.setTransform(1,0,0,1,0,0);
  
  // get a copy of the canvas
  const saveCan = imageTools.copyImage(ctx.canvas);
  
  // function that finds the images
  // image is the image to find
  // dir is the matrix direction to find 
  // sampleSize is the mean sampling size; smaller numbers are quicker
  function checkFor(image,dir,sampleSize){
      // work on a fresh copy so marks drawn on the visible canvas never affect the comparison
      const can = imageTools.copyImage(saveCan);
      const c = can.ctx;
      const stepx = 64;
      const stepy = 64;
      // the image that will contain the reduced means of the differences
      const results = imageTools.createImage(Math.ceil(w / stepx) * sampleSize,Math.ceil(h / stepy) * sampleSize);
      const e = image.extendedImage;
      // for each potential image location 
      // set a clip area and draw the source image on it with
      // comp mode  "difference";
      for(var y = 0 ; y < h; y += stepy ){
          for(var x = 0 ; x < w; x += stepx ){
              c.save();
              c.beginPath();
              c.rect(x,y,stepx,stepy);
              c.clip();
              c.globalCompositeOperation = "difference";
              c.setTransform(dir[0],dir[1],dir[2],dir[3],x +32 ,y +32 );
              c.drawImage(e,-64,-64);
              c.restore();
          }

      }
      // Apply the mean (reducing the number of pixels to check)
      results.ctx.drawImage(can,0,0,results.width,results.height);
      // get the pixel data, one 32-bit value per pixel
      var dat = new Uint32Array(results.ctx.getImageData(0,0,results.width,results.height).data.buffer);
      // for each area get the sum of the difference
      for(var y = 0; y < results.height; y += sampleSize){
          for(var x = 0; x < results.width; x += sampleSize){
              var val = 0;
              for(var yy = 0; yy < sampleSize && y+yy < results.height; yy += 1){
                  var i = x + (y+yy)*results.width;
                  for(var xx = 0; xx < sampleSize && x + xx < results.width ; xx += 1){
                      // low byte of the pixel (the red channel on little-endian platforms)
                      val += dat[i++] & 0xFF;
                  }
              }
              // if the sum is under the threshold we have found an image
              // and we mark it
              if(val < sampleSize * sampleSize * 5){
                  // A character that looks the same after rotation passes in
                  // several orientation passes; mark and count each cell once.
                  const cellKey = (x / sampleSize) + "," + (y / sampleSize);
                  if(!foundCells.has(cellKey)){
                      foundCells.add(cellKey);
                      ctx.strokeStyle = "red";
                      ctx.fillStyle = "rgba(255,0,0,0.5)";
                      ctx.lineWidth = 2;
                      ctx.strokeRect(x * (64/sampleSize),y * (64/sampleSize),64,64);
                      ctx.fillRect(x * (64/sampleSize),y * (64/sampleSize),64,64);
                      foundCount += 1;
                  }
              }
          }
      }
  }
  var foundCount = 0;
  // grid cells already reported as a match, keyed "column,row"
  const foundCells = new Set();
  // find the images at different orientations 
  var now = performance.now();
  checkFor(imageToFind,dirs[0],4);
  checkFor(imageToFind,dirs[1],6); // rotated images need larger sample size
  checkFor(imageToFind,dirs[2],6);
  checkFor(imageToFind,dirs[3],6);
  var time = performance.now() - now;
  var result = document.createElement("div");
  result.textContent = "Found "+foundCount +" matching images in "+time.toFixed(3)+"ms. Click to try again.";
  document.body.appendChild(result);
  
  // show the image we are looking for
  imageToFind.style.left = (64*6 + 16) + "px";
  imageToFind.id = "lookingFor";
  document.body.appendChild(imageToFind);
}

// Run (or re-run) the demo on every click anywhere in the document.
document.addEventListener("click",doit);

/* Every canvas is absolutely positioned, offset 28px from the top. doit()
   overrides `left` on the target-image canvas to place it beside the grid. */
canvas { 
    border : 2px solid black;
    position : absolute;
    top : 28px;
    left : 2px;
}
/* The canvas showing the character being searched for (id set in doit()). */
#lookingFor { 
    border : 4px solid red;
}
/* div elements (doit() adds one holding the result text) sit at the top-left. */
div { 
    border : 2px solid black;
    position : absolute;
    top : 2px;
    left : 2px;
}

Click to start test.

Not perfect

The example is not perfect and will sometimes make mistakes. There is a huge amount of room for improving both the accuracy and the speed. This is just something I threw together as an example to show how to use the GPU via the 2D API. Some further maths will be needed to find the statistically good results.

This method can also work for different scales and rotations; you can even use some of the other comp modes to remove colour and normalize contrast. I have used a very similar approach to stabilize a webcam by tracking points from one frame to the next, and for a variety of other image tracking uses.