Reputation: 341
How can I remove background noise and leave only the text? Example image:
My code:
var Tesseract = require('tesseract.js');
var Jimp = require("jimp");
// Pre-process "12.png" with Jimp (brighten, max contrast, greyscale), save it
// as "img-opt.jpg", and only then hand the saved file to Tesseract for OCR.
Jimp.read("12.png")
  .then(function (image) {
    // image.write() is asynchronous: it reports completion through its
    // callback. Returning a promise that settles from that callback makes the
    // next .then() wait until the file is actually on disk, instead of racing
    // the OCR step against a half-written (or missing) image.
    return new Promise(function (resolve, reject) {
      image
        .color([
          { apply: 'brighten', params: [20] }
        ])
        .contrast(1)
        .greyscale()
        .write("img-opt.jpg", function (err) {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        });
    });
  })
  .then(function () {
    // Run OCR on the pre-processed file, restricting recognition to the
    // whitelisted characters.
    Tesseract.recognize('img-opt.jpg', {
      tessedit_char_whitelist: 'AN%D%P'
    })
      .progress(function (message) { console.log(message); })
      .catch(function (err) { console.error(err); })
      .then(function (result) { console.log(result.text); });
  })
  .catch(function (err) {
    // Reached when reading, transforming, or writing the image fails.
    console.error(err);
  });
And I got this result:
So, how can I remove the background noise? Thanks!
Upvotes: 3
Views: 3745
Reputation: 38543
I got better results by removing the `brighten` transformation.
Also, `desaturate` seems to work better than `greyscale`.
// Pre-processing variant: a single 'desaturate' color modifier (amount 90)
// followed by contrast(1), then save the result as "img-opt.jpg".
var colorSteps = [
  { apply: 'desaturate', params: [90] }
];
image.color(colorSteps).contrast(1).write("img-opt.jpg");
You just need a little bit of trial and error.
Upvotes: 2