Reputation: 67
How can I extract a number from an image using JavaScript OCR with Tesseract.js, and then add its digits together?
I can extract the number as text using this code, but I do not know how to convert it to an array of digits and then sum the four digits together. Let's say the number in the image that I want to scan on my phone is 4567.
If I convert the text to a number before passing it to console.log, it doesn't show the number 4567.
// Run OCR on the <img id="userImage"> element and print the recognized text.
const myImage = document.getElementById('userImage');
Tesseract.recognize(myImage)
  .then(function (result) {
    // The recognized text is read from result.text, as in the original snippet.
    console.log(result.text);
  })
  .catch(function (err) {
    // Report recognition failures instead of leaving the rejection unhandled.
    console.error('OCR failed:', err);
  });
<!-- Loads the Tesseract.js 1.0.10 build from the cdn.rawgit.com URL below. -->
<script src='https://cdn.rawgit.com/naptha/tesseract.js/1.0.10/dist/tesseract.js'></script>
<!-- Sample image: a placeholder whose URL requests the text "4567"; the script looks it up by its id, "userImage". -->
<img id="userImage" src="https://via.placeholder.com/728x90.png?text=4567" />
Upvotes: 5
Views: 8961
Reputation: 359
/**
 * Extracts every decimal digit from a piece of OCR output.
 *
 * Anything that is not 0-9 (whitespace, trailing newlines, letters,
 * punctuation) is skipped, so noisy text cannot turn the result into NaN.
 *
 * @param {string} text - Raw text to scan.
 * @returns {number[]} The digits in reading order, e.g. '4567' -> [4, 5, 6, 7].
 */
function extractDigits(text) {
  // match() returns null when nothing matches; fall back to an empty list.
  const found = String(text).match(/\d/g) || [];
  return found.map((digit) => Number(digit));
}

/**
 * Runs OCR on the element with id "ocr", logs the recognized text and the
 * sum of its digits, and resolves with that sum.
 *
 * @returns {Promise<number|undefined>} The digit sum (0 when the text holds
 *   no digits), or undefined when recognition fails; the failure is logged.
 */
function startProcessing() {
  const img = document.getElementById('ocr');

  return Tesseract.recognize(img, 'eng', {
    logger: (m) => console.log(m),
  })
    .then(({ data }) => {
      console.log(data.text, typeof data.text); // returns type as string

      // Converting the whole string to a number; always pass the radix to parseInt.
      const trimmedText = data.text.trim();
      console.log(Number(trimmedText), Number.parseInt(trimmedText, 10));

      // Array with number i.e '4567' --> [4,5,6,7]
      const convertedNumber = extractDigits(data.text);

      // The initial value 0 keeps reduce from throwing on an empty array.
      const sum = convertedNumber.reduce((acc, curr) => acc + curr, 0);
      console.log(':::SUM:::', sum);
      return sum;
    })
    .catch((err) => {
      // Report recognition failures instead of leaving the rejection unhandled.
      console.error('OCR failed:', err);
    });
}
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>Document</title>
  <!-- Tesseract.js 2.x from unpkg; the script below uses the v2-style recognize(image, lang, options) call -->
  <script src='https://unpkg.com/tesseract.js@2/dist/tesseract.min.js'></script>
  <!-- index.js is expected to define startProcessing(), which the image's onload handler calls -->
  <script src="index.js"></script>
</head>
<body>
  <!-- OCR starts as soon as the image has finished loading -->
  <img id="ocr" onload="startProcessing()" width="500" height="300" src="./ocr1.png" alt="random">
  <!-- Image downloaded from http://podam.org/ocr/ocr.html -->
  <!-- Make sure you download the image and save it locally; if you try to fetch it from a URL you will get a CORS error -->
</body>
</html>
<!-- Image to run OCR on; the script looks it up by its id, "userImage". -->
<img id="userImage" src="ocr.png"/>
// Run OCR on the <img id="userImage"> element, then log and alert the recognized text.
const myImage = document.getElementById('userImage');
Tesseract.recognize(myImage)
  .then(function (result) {
    console.log(result.text);
    alert(result.text);
  })
  .catch(function (err) {
    // Report recognition failures instead of leaving the rejection unhandled.
    console.error('OCR failed:', err);
  });
Note: This solution works provided we scan only numbers. If the image contains a combination of letters and numbers, some more conditions might be required to handle the string + number situations. Also make sure the image is clear enough, with proper contrast.
Upvotes: 0
Reputation: 359
I just created a sample implementation. It's not the best code, but you can refer to it: https://github.com/Mondal10/image-scanner
Upvotes: 3