user9538877
user9538877

Reputation: 198

How to extract both automated and handwritten text in image using GCP Vision or OpenCV

I have an image that contains both automated (printed) text and handwritten text, and I have written a piece of code to extract it. The GCP Vision API is not giving me the correct response. Can anyone suggest a workaround?

This is my image

enter image description here

This is my code:

import os
import io
from google.cloud import vision
from google.cloud.vision import types
import pandas as pd

# Set GOOGLE_APPLICATION_CREDENTIALS to the key file before the client is created.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'key.json'
client = vision.ImageAnnotatorClient()

IMAGE_FILE = 'datasetone.png'
FILE_PATH = os.path.join(IMAGE_FILE)

# Read the raw image bytes that will be sent in the request.
with io.open(FILE_PATH, 'rb') as fh:
    content = fh.read()

image = vision.types.Image(content=content)
response = client.document_text_detection(image=image)

# Print the full recognised text first.
annotation = response.full_text_annotation
docText = annotation.text
print(docText)

# Then walk the hierarchy page -> block -> paragraph -> word -> symbol,
# printing the confidence reported at every level.
pages = annotation.pages
for pg in pages:
    for blk in pg.blocks:
        print('block confidence:', blk.confidence)

        for para in blk.paragraphs:
            print('paragraph confidence:', para.confidence)

            for wd in para.words:
                # A word's text is the concatenation of its symbols' text.
                joined = ''.join(sym.text for sym in wd.symbols)
                print('Word text: {0} (confidence: {1}'.format(
                    joined, wd.confidence))

                for sym in wd.symbols:
                    print('\tSymbol: {0} (confidence: {1}'.format(
                        sym.text, sym.confidence))

The output is

Worksheet
Car number: - 1002110021
Data:- Endter
Shell
Qilchange
Phone: 982838238823
email:xyz@xyz.com
name
pod
pos
Technician
name
desc
code
112121223 Benz
10-10-19
10-10-20
Jhondoe
nil
12244334
Baleno
20-10-10
| 20-20-10
Machel
nil
D901.27
'RIOR

block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: Worksheet (confidence: 0.9900000095367432
        Symbol: W (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
        Symbol: r (confidence: 1.0
        Symbol: k (confidence: 0.9900000095367432
        Symbol: s (confidence: 1.0
        Symbol: h (confidence: 1.0
        Symbol: e (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: t (confidence: 1.0
Word text: Car (confidence: 0.9800000190734863
        Symbol: C (confidence: 0.9800000190734863
        Symbol: a (confidence: 0.9900000095367432
        Symbol: r (confidence: 0.9900000095367432
Word text: number (confidence: 0.9800000190734863
        Symbol: n (confidence: 0.9599999785423279
        Symbol: u (confidence: 0.9800000190734863
        Symbol: m (confidence: 0.9800000190734863
        Symbol: b (confidence: 0.9800000190734863
        Symbol: e (confidence: 1.0
        Symbol: r (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.949999988079071
        Symbol: - (confidence: 0.949999988079071
Word text: 1002110021 (confidence: 0.9700000286102295
        Symbol: 1 (confidence: 0.9599999785423279
        Symbol: 0 (confidence: 0.9800000190734863
        Symbol: 0 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9399999976158142
        Symbol: 1 (confidence: 0.9599999785423279
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9599999785423279
        Symbol: 0 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
Word text: Data (confidence: 0.9900000095367432
        Symbol: D (confidence: 0.9900000095367432
        Symbol: a (confidence: 0.9900000095367432
        Symbol: t (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: Endter (confidence: 0.9900000095367432
        Symbol: E (confidence: 0.9900000095367432
        Symbol: n (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
        Symbol: t (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: r (confidence: 1.0
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: Shell (confidence: 0.9900000095367432
        Symbol: S (confidence: 0.9900000095367432
        Symbol: h (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: l (confidence: 1.0
        Symbol: l (confidence: 1.0
Word text: Qilchange (confidence: 0.8999999761581421
        Symbol: Q (confidence: 0.6100000143051147
        Symbol: i (confidence: 0.9100000262260437
        Symbol: l (confidence: 0.9100000262260437
        Symbol: c (confidence: 0.8500000238418579
        Symbol: h (confidence: 0.9200000166893005
        Symbol: a (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: g (confidence: 1.0
        Symbol: e (confidence: 1.0
Word text: Phone (confidence: 0.9900000095367432
        Symbol: P (confidence: 0.9900000095367432
        Symbol: h (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: n (confidence: 1.0
        Symbol: e (confidence: 1.0
Word text: : (confidence: 1.0
        Symbol: : (confidence: 1.0
Word text: 982838238823 (confidence: 0.9800000190734863
        Symbol: 9 (confidence: 0.9599999785423279
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 8 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
Word text: email (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: m (confidence: 0.9900000095367432
        Symbol: a (confidence: 0.9900000095367432
        Symbol: i (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
Word text: : (confidence: 0.9900000095367432
        Symbol: : (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9900000095367432
        Symbol: x (confidence: 0.9900000095367432
        Symbol: y (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
Word text: @ (confidence: 0.9900000095367432
        Symbol: @ (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9700000286102295
        Symbol: x (confidence: 0.949999988079071
        Symbol: y (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
Word text: . (confidence: 0.9900000095367432
        Symbol: . (confidence: 0.9900000095367432
Word text: com (confidence: 0.9900000095367432
        Symbol: c (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: m (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: name (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: m (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: pod (confidence: 0.9900000095367432
        Symbol: p (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
block confidence: 0.5699999928474426
paragraph confidence: 0.5699999928474426
Word text: pos (confidence: 0.5699999928474426
        Symbol: p (confidence: 0.20000000298023224
        Symbol: o (confidence: 0.9900000095367432
        Symbol: s (confidence: 0.5400000214576721
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Technician (confidence: 0.9900000095367432
        Symbol: T (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: c (confidence: 0.9900000095367432
        Symbol: h (confidence: 1.0
        Symbol: n (confidence: 1.0
        Symbol: i (confidence: 1.0
        Symbol: c (confidence: 1.0
        Symbol: i (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: n (confidence: 1.0
Word text: name (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: m (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: desc (confidence: 0.9900000095367432
        Symbol: d (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
        Symbol: s (confidence: 1.0
        Symbol: c (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: code (confidence: 0.9900000095367432
        Symbol: c (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: d (confidence: 0.9900000095367432
        Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 112121223 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
Word text: Benz (confidence: 0.9900000095367432
        Symbol: B (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: z (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 1.0
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 19 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 9 (confidence: 0.9900000095367432
block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9800000190734863
block confidence: 0.9599999785423279
paragraph confidence: 0.9599999785423279
Word text: Jhondoe (confidence: 0.9599999785423279
        Symbol: J (confidence: 0.8100000023841858
        Symbol: h (confidence: 0.9900000095367432
        Symbol: o (confidence: 1.0
        Symbol: n (confidence: 0.9900000095367432
        Symbol: d (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9800000190734863
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: nil (confidence: 0.9700000286102295
        Symbol: n (confidence: 0.9900000095367432
        Symbol: i (confidence: 0.9900000095367432
        Symbol: l (confidence: 0.9399999976158142
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 12244334 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 1.0
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 3 (confidence: 0.9900000095367432
        Symbol: 4 (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Baleno (confidence: 0.9900000095367432
        Symbol: B (confidence: 0.9900000095367432
        Symbol: a (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
        Symbol: e (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: o (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9900000095367432
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
        Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9900000095367432
block confidence: 0.8899999856948853
paragraph confidence: 0.8899999856948853
Word text: | (confidence: 0.4300000071525574
        Symbol: | (confidence: 0.4300000071525574
Word text: 20 (confidence: 0.8399999737739563
        Symbol: 2 (confidence: 0.699999988079071
        Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 0.949999988079071
        Symbol: - (confidence: 0.949999988079071
Word text: 20 (confidence: 0.9800000190734863
        Symbol: 2 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 1.0
        Symbol: - (confidence: 1.0
Word text: 10 (confidence: 0.9900000095367432
        Symbol: 1 (confidence: 0.9900000095367432
        Symbol: 0 (confidence: 1.0
block confidence: 0.9300000071525574
paragraph confidence: 0.9300000071525574
Word text: Machel (confidence: 0.9300000071525574
        Symbol: M (confidence: 0.9200000166893005
        Symbol: a (confidence: 0.949999988079071
        Symbol: c (confidence: 0.9200000166893005
        Symbol: h (confidence: 0.9599999785423279
        Symbol: e (confidence: 0.949999988079071
        Symbol: l (confidence: 0.9200000166893005
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: nil (confidence: 0.9900000095367432
        Symbol: n (confidence: 0.9900000095367432
        Symbol: i (confidence: 1.0
        Symbol: l (confidence: 0.9900000095367432
block confidence: 0.7599999904632568
paragraph confidence: 0.7599999904632568
Word text: D901 (confidence: 0.7200000286102295
        Symbol: D (confidence: 0.6499999761581421
        Symbol: 9 (confidence: 0.8700000047683716
        Symbol: 0 (confidence: 0.49000000953674316
        Symbol: 1 (confidence: 0.8999999761581421
Word text: . (confidence: 0.6399999856948853
        Symbol: . (confidence: 0.6399999856948853
Word text: 27 (confidence: 0.8600000143051147
        Symbol: 2 (confidence: 0.9399999976158142
        Symbol: 7 (confidence: 0.7799999713897705
Word text: ' (confidence: 0.9300000071525574
        Symbol: ' (confidence: 0.9300000071525574
Word text: RIOR (confidence: 0.7400000095367432
        Symbol: R (confidence: 0.9599999785423279
        Symbol: I (confidence: 0.7799999713897705
        Symbol: O (confidence: 0.8399999737739563
        Symbol: R (confidence: 0.4000000059604645

The last column of the dataset (the "code" column) is handwritten. Can anyone help me identify the text when both handwritten and automated text are present?

Also, is there any way to extract and print the data row by row using OpenCV, if not with Google Cloud Vision?

Upvotes: 4

Views: 2801

Answers (1)

nathancy
nathancy

Reputation: 46600

You can try preprocessing the image by removing the horizontal and vertical lines, which may result in better output.

Method #1: Horizontal/vertical kernel and "filling" in contours to remove lines

enter image description here

import cv2

# Load the image and build an inverted, Otsu-thresholded binary mask.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Remove horizontal lines first, then vertical lines. For each direction a
# long, thin rectangular kernel is used in a morphological opening, and the
# contours found in the opened image are drawn in black onto the mask.
# The order matters: the vertical pass runs on the mask already modified by
# the horizontal pass.
for kernel_size in ((30, 1), (1, 30)):
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, line_kernel, iterations=1)
    found = cv2.findContours(detected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        cv2.drawContours(thresh, [contour], -1, (0,0,0), 1)

# Remove small noise: fill every contour whose area is below 10 with black.
found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]
for contour in contours:
    if cv2.contourArea(contour) < 10:
        cv2.drawContours(thresh, [contour], -1, (0,0,0), -1)

# Keep only the masked pixels of the input image; paint everything else white.
result = cv2.bitwise_and(image, image, mask=thresh)
result[thresh == 0] = (255,255,255)

cv2.imshow('result', result)
cv2.imwrite('result.png', result)
cv2.waitKey()

Method #2: Horizontal/vertical kernel with masking and bitwise-operations

enter image description here

import cv2

image = cv2.imread('1.png')

# Morphological closing with a 1x50 kernel, then invert the result.
# NOTE(review): presumably this isolates the vertical lines as bright pixels
# for dark-on-light input -- confirm on the actual image.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
closed_vertical = cv2.morphologyEx(image, cv2.MORPH_CLOSE, vertical_kernel)
remove_vertical = 255 - closed_vertical

# Same operation with a 50x1 kernel for the horizontal direction.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
closed_horizontal = cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
remove_horizontal = 255 - closed_horizontal

# Combine both inverted images and add them onto the original image.
remove_both = cv2.add(remove_vertical, remove_horizontal)
result = cv2.add(remove_both, image)

cv2.imshow('result', result)
cv2.waitKey()

Results using Pytesseract OCR

Worksheet
Car number - 1002110021
Data - Endter
Shell
Qilchange
Phone 982838238823
email xvz@xyz com
id name pod pos Technician desc code

name
112121223 Benz 10-10-19 10-10-20 Jhondoe nil

ve O \

12244334 Baleno 20-10-10 20-20-10 Machel nil \ \

It doesn't seem to capture the handwritten text; you may need to train your own classifier using machine/deep learning, but it works fairly well for automated text. We use the `--psm 6` configuration option to assume a single uniform block of text. See the Tesseract page segmentation mode documentation for more configuration options.

import cv2
import pytesseract

# Path to the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Run OCR on the preprocessed image written by the line-removal step.
ocr_config = '--psm 6'
image = cv2.imread('result.png')
data = pytesseract.image_to_string(image, lang='eng', config=ocr_config)
print(data)

Upvotes: 2

Related Questions