Reputation: 198
I have written a piece of code that runs OCR on an image containing both automated (printed) text and handwritten text. The GCP Vision API does not give me the correct response for it. Can anyone suggest a workaround?
This is my image
This is my code:
import os
import io
from google.cloud import vision
from google.cloud.vision import types
import pandas as pd
# Point the Google client library at the service-account key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'key.json'

client = vision.ImageAnnotatorClient()

IMAGE_FILE = 'datasetone.png'
FILE_PATH = os.path.join(IMAGE_FILE)

# Read the encoded image bytes; they are sent to the API unchanged.
with io.open(FILE_PATH, 'rb') as image_file:
    content = image_file.read()

# NOTE(review): `vision.types.Image` matches the `from google.cloud.vision
# import types` import used by this script (1.x client). Newer client
# releases expose the same message as `vision.Image` - confirm against the
# installed version.
image = vision.types.Image(content=content)
response = client.document_text_detection(image=image)

# A failed annotation is reported in the response's `error` field, so check
# it explicitly instead of silently printing an empty result.
if response.error.message:
    raise RuntimeError(
        'Vision API request failed: {0}'.format(response.error.message))

# Full recognised text of the document in one string.
docText = response.full_text_annotation.text
print(docText)

# Walk the page -> block -> paragraph -> word -> symbol hierarchy and print
# the confidence reported at each level.
# NOTE: the word/symbol format strings have no closing ')' after the
# confidence value; they are kept as-is so the printed output is unchanged.
pages = response.full_text_annotation.pages
for page in pages:
    for block in page.blocks:
        print('block confidence:', block.confidence)

        for paragraph in block.paragraphs:
            print('paragraph confidence:', paragraph.confidence)

            for word in paragraph.words:
                # A word is a sequence of symbols; join them to get its text.
                word_text = ''.join([symbol.text for symbol in word.symbols])
                print('Word text: {0} (confidence: {1}'.format(
                    word_text, word.confidence))

                for symbol in word.symbols:
                    print('\tSymbol: {0} (confidence: {1}'.format(
                        symbol.text, symbol.confidence))
The output is:
Worksheet
Car number: - 1002110021
Data:- Endter
Shell
Qilchange
Phone: 982838238823
email:xyz@xyz.com
name
pod
pos
Technician
name
desc
code
112121223 Benz
10-10-19
10-10-20
Jhondoe
nil
12244334
Baleno
20-10-10
| 20-20-10
Machel
nil
D901.27
'RIOR
block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: Worksheet (confidence: 0.9900000095367432
Symbol: W (confidence: 0.9900000095367432
Symbol: o (confidence: 0.9900000095367432
Symbol: r (confidence: 1.0
Symbol: k (confidence: 0.9900000095367432
Symbol: s (confidence: 1.0
Symbol: h (confidence: 1.0
Symbol: e (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
Symbol: t (confidence: 1.0
Word text: Car (confidence: 0.9800000190734863
Symbol: C (confidence: 0.9800000190734863
Symbol: a (confidence: 0.9900000095367432
Symbol: r (confidence: 0.9900000095367432
Word text: number (confidence: 0.9800000190734863
Symbol: n (confidence: 0.9599999785423279
Symbol: u (confidence: 0.9800000190734863
Symbol: m (confidence: 0.9800000190734863
Symbol: b (confidence: 0.9800000190734863
Symbol: e (confidence: 1.0
Symbol: r (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.949999988079071
Symbol: - (confidence: 0.949999988079071
Word text: 1002110021 (confidence: 0.9700000286102295
Symbol: 1 (confidence: 0.9599999785423279
Symbol: 0 (confidence: 0.9800000190734863
Symbol: 0 (confidence: 0.9800000190734863
Symbol: 2 (confidence: 0.9399999976158142
Symbol: 1 (confidence: 0.9599999785423279
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9599999785423279
Symbol: 0 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Word text: Data (confidence: 0.9900000095367432
Symbol: D (confidence: 0.9900000095367432
Symbol: a (confidence: 0.9900000095367432
Symbol: t (confidence: 0.9900000095367432
Symbol: a (confidence: 1.0
Word text: : (confidence: 0.9900000095367432
Symbol: : (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: Endter (confidence: 0.9900000095367432
Symbol: E (confidence: 0.9900000095367432
Symbol: n (confidence: 1.0
Symbol: d (confidence: 0.9900000095367432
Symbol: t (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
Symbol: r (confidence: 1.0
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: Shell (confidence: 0.9900000095367432
Symbol: S (confidence: 0.9900000095367432
Symbol: h (confidence: 0.9900000095367432
Symbol: e (confidence: 0.9900000095367432
Symbol: l (confidence: 1.0
Symbol: l (confidence: 1.0
Word text: Qilchange (confidence: 0.8999999761581421
Symbol: Q (confidence: 0.6100000143051147
Symbol: i (confidence: 0.9100000262260437
Symbol: l (confidence: 0.9100000262260437
Symbol: c (confidence: 0.8500000238418579
Symbol: h (confidence: 0.9200000166893005
Symbol: a (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: g (confidence: 1.0
Symbol: e (confidence: 1.0
Word text: Phone (confidence: 0.9900000095367432
Symbol: P (confidence: 0.9900000095367432
Symbol: h (confidence: 0.9900000095367432
Symbol: o (confidence: 1.0
Symbol: n (confidence: 1.0
Symbol: e (confidence: 1.0
Word text: : (confidence: 1.0
Symbol: : (confidence: 1.0
Word text: 982838238823 (confidence: 0.9800000190734863
Symbol: 9 (confidence: 0.9599999785423279
Symbol: 8 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 8 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Symbol: 8 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Symbol: 8 (confidence: 0.9900000095367432
Symbol: 8 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Word text: email (confidence: 0.9900000095367432
Symbol: e (confidence: 0.9900000095367432
Symbol: m (confidence: 0.9900000095367432
Symbol: a (confidence: 0.9900000095367432
Symbol: i (confidence: 1.0
Symbol: l (confidence: 0.9900000095367432
Word text: : (confidence: 0.9900000095367432
Symbol: : (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9900000095367432
Symbol: x (confidence: 0.9900000095367432
Symbol: y (confidence: 0.9900000095367432
Symbol: z (confidence: 0.9900000095367432
Word text: @ (confidence: 0.9900000095367432
Symbol: @ (confidence: 0.9900000095367432
Word text: xyz (confidence: 0.9700000286102295
Symbol: x (confidence: 0.949999988079071
Symbol: y (confidence: 0.9900000095367432
Symbol: z (confidence: 0.9900000095367432
Word text: . (confidence: 0.9900000095367432
Symbol: . (confidence: 0.9900000095367432
Word text: com (confidence: 0.9900000095367432
Symbol: c (confidence: 0.9900000095367432
Symbol: o (confidence: 1.0
Symbol: m (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: name (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: a (confidence: 1.0
Symbol: m (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: pod (confidence: 0.9900000095367432
Symbol: p (confidence: 0.9900000095367432
Symbol: o (confidence: 1.0
Symbol: d (confidence: 0.9900000095367432
block confidence: 0.5699999928474426
paragraph confidence: 0.5699999928474426
Word text: pos (confidence: 0.5699999928474426
Symbol: p (confidence: 0.20000000298023224
Symbol: o (confidence: 0.9900000095367432
Symbol: s (confidence: 0.5400000214576721
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Technician (confidence: 0.9900000095367432
Symbol: T (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
Symbol: c (confidence: 0.9900000095367432
Symbol: h (confidence: 1.0
Symbol: n (confidence: 1.0
Symbol: i (confidence: 1.0
Symbol: c (confidence: 1.0
Symbol: i (confidence: 0.9900000095367432
Symbol: a (confidence: 1.0
Symbol: n (confidence: 1.0
Word text: name (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: a (confidence: 1.0
Symbol: m (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: desc (confidence: 0.9900000095367432
Symbol: d (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
Symbol: s (confidence: 1.0
Symbol: c (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: code (confidence: 0.9900000095367432
Symbol: c (confidence: 0.9900000095367432
Symbol: o (confidence: 1.0
Symbol: d (confidence: 0.9900000095367432
Symbol: e (confidence: 1.0
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 112121223 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 1.0
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 1.0
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Word text: Benz (confidence: 0.9900000095367432
Symbol: B (confidence: 0.9900000095367432
Symbol: e (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: z (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 1.0
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 19 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 9 (confidence: 0.9900000095367432
block confidence: 0.9800000190734863
paragraph confidence: 0.9800000190734863
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9800000190734863
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9800000190734863
block confidence: 0.9599999785423279
paragraph confidence: 0.9599999785423279
Word text: Jhondoe (confidence: 0.9599999785423279
Symbol: J (confidence: 0.8100000023841858
Symbol: h (confidence: 0.9900000095367432
Symbol: o (confidence: 1.0
Symbol: n (confidence: 0.9900000095367432
Symbol: d (confidence: 0.9900000095367432
Symbol: o (confidence: 0.9900000095367432
Symbol: e (confidence: 0.9800000190734863
block confidence: 0.9700000286102295
paragraph confidence: 0.9700000286102295
Word text: nil (confidence: 0.9700000286102295
Symbol: n (confidence: 0.9900000095367432
Symbol: i (confidence: 0.9900000095367432
Symbol: l (confidence: 0.9399999976158142
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 12244334 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 1.0
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 4 (confidence: 0.9900000095367432
Symbol: 4 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Symbol: 3 (confidence: 0.9900000095367432
Symbol: 4 (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: Baleno (confidence: 0.9900000095367432
Symbol: B (confidence: 0.9900000095367432
Symbol: a (confidence: 1.0
Symbol: l (confidence: 0.9900000095367432
Symbol: e (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: o (confidence: 0.9900000095367432
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: 20 (confidence: 0.9900000095367432
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
Word text: - (confidence: 0.9900000095367432
Symbol: - (confidence: 0.9900000095367432
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9900000095367432
block confidence: 0.8899999856948853
paragraph confidence: 0.8899999856948853
Word text: | (confidence: 0.4300000071525574
Symbol: | (confidence: 0.4300000071525574
Word text: 20 (confidence: 0.8399999737739563
Symbol: 2 (confidence: 0.699999988079071
Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 0.949999988079071
Symbol: - (confidence: 0.949999988079071
Word text: 20 (confidence: 0.9800000190734863
Symbol: 2 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 0.9800000190734863
Word text: - (confidence: 1.0
Symbol: - (confidence: 1.0
Word text: 10 (confidence: 0.9900000095367432
Symbol: 1 (confidence: 0.9900000095367432
Symbol: 0 (confidence: 1.0
block confidence: 0.9300000071525574
paragraph confidence: 0.9300000071525574
Word text: Machel (confidence: 0.9300000071525574
Symbol: M (confidence: 0.9200000166893005
Symbol: a (confidence: 0.949999988079071
Symbol: c (confidence: 0.9200000166893005
Symbol: h (confidence: 0.9599999785423279
Symbol: e (confidence: 0.949999988079071
Symbol: l (confidence: 0.9200000166893005
block confidence: 0.9900000095367432
paragraph confidence: 0.9900000095367432
Word text: nil (confidence: 0.9900000095367432
Symbol: n (confidence: 0.9900000095367432
Symbol: i (confidence: 1.0
Symbol: l (confidence: 0.9900000095367432
block confidence: 0.7599999904632568
paragraph confidence: 0.7599999904632568
Word text: D901 (confidence: 0.7200000286102295
Symbol: D (confidence: 0.6499999761581421
Symbol: 9 (confidence: 0.8700000047683716
Symbol: 0 (confidence: 0.49000000953674316
Symbol: 1 (confidence: 0.8999999761581421
Word text: . (confidence: 0.6399999856948853
Symbol: . (confidence: 0.6399999856948853
Word text: 27 (confidence: 0.8600000143051147
Symbol: 2 (confidence: 0.9399999976158142
Symbol: 7 (confidence: 0.7799999713897705
Word text: ' (confidence: 0.9300000071525574
Symbol: ' (confidence: 0.9300000071525574
Word text: RIOR (confidence: 0.7400000095367432
Symbol: R (confidence: 0.9599999785423279
Symbol: I (confidence: 0.7799999713897705
Symbol: O (confidence: 0.8399999737739563
Symbol: R (confidence: 0.4000000059604645
The last column of the table (the "code" column) is handwritten. Can anyone help me recognise the text when both handwritten and automated (printed) text are present in the same image?
Also, is there any way to extract and print the data row by row, using OpenCV if it is not possible with Google Cloud Vision?
Upvotes: 4
Views: 2801
Reputation: 46600
You can try preprocessing the image by removing the horizontal and vertical lines, which may result in better output.
Method #1: Horizontal/vertical kernel and "filling" in contours to remove lines
import cv2
def _external_contours(mask):
    """Return the list of outer contours found in the binary image *mask*."""
    found = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The returned tuple has two or three items depending on the OpenCV
    # version; pick the element that holds the contour list in either case.
    return found[0] if len(found) == 2 else found[1]


# Load the image and binarise it with an inverted Otsu threshold.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Remove horizontal lines first (30x1 kernel), then vertical lines (1x30
# kernel). The opening result for each direction is outlined in black on the
# threshold image; the vertical pass runs on the already-modified image.
for kernel_size in ((30,1), (1,30)):
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected_lines = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, line_kernel, iterations=1)
    for contour in _external_contours(detected_lines):
        cv2.drawContours(thresh, [contour], -1, (0,0,0), 1)

# Remove small noise: fill every contour with an area below 10 with black.
for contour in _external_contours(thresh):
    if cv2.contourArea(contour) < 10:
        cv2.drawContours(thresh, [contour], -1, (0,0,0), -1)

# Bitwise mask with input image: keep the original pixels where the cleaned
# threshold image is non-zero and paint everything else white.
result = cv2.bitwise_and(image, image, mask=thresh)
background = thresh == 0
result[background] = (255,255,255)

cv2.imshow('result', result)
cv2.imwrite('result.png', result)
cv2.waitKey()
Method #2: Horizontal/vertical kernel with masking and bitwise-operations
import cv2
image = cv2.imread('1.png')

# Build one inverted mask per direction: close the image with a 1x50
# (vertical) and then a 50x1 (horizontal) rectangular kernel and subtract
# the result from 255.
# NOTE(review): presumably the input has dark lines/text on a light
# background, so each mask is bright only along the long lines - confirm.
line_masks = []
for kernel_size in ((1,50), (50,1)):
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
    line_masks.append(255 - closed)
vertical_mask, horizontal_mask = line_masks

# Combine both masks, then add them onto the input image.
remove_both = cv2.add(vertical_mask, horizontal_mask)
result = cv2.add(remove_both, image)

cv2.imshow('result', result)
cv2.waitKey()
Results using Pytesseract OCR
Worksheet
Car number - 1002110021
Data - Endter
Shell
Qilchange
Phone 982838238823
email xvz@xyz com
id name pod pos Technician desc code
name
112121223 Benz 10-10-19 10-10-20 Jhondoe nil
ve O \
12244334 Baleno 20-10-10 20-20-10 Machel nil \ \
It doesn't seem to capture the handwritten text; for that you may need to train your own classifier using machine/deep learning. It works fairly well for the automated (printed) text, though. We use the `--psm 6` configuration option to assume a uniform block of text. See the Tesseract documentation for more configuration options.
import cv2
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Run OCR on the image file 'result.png' and print the recognised text.
cleaned_image = cv2.imread('result.png')
ocr_config = '--psm 6'  # page segmentation mode 6: assume a uniform block of text
recognised_text = pytesseract.image_to_string(
    cleaned_image, lang='eng', config=ocr_config)
print(recognised_text)
Upvotes: 2