Reputation: 11
What I'm trying to do is take a screenshot of a number with pyautogui and transform the number into a string with pytesseract. The code:
import pyautogui
import time
import PIL
from PIL import Image
import pytesseract
# Point pytesseract at the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = 'C://Program Files (x86)//Tesseract-OCR//tesseract'

# Create image
# The pause gives time to bring the target window to the front before the
# capture. The region is also written to disk by pyautogui.
time.sleep(5)
image = pyautogui.screenshot('projects/output.png', region=(1608, 314, 57, 41))

# Resize image
# Upscale to a fixed width while keeping the aspect ratio. The screenshot
# returned above is used directly, so no reload from disk is needed.
basewidth = 2000
wpercent = basewidth / float(image.size[0])
hsize = int(float(image.size[1]) * wpercent)
# Image.LANCZOS is the same filter as ANTIALIAS; the ANTIALIAS name was
# removed in Pillow 10.
img = image.resize((basewidth, hsize), Image.LANCZOS)

# Convert to grayscale, then threshold to a pure black-and-white image.
gray = img.convert('L')
bw = gray.point(lambda x: 0 if x < 128 else 255, '1')
# Keep the processed image on disk so it can be inspected afterwards.
bw.save('projects/output.png')

# Image to string
# Tesseract variables must be passed with "-c"; a bare "name=value" argument
# is treated as a config file name and the whitelist is silently ignored.
print(pytesseract.image_to_string(bw, config='-c tessedit_char_whitelist=0123456789'))
Now it seems that pytesseract doesn't accept the screenshot that pyautogui creates. The code runs without errors but prints an empty string. However, if I create an image in Paint and save it as 'output.png' in the same folder where the screenshot would otherwise be saved, it does work.
Image output after resize and adjustments
Does anyone have an idea what I'm missing?
Upvotes: 1
Views: 3940
Reputation: 6065
Convert it to a NumPy array; pytesseract accepts those.
import numpy as np
import pyautogui
# Capture the screen with pyautogui and hand it to pytesseract as a NumPy array.
img = np.array(pyautogui.screenshot())
# "-c" is required so Tesseract treats name=value as a variable assignment;
# without it the argument is read as a config file name and the whitelist
# has no effect.
print(pytesseract.image_to_string(img, config='-c tessedit_char_whitelist=0123456789'))
Alternatively, I would recommend 'mss' for screenshots, as it is much faster.
import mss
with mss.mss() as sct:
    # The grab must happen while the mss context is still open.
    # monitors[1] is the first individual monitor; monitors[0] is the
    # combined area of all monitors.
    img = np.array(sct.grab(sct.monitors[1]))
# "-c" is required so Tesseract treats name=value as a variable assignment;
# without it the argument is read as a config file name and the whitelist
# has no effect.
print(pytesseract.image_to_string(img, config='-c tessedit_char_whitelist=0123456789'))
Upvotes: 2
Reputation: 116
Modify the path and try the following:
import numpy as np
from numpy import *
from PIL import Image
from PIL import *
import pytesseract
import cv2
# Folder the OCR helper images are written to and read from. File names are
# appended to this string directly, so it must end with a path separator.
src_path = "C:\\Users\\USERNAME\\Documents\\OCR\\"
def get_region(box):
    """Capture a screen region, double its size and save it as test.png.

    box is a (left, top, right, bottom) tuple of screen coordinates. The
    enlarged capture is written to src_path + "test.png", which is the path
    main() passes to get_string().
    """
    # Imported locally: a star import from PIL does not reliably expose the
    # ImageGrab submodule.
    from PIL import ImageGrab

    # Grabs the region of the box coordinates
    im = ImageGrab.grab(box)

    # Change size of image to 200% of the original size
    left, top, right, bottom = box
    doubled_size = ((right - left) * 2, (bottom - top) * 2)

    # Save under src_path rather than the current working directory, so the
    # file ends up where it is read back from.
    im.resize(doubled_size).save(src_path + "test.png", 'PNG')
def get_string(img_path):
    """Run OCR on the image at img_path and return the recognized text.

    The image is converted to grayscale, passed through a dilate/erode pair
    and written to src_path as "removed_noise.png" and "thres.png" so the
    intermediate results can be inspected. "thres.png" is then read back and
    given to pytesseract.

    Raises FileNotFoundError when img_path cannot be read as an image.
    """
    # Read image with opencv
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply dilation and erosion to remove some noise.
    # NOTE: a 1x1 kernel leaves the image unchanged; use a larger kernel
    # to actually filter noise.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Write image after removed noise
    cv2.imwrite(src_path + "removed_noise.png", img)

    # Thresholding is currently disabled; enable the next line to get an
    # image with only black and white.
    #img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)

    # Write the image that is handed to Tesseract
    cv2.imwrite(src_path + "thres.png", img)

    # Recognize text with tesseract for python; the context manager closes
    # the file handle once OCR is done.
    with Image.open(src_path + "thres.png") as thres_img:
        result = pytesseract.image_to_string(thres_img)
    return result
def main():
    """Capture a fixed screen region and print the text recognized in it."""
    # Grab the region of the screenshot (box area): left, top, right, bottom
    region = (1354, 630, 1433, 648)
    get_region(region)

    # Output results; the capture is read back from src_path + "test.png"
    print("OCR Output: ")
    print(get_string(src_path + "test.png"))


# Run the capture + OCR only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Upvotes: 2