Reputation: 443
My code
# OCR every image in `data`, a list of base64-encoded image strings.
for index, img in enumerate(data):
    # Skip the first 22 characters before decoding -- presumably a data-URI
    # header such as "data:image/png;base64," (TODO confirm against the input).
    b64 = base64.b64decode(bytes(img[22:], encoding='utf-8'))
    raw = BytesIO(b64)
    # 'LA' = 8-bit luminance plus alpha, so each pixel is a (gray, alpha) pair.
    im = Image.open(raw).convert('LA')
    pixels = im.load()
    width, height = im.size
    # Binarize in place: luminance above 100 becomes opaque white,
    # everything else opaque black.
    for x in range(width):
        for y in range(height):
            if pixels[x, y][0] > 100:
                pixels[x, y] = (255, 255)
            else:
                pixels[x, y] = (0, 255)
    # Tesseract variables must be introduced with `-c`; without the flag the
    # `tessedit_char_whitelist=...` token is not parsed as a variable
    # assignment and the whitelist is silently ignored.
    print(pytesseract.image_to_string(im, config='-c tessedit_char_whitelist=1234567890plus?'))
Output:
Te Ys
What can I do to make this better? I have tried every psm value from 0 to 13, as well as the -c flag in the config.
Upvotes: 0
Views: 331
Reputation: 416
This code worked fine for me but spaces were not detected.
# Load the picture as a single-channel grayscale array (imread flag 0),
# then flip every bit so dark and light are swapped before OCR.
grayscale = cv2.imread("18.png", 0)
img = ~grayscale
rows, cols = img.shape[:2]

# Disabled experiment kept from the original answer: shift the image by
# (25, 15) pixels onto a canvas twice as large, padded with white.
# M = np.float32([[1,0,25],[0,1,15]])
# img = cv2.warpAffine(img,M,(cols*2,rows*2),borderValue=(255,255,255))

# Default OCR engine mode, fully automatic page segmentation, and a character
# whitelist supplied through `-c`.
# Optional extra that the author left switched off: -c preserve_interword_spaces=1
custom_oem_psm_config = r'--oem 3 --psm 3 -c tessedit_char_whitelist="1234567890plus?"'
recognized = pytesseract.image_to_string(img, config=custom_oem_psm_config)
print(recognized)
Output:
18plus16?
Upvotes: 1