Reputation: 11
I am trying to load all the images in a folder and extract the text from each of them. I keep getting an error message in the second for loop. For example:
AttributeError: 'numpy.ndarray' object has no attribute 'read'
It seems I cannot access the items of the list img. Any ideas?
# import OpenCV, Numpy, Python image library, Tesseract OCR
import os
import cv2
import numpy
from PIL import Image
import pytesseract
import glob
# Point pytesseract at the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'

# Folder holding the input .jpg files; the OCR output is written there too.
image_dir = "C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\\Reports\\Image"

# Read every .jpg in the folder as a grayscale NumPy array (flag 0).
img = []
for i in glob.glob(image_dir + "\\*.jpg"):
    n = cv2.imread(i, 0)
    print(i)
    if n is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it so the OCR loop only sees real arrays.
        continue
    img.append(n)

# Open the output file once, outside the loop: reopening it with "w" for each
# image would truncate it every time and keep only the last result.
with open(image_dir + "\\test.txt", "w", encoding="utf8") as f:
    for j in img:
        # Image.open() expects a path or file object, which is why passing the
        # array raised "'numpy.ndarray' object has no attribute 'read'".
        # Image.fromarray() wraps the in-memory array as a PIL image instead.
        im = Image.fromarray(j)
        text = pytesseract.image_to_string(im, lang='eng')
        # The file is opened in text mode with an explicit encoding, so write
        # the str directly rather than pre-encoding it to bytes.
        f.write(text)
        f.write("\n")
Upvotes: 1
Views: 2952
Reputation: 37
I am on Mac OS X, but you can adjust this code to use Windows directory paths.
import os
from os import path
from glob import glob
from pytesseract import image_to_string
from PIL import Image, ImageEnhance, ImageFilter
def enhance_img(filename, newfilename=None):
    """Write a filtered, high-contrast, 1-bit copy of an image.

    Args:
        filename: Path of the image to enhance.
        newfilename: Path to save the enhanced copy to. When omitted, the
            copy is saved next to the source as ``<stem>_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        # A real extension is required: Pillow picks the output format from
        # the file name, so the former literal 'newfilename' could not be saved.
        newfilename = path.splitext(filename)[0] + '_1.png'

    # The image has to be opened before it can be filtered; previously `im`
    # was used without ever being assigned.
    with Image.open(filename) as source:
        im = source.filter(ImageFilter.MedianFilter())

    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    im.save(newfilename)
    return newfilename
def convert_img(filename, output='parsing.txt'):
    """Run OCR on one image and append the recognised text to a text file.

    Args:
        filename: Path of the image to read.
        output: Path of the text file to append to; defaults to
            ``parsing.txt`` in the current working directory.
    """
    # Close the image handle as soon as the text has been extracted.
    with Image.open(filename) as image:
        text = image_to_string(image)

    # `file.close` without parentheses never closed the file; the context
    # manager guarantees the handle is flushed and closed. The explicit
    # encoding keeps non-ASCII OCR output from failing on platforms whose
    # default encoding cannot represent it.
    with open(output, 'a', encoding='utf-8') as out:
        out.write(text)
def find_ext(dir, ext):
    """Return the paths in a directory whose names end in a given extension.

    Args:
        dir: Directory to search; an empty string means the current
            working directory.
        ext: Extension to match, without the leading dot (e.g. ``"png"``).

    Returns:
        A sorted list of matching paths, each prefixed with ``dir``.
    """
    pattern = "*.{}".format(ext)
    # glob yields matches in arbitrary order; sort so that files are always
    # processed in the same sequence.
    return sorted(glob(path.join(dir, pattern)))
# To work in another folder, change directory first. Pass the folder as a
# string; the name `path` is the imported os.path module, not a directory.
# os.chdir("/path/to/images")

# Collect every .png in the current working directory.
filename = find_ext("", "png")
for file in filename:
    # convert image to text
    convert_img(file)
If you want to enhance the images first, include the following block and adjust the code above so that it loops through the new filenames.
def enhance_img(filename, newfilename=None):
    """Write a filtered, high-contrast, 1-bit copy of an image.

    Args:
        filename: Path of the image to enhance.
        newfilename: Path to save the enhanced copy to. When omitted, the
            copy is saved next to the source as ``<stem>_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        # A real extension is required: Pillow picks the output format from
        # the file name, so the former literal 'newfilename' could not be saved.
        newfilename = path.splitext(filename)[0] + '_1.png'

    # The image has to be opened before it can be filtered; previously `im`
    # was used without ever being assigned.
    with Image.open(filename) as source:
        im = source.filter(ImageFilter.MedianFilter())

    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    im.save(newfilename)
    return newfilename


for file in filename:
    # to enhance image if needed
    # Build the output name from the current file's stem; `filename[-3]`
    # picked an entry from the list of paths instead of stripping the
    # extension from `file`.
    newfilename = path.splitext(file)[0] + '_1.png'
    enhance_img(file, newfilename)
Upvotes: 1