Remove unwanted lines and smooth CAPTCHA text

Question

I am trying to extract the text from CAPTCHA images using OpenCV in order to build a dataset for training. The extracted CAPTCHA text is not smooth or clean. Can someone help me optimize this and suggest the best method to remove the unwanted noise and get smooth CAPTCHA text? Right now, I first add some padding, then threshold the image, and then find the contours.
Issues: 1. Adding padding with cv2.BORDER_REPLICATE may add some noise.
2. The thresholded image is not smooth or clean.
Original Image :
Threshold Image:
Contour Image:
desired Image/Result:
final separated numbers required:

code:

import os
import os.path
import cv2
import glob
import imutils
import matplotlib.pyplot as plt

# Folder that is scanned for captcha images.
CAPTCHA_IMAGE_FOLDER = "generated_captcha_images"
# NOTE(review): OUTPUT_FOLDER is not used in the visible part of this script;
# presumably the extracted letter images are written there further down.
OUTPUT_FOLDER = "extracted_letter_images"


# Get a list of all the captcha images we need to process.
# The "*" pattern matches every entry in the folder, not only image files.
captcha_image_files = glob.glob(os.path.join(CAPTCHA_IMAGE_FOLDER, "*"))
# NOTE(review): counts is not used in the visible part of this script.
counts = {}

# loop over the image paths
for (i, captcha_image_file) in enumerate(captcha_image_files):
    print("[INFO] processing image {}/{}".format(i + 1, len(captcha_image_files)))

    # Since the filename contains the captcha text (i.e. "2A2X.png" has the text "2A2X"),
    # grab the base filename as the text
    filename = os.path.basename(captcha_image_file)
    captcha_correct_text = os.path.splitext(filename)[0]

    # Load the image. cv2.imread() does not raise on failure; it returns None
    # when the path cannot be decoded as an image (e.g. a stray non-image file
    # or a sub-directory picked up by the "*" glob). Skip such entries instead
    # of letting cv2.cvtColor() crash on a None input.
    image = cv2.imread(captcha_image_file)
    if image is None:
        print("[WARN] could not read image {}, skipping".format(captcha_image_file))
        continue

    # Convert it to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Add 8 pixels of padding on every side by repeating the edge pixels
    gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

    # threshold the image (convert it to pure black and white).
    # With THRESH_BINARY_INV, pixels brighter than 36 become 0 and all other
    # pixels become 255, so dark strokes end up white on a black background.
    thresh = cv2.threshold(
        gray, 36, 255, cv2.THRESH_BINARY_INV)[1]

    # find the contours (continuous blobs of pixels) in the image;
    # RETR_EXTERNAL keeps only the outermost contour of each blob
    contours = cv2.findContours(
        thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Hack for compatibility with different OpenCV versions:
    # OpenCV 3 returns the contour list as the second element of the result,
    # other versions return it as the first.
    contours = contours[1] if imutils.is_cv3() else contours[0]

TLousky · Accepted Answer

You might be looking for a skeleton rather than contours here. Check out this Skeletonize function in Scikit Image:

import numpy as np  # needed for np.float32 below; not imported elsewhere in the script
from skimage.morphology import skeletonize

# thresh holds only 0 and 255 (binary threshold with maxval=255), so dividing
# by 255 yields a 0.0/1.0 mask, which is then thinned to its skeleton.
skeleton = skeletonize(thresh.astype(np.float32) / 255)

# Show the thresholded image and its skeleton stacked vertically for comparison
f, axs = plt.subplots(2,1,figsize=(10,6))
for ax, img, t in zip(axs, [thresh, skeleton], ['Thresholded Image', 'SKImage Skeleton']):
    ax.imshow( img, 'gray' )
    ax.set_title(t)
    ax.axis('off')

plt.show()

Remove unwanted lines and smooth CAPTCHA text

Answers (1)

Related Questions