Reputation: 11
I am trying to find the bounding boxes (rectangles) of text in an image. I would like to scan the entire image at once and get all of the textual areas. The issue is that some text is ignored and some words are grouped together. The grouping should be word-wise: a rectangle should never contain more than one word, and each word should have its own rectangle. I am using the code given below.
int COCR::method_gradient(int nonGradient, int showOutput)
{
Mat large = imread(INPUT_FILE);
Mat rgb;
rgb = large; //--> change
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel;
if (!nonGradient)
{
morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
}
else
grad = small;
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
if (nonGradient)
rgb = MatGradient;
// filter contours
for (int idx = 0; idx >= 0; idx = hierarchy[idx][0])
{
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width*rect.height);
if (r > .25 /* assume at least 25% of the area is filled if it contains text */
&&
(rect.height > 8 && rect.width > 8)
)
{
rectangle(rgb, rect, Scalar(0, 255, 0), 2);
}
}
if (!nonGradient)
MatGradient = rgb;
if (showOutput)
imwrite(OUTPUT_FOLDER_PATH, rgb);
return 0;
}
// calling: the order of the two calls matters.
COCR obj_ocr;
// Pass 1: gradient mode (nonGradient = 0), no file written; the annotated
// image is stored in MatGradient.
obj_ocr.method_gradient(0, 0);
// Pass 2: non-gradient mode; rectangles are drawn onto the image stored by
// pass 1 and the result is written to OUTPUT_FOLDER_PATH (showOutput = 1).
obj_ocr.method_gradient(1, 1);
source img Please notice the red part. output img
source_2 Please see red part. output_2
Please advise how I can rectify the missing areas shown in the red ellipses.
Upvotes: 1
Views: 1198
Reputation: 118
Changing the closing from 9,1 to 5,1 fixes it for both images.
// connect horizontally oriented regions
// Morphological closing with a 5x1 rectangular kernel instead of the
// question's 9x1. `bw` and `morphKernel` come from the question's code.
// Presumably the narrower kernel bridges smaller horizontal gaps, so that
// neighbouring words are less likely to be merged into one region.
cv::Mat connected;
morphKernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 1));
cv::morphologyEx(bw, connected, cv::MORPH_CLOSE, morphKernel);
Upvotes: 1