Reputation: 550
This is my first OpenCV program, so please be forgiving if I seem ignorant of some basic computer vision concepts.
UPDATE: See new code/new problem at bottom thanks to the answer by sturkmen
I am working on "digitizing" a large set of images, like the ones attached, as a project. All images come from the same source. The end goal is to pass extracted chunks of text to tesseract, the OCR library.
(Source code at bottom) I am going to explain my current approach, and then state my questions.
My current approach is as follows:
Apply inverse binary threshold
Dilate image and find contours
Create a `boundingRect` from each contour, then filter for minimum and maximum dimensions
This has worked ok
My desired end result is to have one `boundingRect` around each column. So for the provided pictures that would be seven of them.
So, the problem is that the tabulated "mini sections" in the image are not reliably picked up (the best example is the one in the far right column that does not have a `boundingRect` around it).
I can think of two possible solutions (so as to not be an open-ended / opinion type question) but if you know of a better solution do share it!
1) Combine `boundingRect`s that are vertical neighbors to capture the columns. This approach may fail on some edge cases.
2) Find a different way to manipulate the image before finding the contours. From my research, the run length smoothing algorithm looks promising?
So my question is, which approach is best? Have I overlooked a better solution? I am inexperienced in this department, so no suggestion is too small.
Thanks for reading!
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace cv;
using namespace std;
int main(int argc, char* argv[])
Mat image = imread(path_to_file);
Mat gray;
cvtColor(image, gray, COLOR_BGR2GRAY);
Mat fin;
double thresh = threshold(gray, fin, 160, 255, THRESH_BINARY_INV);
//size impacts dilation
Mat kernel = getStructuringElement(MORPH_CROSS, Size(2, 4));
Mat dilated;
dilate(fin, dilated, kernel, Point(-1,-1), 6);
Mat hierarchy;
vector<vector<Point> >contours;
findContours(dilated, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
//potentially sort by x
for (const auto& c : contours)
// x y
//columns 850 x 5400
Rect r = boundingRect(c);
if (r.height > 3000 || r.width > 875)
if (r.height < 100 || r.width < 500)
rectangle(image, r, Scalar(255, 0, 255), 2); //made thicker
imwrite("test.png", image);
return 0;
Original Image:
Updated code
int main(int argc, char* argv[])
Mat image = imread(path_to_file);
Mat gray;
cvtColor(image, gray, COLOR_BGR2GRAY);
Mat fin;
double thresh = threshold(gray, fin, 160, 255, THRESH_BINARY_INV);
Mat kernel = getStructuringElement(MORPH_CROSS, Size(2, 4));
Mat dilated;
dilate(fin, dilated, kernel, Point(-1,-1), 6);
vector<Vec4i> hierarchy;
vector<vector<Point> >contours;
findContours(dilated, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
vector<Rect> rects;
Rect big_rect = Rect(image.cols/2,image.rows/2,1,1);
for (const auto& c : contours)
// x y
//columns 850 x 5400
Rect r = boundingRect(c);
if (r.height > 5500 || r.width > 875)
if (r.height < 300 || r.width < 500)
big_rect = big_rect | r; // here we will find bounding box of all Rects
rects.push_back( r ); // stores rects
for ( size_t i = 0; i < rects.size(); i++ )
// sets y and height of all rects
//cout << rects[i].x << endl;
rects[i].y = big_rect.y;
rects[i].height = big_rect.height;
//groupRectangles(rects, 1); DIDN'T WORK
for ( size_t i = 0; i < rects.size(); i++ )
rectangle(image, rects[i], Scalar(255, 0, 255), 2);
imshow("test", image);
New Result:
New Problem: There are many `boundingRect`s around each column (you probably can't tell by looking at the picture). This is a problem, because I want to make a sub-image of each column, e.g. `Mat ROI = image(rects[i])`, which would produce many more than the desired 7 images.
New Question: How can I combine the multitude of rectangles per column into one? I have seen OpenCV's `groupRectangles`, but it failed to work.
Upvotes: 7
Views: 420
Reputation: 3550
Just to show a method, I tried changing your code as below.
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace cv;
using namespace std;
int main(int argc, char* argv[])
Mat image = imread(argv[1]);
Mat gray;
cvtColor(image, gray, COLOR_BGR2GRAY);
Mat fin;
double thresh = threshold(gray, fin, 160, 255, THRESH_BINARY_INV);
//size impacts dilation
Mat kernel = getStructuringElement(MORPH_CROSS, Size(2, 4));
Mat dilated;
dilate(fin, dilated, kernel, Point(-1,-1), 1);
Mat hierarchy;
vector<vector<Point> >contours;
findContours(dilated, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
vector<Rect> rects;
Rect big_rect = Rect(image.cols/2,image.rows/2,1,1);
//potentially sort by x
for (const auto& c : contours)
// x y
//columns 850 x 5400
Rect r = boundingRect(c);
if (r.height > 3000 || r.width > 875)
if (r.height < 10 || r.width < 10) // changed for test small image
big_rect = big_rect | r; // here we will find bounding box of all Rects
rects.push_back( r ); // stores rects
for ( size_t i = 0; i < rects.size(); i++ )
// sets y and height of all rects
rects[i].y = big_rect.y;
rects[i].height = big_rect.height;
for ( size_t i = 0; i < rects.size(); i++ )
rectangle(image, rects[i], Scalar(255, 0, 255), 2);
imshow("result", image);
return 0;
I know it is incomplete, but I hope you will understand the approach and complete it by filtering the rects to find the desired seven rects; otherwise I will complete the code soon.
EDIT: The code may be a bit dirty, but `vector<Rect> final_rects` contains only the rects you need.
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
using namespace cv;
using namespace std;
struct sorter_func
bool operator ()( Rect a, Rect b )
return a.x < b.x;
int main(int argc, char* argv[])
Mat image = imread(argv[1]);
Mat gray;
cvtColor(image, gray, COLOR_BGR2GRAY);
Mat fin;
double thresh = threshold(gray, fin, 160, 255, THRESH_BINARY_INV);
//size impacts dilation
Mat kernel = getStructuringElement(MORPH_CROSS, Size(2, 4));
Mat dilated;
dilate(fin, dilated, kernel, Point(-1,-1), 1);
Mat hierarchy;
vector<vector<Point> >contours;
findContours(dilated, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_NONE);
vector<Rect> rects;
Rect big_rect = Rect(image.cols/2,image.rows/2,1,1);
//potentially sort by x
for (const auto& c : contours)
// x y
//columns 850 x 5400
Rect r = boundingRect(c);
if (r.height > 3000 || r.width > 875)
if (r.height < 10 || r.width < 10) // changed for test small image
big_rect = big_rect | r; // here we will find bounding box of all Rects
rects.push_back( r ); // stores rects
for ( size_t i = 0; i < rects.size(); i++ )
// sets y and height of all rects
rects[i].y = big_rect.y;
rects[i].height = big_rect.height;
std::sort(rects.begin(), rects.end(), sorter_func());
for ( size_t i = 1; i < rects.size(); i++ )
Rect big_rect = rects[i-1] | rects[i];
if( big_rect.width < rects[i-1].width + rects[i].width )
rects[i-1] = Rect();
rects[i] = big_rect;
vector<Rect> final_rects;
for ( size_t i = 1; i < rects.size(); i++ )
if( rects[i].width > 0 )
rectangle(image, rects[i], Scalar(rand()&255,rand()&255,rand()&255), 2);
final_rects.push_back( rects[i] );
cerr << final_rects.size() << endl;
imshow("result", image);
return 0;
Upvotes: 1