Converting graphs from a scanned document into data

Question

I'm currently trying to write something that can extract data from some uncommon graphs in a book. I scanned the pages of the book, and by using opencv I would like to detect some features from the graphs in order to convert it into useable data. In the left graph I'm looking for the height of the "triangles" and in the right graph the distance from the center to the points where the dotted lines intersect with the gray area. In both cases I would like to convert these values into numeric data for further usage.

The first thing I thought of was detecting the lines of the charts, in the hopes I could somehow measure their length or position. For this I'm using the Hough Line Transform. The following snippet of code shows how far I've gotten already.

import numpy as np
import cv2

# Reading the image
img = cv2.imread('test2.jpg')
# Convert the image to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Apply edge detection
edges = cv2.Canny(gray,50,150,apertureSize = 3)

# Line detection
lines = cv2.HoughLinesP(edges,1,np.pi/180,100,minLineLength=50,maxLineGap=20)

for line in lines:
    x1,y1,x2,y2 = line[0]
    cv2.line(img,(x1,y1),(x2,y2),(0,0,255),2)

cv2.imwrite('linesDetected.jpg',img)

The only problem is that this detection algorithm is not accurate at all. At least not for me. And in order to extract some data from the charts, the detection of the lines should be somewhat accurate. Is their any way I could do this? Or is my strategy to detect lines just wrong in the first place? Should I maybe start with detecting something else, like circles,object sizes, contours or colors?

Stephen Meschke · Accepted Answer

Using color segmentation is an easy way to convert this graph to data. This method does require some manual annotation. After the graph is segmented, count the pixels for each color. Check out the 'watershed' demo in the demo files that are included in the OpenCV library:

import numpy as np
import cv2 as cv
from common import Sketcher

class App:
    def __init__(self, fn):
        self.img = cv.imread(fn)
        self.img = cv.resize(self.img, (654,654))
        h, w = self.img.shape[:2]
        self.markers = np.zeros((h, w), np.int32)
        self.markers_vis = self.img.copy()
        self.cur_marker = 1
        self.colors = np.int32( list(np.ndindex(2, 2, 3)) ) * 123
        self.auto_update = True
        self.sketch = Sketcher('img', [self.markers_vis, self.markers], self.get_colors)

    def get_colors(self):
        return list(map(int, self.colors[self.cur_marker])), self.cur_marker

    def watershed(self):
        m = self.markers.copy()
        cv.watershed(self.img, m)
        cv.imshow('img', self.img)        
        overlay = self.colors[np.maximum(m, 0)]
        vis = cv.addWeighted(self.img, 0.5, overlay, 0.5, 0.0, dtype=cv.CV_8UC3)
        cv.imshow('overlay', np.array(overlay, np.uint8))
        cv.imwrite('/home/stephen/Desktop/overlay.png', np.array(overlay, np.uint8))
        cv.imshow('watershed', vis)

    def run(self):
        while cv.getWindowProperty('img', 0) != -1 or cv.getWindowProperty('watershed', 0) != -1:
            ch = cv.waitKey(50)
            if ch >= ord('1') and ch <= ord('9'):
                self.cur_marker = ch - ord('0')
                print('marker: ', self.cur_marker)
            if self.sketch.dirty and self.auto_update:
                self.watershed()
                self.sketch.dirty = False
            if ch == 27: break
        cv.destroyAllWindows()


fn = '/home/stephen/Desktop/test.png'
App(cv.samples.findFile(fn)).run()

The output will be an image like this:

You can count the pixels for each color using this code:

# Extract the values from the image
vals = []
img = cv.imread('/home/stephen/Desktop/overlay.png')
# Get the colors in the image
flat = img.reshape(-1, img.shape[-1])
colors = np.unique(flat, axis=0)
# Iterate through the colors (ignore the first and last colors)
for color in colors[1:-1]:
    a,b,c = color
    lower = a-1, b-1, c-1
    upper = a+1,b+1,c+1
    lower = np.array(lower)
    upper = np.array(upper)
    mask = cv.inRange(img, lower, upper)
    vals.append(sum(sum(mask)))
    cv.imshow('mask', mask)
    cv.waitKey(0)
cv.destroyAllWindows()

And print out the output data using this code:

names = ['alcohol', 'esters', 'biter', 'hoppy', 'acid', 'zoetheid', 'mout']
print(list(zip(names, vals)))

The output is:

[('alcohol', 22118), ('esters', 26000), ('biter', 16245), ('hoppy', 21170), ('acid', 19156), ('zoetheid', 11090), ('mout', 7167)]

Converting graphs from a scanned document into data

Answers (1)

Related Questions