Reputation: 1312
I have a list of points that almost form a straight line (but they are not perfectly aligned on that line). I want to create a line that best describes those points.
For example, for points:
points = [(150, 250),(180, 220), (200, 195), (225, 180), (250, 150), (275, 115), (300, 100)]
I want to create a line similar to this:
The problem is that sometimes there are points that are very far from that line (outliers). I want to ignore those outliers while creating the line:
How can I create this line?
P.S. this is the code for colab to generate the points:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
# Demo script: draw the sample points with their indices on a blank canvas,
# then connect the first and last point with a straight line.

# Blank 400x500, 3-channel, 8-bit canvas.
img = np.zeros([400, 500, 3], dtype=np.uint8)

# Sample data; point 3, (225, 100), lies far off the trend of the others.
points = [(150, 250), (180, 225), (200, 200), (225, 100), (250, 150), (275, 115), (300, 100)]
# Alternative data set without that far-off point:
# points = [(150, 250), (180, 220), (200, 195), (225, 180), (250, 150), (275, 115), (300, 100)]

for idx, point in enumerate(points):
    # radius=0 with thickness=10 renders each point as a small filled dot.
    img = cv2.circle(img, point, radius=0, color=(0, 0, 255), thickness=10)

    # Place the index label at an offset of (-20, +5) pixels from the point.
    # A separate name is used so the loop variable is not rebound.
    x, y = point
    label_org = (round(x - 20), round(y + 5))
    img = cv2.putText(
        img=img,
        text=str(idx),
        fontFace=cv2.FONT_HERSHEY_SCRIPT_COMPLEX,
        org=label_org,
        fontScale=0.5,
        color=(0, 255, 0),
    )

# Naive "fit": a segment from the first point to the last one. It depends
# only on those two points, so a far-off first or last point skews it.
image = cv2.line(img, points[0], points[-1], (255, 0, 255), 1)
cv2_imshow(img)
In my code, I draw the line between the first and last elements of the list of points, so of course if the last point is an outlier, the whole line is thrown off:
Upvotes: 1
Views: 450
Reputation: 1312
Thanks to @Christoph Rackwitz's answer, I followed sklearn's documentation for RANSAC and wrote a simple script that fits a line with RANSAC
(of course, it still needs to be polished):
import numpy as np
from matplotlib import pyplot as plt
from sklearn import linear_model, datasets
"""
Add points:
"""
# Fit a line to the sample points twice, once with ordinary least squares and
# once with RANSAC, then plot both fits together with RANSAC's inlier/outlier
# classification of the points.

# Sample data; the last point, (300, 150), lies off the trend of the others.
points = [(150, 250), (175, 225), (200, 200), (225, 175), (250, 150), (275, 115), (300, 150)]

# Split the (x, y) pairs into two 1-D coordinate arrays.
X, Y = np.array(points).T

# The estimators are fitted on a 2-D column of x values, one row per sample.
X_col = X.reshape(-1, 1)

# Ordinary least-squares fit, for comparison.
lr = linear_model.LinearRegression()
lr.fit(X_col, Y)

# Robustly fit linear model with RANSAC algorithm.
# RANSAC draws random subsets, so the seed is fixed to make the fit repeatable.
ransac = linear_model.RANSACRegressor(random_state=0)
ransac.fit(X_col, Y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

# Predict data of estimated models over the full x range (endpoint included).
line_X = np.arange(X.min(), X.max() + 1)[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)

# Compare estimated coefficients. There is no known "true" slope for this
# hand-picked data, so only the two fitted slopes are printed.
print("Estimated coefficients (linear regression, RANSAC):")
print(lr.coef_, ransac.estimator_.coef_)

lw = 2
# Flip the y axis (presumably so the plot matches image coordinates, where y
# grows downward -- confirm against the intended display).
plt.gca().invert_yaxis()
plt.scatter(
    X[inlier_mask], Y[inlier_mask], color="yellowgreen", marker=".", label="Inliers"
)
plt.scatter(
    X[outlier_mask], Y[outlier_mask], color="gold", marker=".", label="Outliers"
)
plt.plot(line_X, line_y, color="navy", linewidth=lw, label="Linear regressor")
plt.plot(
    line_X,
    line_y_ransac,
    color="cornflowerblue",
    linewidth=lw,
    label="RANSAC regressor",
)
plt.legend(loc="lower right")
plt.xlabel("Input")
plt.ylabel("Response")
plt.show()
And I got the following image (which looks great):
Upvotes: 2