Scikit Learn Perceptron always returns same threshold for 1D vector and converges

Question

I am trying to run a simulation where the true population is 2 classes of normal distribution, both mean 0, standard deviation 4000. I am attempting to use a perceptron to determine the relation of sample size to degree of overfitting. However, the perceptron always converges after 6 iterations with a threshold of 0 despite a sample size of 10 per each class which you can clearly see should not have a threshold of 0. Why is the threshold always 0? Also, is there a better way to output the threshold than my code below? I'm using perceptron because I want the simplest possible classifier - is there a simpler, easier classifier to use? Note, Logistic Regression does seem to have thresholds other than 0 when used in exactly the same way.

import numpy as np
mu, sigma = 0, 4000 # mean and standard deviation
pop_size=int(1e4)
p1 = (np.random.normal(mu, sigma, pop_size)) #1 million, pinky
p2 = (np.random.normal(mu, sigma, pop_size))

#take 10 samples of each group and plot on the same plot
def sample_pop(n):
    s1 = np.random.choice(p1, size=n, replace=False)
    s2 = np.random.choice(p2, size=n, replace=False)
    plt.subplot(211)
    count, bins, ignored = plt.hist(p1, 50, density=False, color='green', range=[-15000, 15000], histtype='bar', ec='black')
    plt.ylabel("n with Rebel Alliance")
    ymax=plt.gca().get_ylim()
    plt.plot(s1,[ymax]*n,'o',color='green')
    plt.subplot(212)
    count, bins, ignored = plt.hist(p2, 50, density=False, color = "red", range=[-15000, 15000], histtype='bar', ec='black')
    plt.xlabel("Midi Clorian Rate (The Force)")
    plt.ylabel("n with Dark Side")
    ymax=plt.gca().get_ylim()[1]
    plt.plot(s2,[ymax]*n,'x',color='red')
    plt.show()
    return s1,s2
n=10
s1,s2=sample_pop(n)

from sklearn.linear_model import Perceptron
clf = Perceptron()
s_all=np.hstack((s1,s2)).reshape(-1, 1)
y=np.hstack( ( [0]*len(s1), [1]*len(s2) ) )
clf.fit(s_all, y)

def plot1D(X, y, model,show=True):

    # adapted from https://github.com/tirthajyoti/Machine-Learning-with #Python/blob/master/Utilities/ML-Python-utils.py
    
    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = 0.2    # point in the mesh [x_min, m_max]x[y_min, y_max].    

    # Plot the decision boundary. For that, we will assign a color to each
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = -.1,.1# X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    # Predictions to obtain the classification results
    #Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    Z = model.predict(np.arange(x_min, x_max, 0.1).reshape(-1, 1))
    dZ=np.diff(Z)
    print(Z[np.where(abs(dZ)>0)[0]]) #this is the threshold
    

    # Plotting
    if show:
        plt.figure(figsize=(6,6))
    plt.contourf(xx, yy, np.vstack((Z,Z)), alpha=0.4)
    plt.scatter(X[:, 0], np.array( [-.05]*len(X) ), c=y, alpha=0.8, edgecolor="k")
    plt.ylim(-.1,0)
    plt.gca().get_yaxis().set_ticks([]) #set_visible(False)
    plt.xlabel('Midichlorian Rate (The Force)')
    if show:
        plt.show()
        
plot1D(s_all,y,clf)

from sklearn.metrics import accuracy_score
acc=accuracy_score(y, clf.predict(s_all))
acc
clf.n_iter_

PS - adding this image to answer Chris' comment below:

Scikit Learn Perceptron always returns same threshold for 1D vector and converges <10 iter

Answers (1)

Related Questions

Scikit Learn Perceptron always returns same threshold for 1D vector and converges &lt;10 iter

Answers (1)

Related Questions

Scikit Learn Perceptron always returns same threshold for 1D vector and converges <10 iter