andre
andre

Reputation: 765

Principle Component Analysis using Python

Here, figure of my data is the example of 2D scatter data but when I draw the eigenvectors, the plot is compressed into 1D image like this.

I'm trying to do PCA on a temperature and strain data.

Why I have a 1D vector of the combination of the scattering and plotting the eigen vector ?

def process_data_PCA(temperature, strain):
    print("process data")
    T1 = temperature['T1'].tolist()
    T2 = temperature['T2'].tolist()
    T3 = temperature['T3'].tolist()
    T4 = temperature['T4'].tolist()
    T5 = temperature['T5'].tolist()
    T6 = temperature['T6'].tolist()
    T7 = temperature['T7'].tolist()
    T8 = temperature['T8'].tolist()
    T9 = temperature['T9'].tolist()
    T10 = temperature['T10'].tolist()

    W_A1 = strain[0]
    N = len(T1)
    xData =  T1
    yData =  W_A1
    xData = np.reshape(xData, (N, 1))
    yData = np.reshape(yData, (N, 1))

    data = np.hstack((xData, yData))
    print(data)
    mu = data.mean(axis=0)
    data = data - mu
    # data = (data - mu)/data.std(axis=0)  # Uncommenting this reproduces mlab.PCA results
    eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
    projected_data = np.dot(data, eigenvectors)
    sigma = projected_data.std(axis=0).mean()
    print(eigenvectors)

    fig, ax = plt.subplots()
    ax.scatter(xData, yData, s= 0.1)
    for axis in eigenvectors:
        start, end = mu, mu + sigma * axis
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))
    ax.set_aspect('equal')
    plt.show()

print(data)

[[14.25        0.        ]
 [14.25        0.        ]
 [14.26        0.        ]
 ...
 [12.51       -0.02470534]
 [12.51       -0.02540376]
 [12.52       -0.02542746]]
[[-0.99999927 -0.00120856]
 [-0.00120856  0.99999927]]

eigenvectors [-0.99999927 -0.00120856] [-0.00120856 0.99999927]

start, end

 1.95096698e+01 -5.70968943e-03] [ 1.7057429e+01 -8.6733643e-03]
[ 1.95096698e+01 -5.70968943e-03] [19.50670611  2.44653112]

Upvotes: 0

Views: 94

Answers (2)

jkhadka
jkhadka

Reputation: 2543

Here I tested you code and I think a main problem for you is the line sigma = projected_data.std(axis=0).mean(), to look at the scatter in the respective eigendirection, you do not need the mean() but you need both values of std in both eigendirection. So just remove the mean to sigma = projected_data.std(axis=0) and you get good pca plot. I tested it below with some pseudorandom number.

#data = np.hstack((xData, yData))
N = 8000
data = np.random.random((N,2))
########################################################################
# Random number in Ellipse
########################################################################
a = 0.5
b = 0.15
a2 = a**2
b2 = b**2
cx = 0.5
cy = 0.5
xData = []
yData = []
for i in range(N):
    if ((data[i,0]-cx)**2/a2+(data[i,1]-cy)**2/b2 -1.)<0:
        xData.append(data[i,0])
        yData.append(data[i,1])
##################################################
xData = np.array(xData)
yData = np.array(yData)
data = np.vstack((xData, yData)).T

mu = data.mean(axis=0)

data = data - mu
# data = (data - mu)/data.std(axis=0)  # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
print np.shape(projected_data)
############################################################
#sigma = projected_data.std(axis=0).mean()
# In this line, mean is removed
sigma = projected_data.std(axis=0)
############################################################
fig, ax = plt.subplots(figsize=(7,7))
ax.scatter(xData, yData, s= 0.1)
ax.scatter(mu[0],mu[1],s = 50,marker='*', c = 'r')
for axis,s in zip(eigenvectors,sigma):
    #start, end = mu, mu + sigma * axis
    start, end = mu, mu + s * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.savefig("pcs.png")
plt.show()

#print eigenvalues

How it looks like now.

enter image description here

Upvotes: 1

msi_gerva
msi_gerva

Reputation: 2078

It seems like the problem in your case is the limit of the axes. The annotation does not update the information of axes limits and the plot uses only the limits of the data (scatter plot on the image).

I was able to produce manually the plot you wanted with the code (function plot_andre).

#!/usr/bin/env ipython
import numpy as np
from pylab import plt
# ---------------------------------------------
np.random.seed(0);
# ---------------------------------------------
def process_data_PCA():
    print("process data")
    T1=np.random.random((60000,1));
    W_A1=np.random.random((60000,1));

    N = len(T1)
    xData =  T1
    yData =  W_A1
    xData = np.reshape(xData, (N, 1))
    yData = np.reshape(yData, (N, 1))

    data = np.hstack((xData, yData))
    print(data)
    mu = data.mean(axis=0)
    data = data - mu
    # data = (data - mu)/data.std(axis=0)  # Uncommenting this reproduces mlab.PCA results
    eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
    projected_data = np.dot(data, eigenvectors)
    sigma = projected_data.std(axis=0).mean()
    print(eigenvectors)
    # ----------------------------------------
    fig, ax = plt.subplots()
    ax.scatter(xData, yData, s= 0.1)
    for axis in eigenvectors:
        start, end = mu, mu + sigma * axis
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))
        # ------------------------------------
        print start,end
    # ----------------------------------------
    ax.set_aspect('equal');#plt.axis('tight');
    plt.savefig('test_01.png',bbox_inches='tight');
    plt.show()
# -----------------------------------
def plot_andre():
    # ----------------------------------------
    vectors=[[[1.95096698e+01,-5.70968943e-03],[ 1.7057429e+01,-8.6733643e-03]],[[ 1.95096698e+01,-5.70968943e-03],[19.50670611,2.44653112]]];
    # ----------------------------------------
    fig, ax = plt.subplots()
    for iax in range(len(vectors)):
        start,end=vectors[iax];
        ax.annotate(
            '', xy=end, xycoords='data',
            xytext=start, textcoords='data',
            arrowprops=dict(facecolor='red', width=2.0))
    # ----------------------------------------
    vectors=np.array(vectors);
    ax.set_xlim(np.min(vectors[:,0]),np.max(vectors[:,0]));ax.set_ylim(np.min(vectors[:,1]),np.max(vectors[:,1]));
    ax.set_aspect('equal');#plt.axis('tight');
    plt.savefig('test_02.png',bbox_inches='tight');
    plt.show()

# -----------------------------------
process_data_PCA();
plot_andre();

Just set the axes limits to some suitable values like 0-20 and 0-20.

Upvotes: 1

Related Questions