Reputation: 765
Here, is the example of 2D scatter data
but when I draw the eigenvectors,
the plot is compressed into 1D image
.
I'm trying to do PCA on a temperature and strain data.
Why I have a 1D vector of the combination of the scattering and plotting the eigen vector ?
def process_data_PCA(temperature, strain):
print("process data")
T1 = temperature['T1'].tolist()
T2 = temperature['T2'].tolist()
T3 = temperature['T3'].tolist()
T4 = temperature['T4'].tolist()
T5 = temperature['T5'].tolist()
T6 = temperature['T6'].tolist()
T7 = temperature['T7'].tolist()
T8 = temperature['T8'].tolist()
T9 = temperature['T9'].tolist()
T10 = temperature['T10'].tolist()
W_A1 = strain[0]
N = len(T1)
xData = T1
yData = W_A1
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))
print(data)
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
print(eigenvectors)
fig, ax = plt.subplots()
ax.scatter(xData, yData, s= 0.1)
for axis in eigenvectors:
start, end = mu, mu + sigma * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.show()
print(data)
[[14.25 0. ]
[14.25 0. ]
[14.26 0. ]
...
[12.51 -0.02470534]
[12.51 -0.02540376]
[12.52 -0.02542746]]
[[-0.99999927 -0.00120856]
[-0.00120856 0.99999927]]
eigenvectors [-0.99999927 -0.00120856] [-0.00120856 0.99999927]
start, end
1.95096698e+01 -5.70968943e-03] [ 1.7057429e+01 -8.6733643e-03]
[ 1.95096698e+01 -5.70968943e-03] [19.50670611 2.44653112]
Upvotes: 0
Views: 94
Reputation: 2543
Here I tested you code and I think a main problem for you is the line sigma = projected_data.std(axis=0).mean()
, to look at the scatter in the respective eigendirection, you do not need the mean()
but you need both values of std
in both eigendirection. So just remove the mean to sigma = projected_data.std(axis=0)
and you get good pca plot. I tested it below with some pseudorandom number.
#data = np.hstack((xData, yData))
N = 8000
data = np.random.random((N,2))
########################################################################
# Random number in Ellipse
########################################################################
a = 0.5
b = 0.15
a2 = a**2
b2 = b**2
cx = 0.5
cy = 0.5
xData = []
yData = []
for i in range(N):
if ((data[i,0]-cx)**2/a2+(data[i,1]-cy)**2/b2 -1.)<0:
xData.append(data[i,0])
yData.append(data[i,1])
##################################################
xData = np.array(xData)
yData = np.array(yData)
data = np.vstack((xData, yData)).T
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
print np.shape(projected_data)
############################################################
#sigma = projected_data.std(axis=0).mean()
# In this line, mean is removed
sigma = projected_data.std(axis=0)
############################################################
fig, ax = plt.subplots(figsize=(7,7))
ax.scatter(xData, yData, s= 0.1)
ax.scatter(mu[0],mu[1],s = 50,marker='*', c = 'r')
for axis,s in zip(eigenvectors,sigma):
#start, end = mu, mu + sigma * axis
start, end = mu, mu + s * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
ax.set_aspect('equal')
plt.savefig("pcs.png")
plt.show()
#print eigenvalues
How it looks like now.
Upvotes: 1
Reputation: 2078
It seems like the problem in your case is the limit of the axes. The annotation does not update the information of axes limits and the plot uses only the limits of the data (scatter plot on the image).
I was able to produce manually the plot you wanted with the code (function plot_andre).
#!/usr/bin/env ipython
import numpy as np
from pylab import plt
# ---------------------------------------------
np.random.seed(0);
# ---------------------------------------------
def process_data_PCA():
print("process data")
T1=np.random.random((60000,1));
W_A1=np.random.random((60000,1));
N = len(T1)
xData = T1
yData = W_A1
xData = np.reshape(xData, (N, 1))
yData = np.reshape(yData, (N, 1))
data = np.hstack((xData, yData))
print(data)
mu = data.mean(axis=0)
data = data - mu
# data = (data - mu)/data.std(axis=0) # Uncommenting this reproduces mlab.PCA results
eigenvectors, eigenvalues, V = np.linalg.svd(data.T, full_matrices=False)
projected_data = np.dot(data, eigenvectors)
sigma = projected_data.std(axis=0).mean()
print(eigenvectors)
# ----------------------------------------
fig, ax = plt.subplots()
ax.scatter(xData, yData, s= 0.1)
for axis in eigenvectors:
start, end = mu, mu + sigma * axis
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
# ------------------------------------
print start,end
# ----------------------------------------
ax.set_aspect('equal');#plt.axis('tight');
plt.savefig('test_01.png',bbox_inches='tight');
plt.show()
# -----------------------------------
def plot_andre():
# ----------------------------------------
vectors=[[[1.95096698e+01,-5.70968943e-03],[ 1.7057429e+01,-8.6733643e-03]],[[ 1.95096698e+01,-5.70968943e-03],[19.50670611,2.44653112]]];
# ----------------------------------------
fig, ax = plt.subplots()
for iax in range(len(vectors)):
start,end=vectors[iax];
ax.annotate(
'', xy=end, xycoords='data',
xytext=start, textcoords='data',
arrowprops=dict(facecolor='red', width=2.0))
# ----------------------------------------
vectors=np.array(vectors);
ax.set_xlim(np.min(vectors[:,0]),np.max(vectors[:,0]));ax.set_ylim(np.min(vectors[:,1]),np.max(vectors[:,1]));
ax.set_aspect('equal');#plt.axis('tight');
plt.savefig('test_02.png',bbox_inches='tight');
plt.show()
# -----------------------------------
process_data_PCA();
plot_andre();
Just set the axes limits to some suitable values like 0-20 and 0-20.
Upvotes: 1