Reputation: 31
I was trying to code up an EM algorithm in Python for clustering images of different types. My understanding of the EM algorithm is as follows:
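For a mixture of m Gaussians fitted to data points x_1, …, x_n, the standard updates are:

E-step (responsibilities):

    r_ic = π_c · N(x_i | μ_c, Σ_c) / Σ_k π_k · N(x_i | μ_k, Σ_k)

M-step (with m_c = Σ_i r_ic):

    π_c = m_c / n
    μ_c = (1 / m_c) · Σ_i r_ic · x_i
    Σ_c = (1 / m_c) · Σ_i r_ic · (x_i − μ_c)(x_i − μ_c)^T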
Accordingly, I implemented it in Python. Here's my code:
import numpy as np
from scipy import stats

# μ: mean vectors ∈ R^(self.m x self.n)
# Σ: covariance matrices ∈ R^(self.m x self.n x self.n)
# π: mixing probabilities of each component
class gmm:
    def __init__(self, n_components):
        self.m = n_components
        self.π = np.random.random((self.m,))
        self.x = None
        self.Σ = None
        self.μ = None
        self.r = None
        self.n = None  # length of data

    @classmethod
    def N(cls, x, μ, Σ):
        # density of x under the multivariate normal N(μ, Σ)
        return stats.multivariate_normal(μ, Σ).pdf(x)

    def E_step(self):
        # responsibilities: r[i, c] ∝ π_c · N(x_i | μ_c, Σ_c), normalized over c
        for i, x_i in enumerate(self.x):
            den = 0
            for c in range(self.m):
                den += self.π[c] * gmm.N(x_i, self.μ[c], self.Σ[c])
            for c in range(self.m):
                num = self.π[c] * gmm.N(x_i, self.μ[c], self.Σ[c])
                self.r[i, c] = num / den
        print(f'{self.r}\n')

    def M_step(self):
        m_c = np.sum(self.r, axis=0)  # effective number of points per component
        self.π = m_c / self.m
        for c in range(self.m):
            s1 = 0
            s2 = 0
            for i in range(self.n):
                s1 += self.r[i, c] * self.x[i]
                s2 += self.r[i, c] * (self.x[i] - self.μ[c]).dot(self.x[i] - self.μ[c])
            self.μ[c] = s1 / m_c[c]
            self.Σ[c] = s2 / m_c[c]

    def fit(self, x, iterations=10):
        self.x = x
        self.n = x.shape[0]
        self.r = np.empty((self.n, self.m))
        self.μ = np.random.random((self.m, self.n))
        Sigma = [np.random.random((self.n, self.n)) for i in range(self.m)]
        # a symmetric diagonally dominant matrix is positive definite
        Sigma = [0.5 * (s + s.T) + 5 * np.eye(s.shape[0]) for s in Sigma]
        self.Σ = np.asarray(Sigma)
        for i in range(iterations):
            self.E_step()
            self.M_step()

    def params(self):
        return self.π, self.μ, self.Σ
if __name__ == '__main__':
    data_dim = 5  # no. of data dimensions
    data = [[]] * 6
    data[0] = np.random.normal(0.5, 2, size=(300,))
    data[1] = np.random.normal(3, 2, size=(300,))
    data[2] = np.random.normal(-1, 0.1, size=(300,))
    data[3] = np.random.normal(2, 3.14159, size=(300,))
    data[4] = np.random.normal(0, 1, size=(300,))
    data[5] = np.random.normal(3.5, 5, size=(300,))
    p = [0.1, 0.15, 0.22, 0.3, 0.2, 0.03]
    vals = [0, 1, 2, 3, 4, 5]
    combined = []
    for i in range(data_dim):
        choice = np.random.choice(vals, p=p)
        combined.append(np.random.choice(data[choice]))
    combined = np.array(combined)

    G = gmm(n_components=6)
    G.fit(combined)
    pi, mu, sigma = G.params()
    print(pi)
    print(mu)
    print(sigma)
Now comes the problem: while running the code, the covariance matrix Σ becomes singular after some iterations. Specifically, all the entries of Σ suddenly become identical in a particular iteration.
I have tried adding some random noise to Σ when this happens, but that only seems to delay the inevitable. Any help/comments will be greatly appreciated. Thanks in advance :)
Upvotes: 3
Views: 1302
Reputation: 1311
To prevent the covariance matrix from becoming singular, you can add a small value along the diagonal of the matrix, i.e.
val * np.identity(size)
as this ensures that the covariance matrix remains positive definite and therefore invertible. For instance, sklearn's GaussianMixture uses reg_covar=1e-6 by default for this regularization.
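As a minimal sketch (regularize_cov and reg_covar are just illustrative names, with the default mirroring sklearn's), the covariance update in your M_step could be wrapped like this:

import numpy as np

def regularize_cov(cov, reg_covar=1e-6):
    # add a small constant to the diagonal so the matrix stays positive definite
    return cov + reg_covar * np.eye(cov.shape[0])

# in M_step, the covariance update would then become:
#   self.Σ[c] = regularize_cov(s2 / m_c[c])

Adding reg_covar · I shifts every eigenvalue of the symmetric covariance estimate up by reg_covar, so as long as the estimate is at least positive semi-definite, the regularized matrix is invertible.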
Upvotes: 1