# Avisek Gupta
# Indian Statistical Institute, Kolkata
import numpy as np
import matplotlib.pyplot as plt
# Three 2-D Gaussian blobs with unit standard deviation, 100 points each,
# centered at (0, 0), (10, 0) and (5, 6).
blob_shape = (100, 2)
data1 = np.random.normal(loc=[0, 0], scale=1, size=blob_shape)
data2 = np.random.normal(loc=[10, 0], scale=1, size=blob_shape)
data3 = np.random.normal(loc=[5, 6], scale=1, size=blob_shape)

# Stack the blobs row-wise into a single (300, 2) data set.
data = np.concatenate([data1, data2, data3], axis=0)
# Plot the raw, unlabeled points and report the data set's dimensions.
x_coords, y_coords = data[:, 0], data[:, 1]
plt.scatter(x_coords, y_coords, marker='x')
plt.show()
print(data.shape)
# k-Means algorithm
from scipy.spatial.distance import cdist
def compute_distance(data, v):
    """Return the squared Euclidean distance from every row of *data* to *v*.

    ``v`` is broadcast against ``data``; the squared differences are summed
    along axis 1, giving one value per row. No square root is taken.
    """
    offsets = data - v
    return np.square(offsets).sum(axis=1)
def kmeans(data, k, max_iter=3, tol=1e-9):
    """Cluster the rows of *data* into *k* groups with Lloyd's k-means.

    Parameters
    ----------
    data : array-like, shape (n, d)
        One observation per row.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n``.
    max_iter : int, optional
        Upper bound on assignment/update rounds. The default of 3 matches the
        previously hard-coded limit; pass a larger value to run closer to
        convergence.
    tol : float, optional
        Stop early once the summed squared movement of all centers in one
        round drops below this value.

    Returns
    -------
    centers : ndarray of float, shape (k, d)
        Final cluster centers.
    U : ndarray of int, shape (n,)
        Index of the nearest center for each row, as computed in the last
        assignment step.
    initial_centers : ndarray, shape (k, d)
        The randomly selected rows of *data* used as starting centers.

    Raises
    ------
    ValueError
        If *k* is not between 1 and the number of rows in *data*.
    """
    data = np.asarray(data)
    n = data.shape[0]
    if not 1 <= k <= n:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r" % (n, k)
        )

    # Pick k distinct rows at random as the starting centers.
    idxs = np.random.permutation(n)[0:k]
    initial_centers = data[idxs, :]

    # Work in floating point: with integer data (e.g. uint8 pixels) the mean
    # update would be truncated and ``centers - prev_centers`` would wrap
    # around, corrupting the convergence test.
    centers = initial_centers.astype(float)

    U = np.zeros(n, dtype=int)
    for _ in range(max_iter):
        # Assignment step: nearest center by Euclidean distance.
        D = cdist(data, centers)
        U = np.argmin(D, axis=1)

        # Update step: move each center to the mean of its members.
        prev_centers = centers.copy()
        for i in range(k):
            members = data[U == i, :]
            # An empty cluster keeps its previous center; taking the mean of
            # zero rows would yield NaN and poison every later distance.
            if members.shape[0] > 0:
                centers[i, :] = members.mean(axis=0)

        if np.sum((centers - prev_centers) ** 2) < tol:
            break

    return centers, U, initial_centers
# Cluster the 2-D toy data into three groups and print the fitted centers.
k = 3
centers, U, initial_centers = kmeans(data, k=k)
print(centers)

# One scatter call per cluster so each group gets its own colour.
for label in range(k):
    members = data[U == label]
    plt.scatter(members[:, 0], members[:, 1], marker='x')

# Overlay the final centers (black) and the starting centers (red).
for pts, colour in ((centers, 'k'), (initial_centers, 'r')):
    plt.scatter(pts[:, 0], pts[:, 1], marker='o', c=colour)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_sample_images
# Load scikit-learn's sample images and take the one at index 1.
dataset = load_sample_images()
img1 = dataset.images[1]

# Display it in a 50-dpi figure with the tick marks removed from both axes.
plt.figure(dpi=50)
plt.imshow(img1)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks([], [])
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_sample_images
# Reload the sample image and flatten it to one row per pixel, so that each
# pixel's channel values form a single observation for k-means.
dataset = load_sample_images()
img1 = dataset.images[1]
n_channels = img1.shape[2]
X = img1.reshape(-1, n_channels)

# Group the pixel colours into k clusters.
k = 3
centers, mem, init_centers = kmeans(X, k=k)

# Replace every pixel with the center of its cluster, then restore the
# original image shape with integer channel values.
img2 = centers[mem].reshape(img1.shape).astype(int)

# Show the quantized image without axis ticks.
plt.figure(dpi=50)
plt.imshow(img2)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks([], [])
plt.show()
%matplotlib notebook
import numpy as np
# Three 3-D Gaussian blobs with unit standard deviation, 100 points each.
# The generator is consumed in order, so a1, a2, a3 are drawn in sequence.
blob_means = ([0, 0, 0], [10, 10, 10], [0, 10, 5])
a1, a2, a3 = (
    np.random.normal(loc=mean, scale=1, size=(100, 3)) for mean in blob_means
)
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Draw the three blobs on a single 3-D axes, one scatter call per blob so
# that each is coloured separately.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for blob in (a1, a2, a3):
    xs, ys, zs = blob.T
    ax.scatter(xs, ys, zs, marker='x')
plt.show()
%matplotlib inline
# Stack the three blobs into one data set and center it column-wise.
data = np.vstack((a1, a2, a3))
print(data.shape)
data2 = data - data.mean(axis=0)

# (An SVD of data2 was tried here previously and is left out.)

# 1. Eigendecomposition of the symmetric matrix X^T X.
scatter_matrix = data2.T.dot(data2)
eigvals, eigvecs = np.linalg.eigh(scatter_matrix)
print(eigvecs)

# 2. Keep the eigenvectors belonging to the k largest eigenvalues.
#    eigh returns eigenvalues in ascending order, so these are the last k
#    columns (the final column is the leading component).
k = 2
pcomps = eigvecs[:, -k:]

# 3. Project the centered data onto the selected eigenvectors.
z = data2.dot(pcomps)
print(z.shape)

# Scatter plot of the 2-D projection.
plt.figure()
plt.scatter(z[:, 0], z[:, 1], marker='x')
plt.show()