# Author: Not specified | Language: python
# Description: Not specified | Timestamp: 2018-04-28 07:07:56 +0000
# View raw paste | Reply
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
def scatter_mat(data):
    """Return the scatter matrix of ``data``.

    ``np.cov`` normalises by ``n - 1`` by default, so multiplying its result
    by ``n - 1`` undoes that normalisation and yields the scatter matrix.

    Args:
        data: 2-D NumPy array with one sample per row.  It is transposed
            before being passed to ``np.cov``, which treats each row of its
            input as one variable.

    Returns:
        The covariance of ``data.T`` scaled by ``len(data) - 1``.
    """
    n_samples = len(data)
    return np.cov(data.T) * (n_samples - 1)
def reconstruction_error(eig_vals, d):
    """Return the percentage of the eigenvalue total held by the first ``d``.

    Despite the name, the value grows as more eigenvalues are included: it is
    ``100 * sum(eig_vals[:d]) / sum(eig_vals)``.

    Args:
        eig_vals: Sequence of eigenvalues.  Only the first ``d`` entries are
            counted, so the caller decides the ordering.
        d: Number of leading eigenvalues to include.

    Returns:
        The share of the total, expressed as a percentage.
    """
    total = sum(eig_vals)
    leading = sum(eig_vals[:d])
    return leading * 100.0 / total
def transform(X, m, eig):
    """Project ``X`` onto the rows of ``eig`` and map it back.

    Args:
        X: Sample vector.
        m: Vector subtracted from ``X`` before projecting and added back
            afterwards.
        eig: 2-D array whose rows are the basis vectors to project onto.

    Returns:
        ``m`` plus the combination of the rows of ``eig`` weighted by the
        coordinates of ``X - m`` along those rows.
    """
    centered = X - m
    # Coordinates of the centred sample along each basis row.
    coords = np.matmul(eig, centered)
    return m + np.matmul(coords, eig)
def mse(data, mean, eig_vects, num_of_comps):
    """Return the total squared reconstruction error of a linear projection.

    Every sample is centred on ``mean``, projected onto the first
    ``num_of_comps`` rows of ``eig_vects`` and mapped back; the squared
    differences to the original samples are then summed.

    Note that the result is the *sum* over all samples and features; it is
    not divided by the number of samples.

    Args:
        data: 2-D array-like with one sample per row.
        mean: Vector subtracted from every sample before projecting.
        eig_vects: 2-D array whose *rows* are the basis vectors, in the order
            they should be kept.  ``np.linalg.eig``/``eigh`` return
            eigenvectors as columns, so their output has to be transposed
            (and sorted) before it is passed here.
        num_of_comps: Number of leading rows of ``eig_vects`` to keep.

    Returns:
        The summed squared reconstruction error as a NumPy scalar.
    """
    basis = np.asarray(eig_vects)[:num_of_comps]
    centered = np.asarray(data) - mean
    # Handle all samples in one matrix product instead of looping over rows
    # in Python and adding elements with the builtin sum().
    coords = np.matmul(centered, basis.T)
    residual = centered - np.matmul(coords, basis)
    return np.sum(residual ** 2)
# Load the data: column 0 is kept separately as CLASSES, the remaining
# columns form the sample matrix DATASET (one sample per row).
DATASET = pd.read_csv('dataset.csv', header=None)
CLASSES = np.array(DATASET.iloc[:, 0])
DATASET = np.array(DATASET.iloc[:, 1:])
# ``find_mean`` is not defined anywhere in this file, so the original line
# raised NameError.  The per-feature mean is what transform()/mse() subtract
# from each sample -- NOTE(review): confirm this matches the intended helper.
mean = np.mean(DATASET, axis=0)
sc = scatter_mat(DATASET)
# The scatter matrix is symmetric, so eigh() applies: it returns real
# eigenvalues in ascending order with the eigenvectors as *columns*.
eig_vals, eig_vects = np.linalg.eigh(sc)
# mse() keeps the leading *rows* of eig_vects, so sort by descending
# eigenvalue and store the eigenvectors as rows.
_order = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[_order]
eig_vects = eig_vects[:, _order].T
if __name__ == "__main__":
    # Plot the reconstruction error returned by mse() for 1..100 retained
    # components.
    X_ticks = list(range(0, 100, 10))
    X = np.array(range(1, 101))
    Y = np.array([mse(DATASET, mean, eig_vects, i) for i in X])
    # Fetch the current Axes once and reuse it.  In current Matplotlib
    # releases every bare plt.axes() call adds a new Axes, so calling it
    # twice put the ticks on a different Axes from the one plotted on.
    ax = plt.gca()
    ax.set_xticks(X_ticks)
    ax.set_xticklabels(X_ticks)
    ax.plot(X, Y, 'ro')
    ax.set_xlabel("No. of Principal Components")
    ax.set_ylabel("Mean Squared Error")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
def scatter_mat(data):
    """Return the scatter matrix of ``data``.

    ``np.cov`` normalises by ``n - 1`` by default, so multiplying its result
    by ``n - 1`` undoes that normalisation and yields the scatter matrix.

    Args:
        data: 2-D NumPy array with one sample per row.  It is transposed
            before being passed to ``np.cov``, which treats each row of its
            input as one variable.

    Returns:
        The covariance of ``data.T`` scaled by ``len(data) - 1``.
    """
    n_samples = len(data)
    return np.cov(data.T) * (n_samples - 1)
def reconstruction_error(eig_vals, d):
    """Return the percentage of the eigenvalue total held by the first ``d``.

    Despite the name, the value grows as more eigenvalues are included: it is
    ``100 * sum(eig_vals[:d]) / sum(eig_vals)``.

    Args:
        eig_vals: Sequence of eigenvalues.  Only the first ``d`` entries are
            counted, so the caller decides the ordering.
        d: Number of leading eigenvalues to include.

    Returns:
        The share of the total, expressed as a percentage.
    """
    total = sum(eig_vals)
    leading = sum(eig_vals[:d])
    return leading * 100.0 / total
def transform(X, m, eig):
    """Project ``X`` onto the rows of ``eig`` and map it back.

    Args:
        X: Sample vector.
        m: Vector subtracted from ``X`` before projecting and added back
            afterwards.
        eig: 2-D array whose rows are the basis vectors to project onto.

    Returns:
        ``m`` plus the combination of the rows of ``eig`` weighted by the
        coordinates of ``X - m`` along those rows.
    """
    centered = X - m
    # Coordinates of the centred sample along each basis row.
    coords = np.matmul(eig, centered)
    return m + np.matmul(coords, eig)
def mse(data, mean, eig_vects, num_of_comps):
    """Return the total squared reconstruction error of a linear projection.

    Every sample is centred on ``mean``, projected onto the first
    ``num_of_comps`` rows of ``eig_vects`` and mapped back; the squared
    differences to the original samples are then summed.

    Note that the result is the *sum* over all samples and features; it is
    not divided by the number of samples.

    Args:
        data: 2-D array-like with one sample per row.
        mean: Vector subtracted from every sample before projecting.
        eig_vects: 2-D array whose *rows* are the basis vectors, in the order
            they should be kept.  ``np.linalg.eig``/``eigh`` return
            eigenvectors as columns, so their output has to be transposed
            (and sorted) before it is passed here.
        num_of_comps: Number of leading rows of ``eig_vects`` to keep.

    Returns:
        The summed squared reconstruction error as a NumPy scalar.
    """
    basis = np.asarray(eig_vects)[:num_of_comps]
    centered = np.asarray(data) - mean
    # Handle all samples in one matrix product instead of looping over rows
    # in Python and adding elements with the builtin sum().
    coords = np.matmul(centered, basis.T)
    residual = centered - np.matmul(coords, basis)
    return np.sum(residual ** 2)
# Load the data: column 0 is kept separately as CLASSES, the remaining
# columns form the sample matrix DATASET (one sample per row).
DATASET = pd.read_csv('dataset.csv', header=None)
CLASSES = np.array(DATASET.iloc[:, 0])
DATASET = np.array(DATASET.iloc[:, 1:])
# ``find_mean`` is not defined anywhere in this file, so the original line
# raised NameError.  The per-feature mean is what transform()/mse() subtract
# from each sample -- NOTE(review): confirm this matches the intended helper.
mean = np.mean(DATASET, axis=0)
sc = scatter_mat(DATASET)
# The scatter matrix is symmetric, so eigh() applies: it returns real
# eigenvalues in ascending order with the eigenvectors as *columns*.
eig_vals, eig_vects = np.linalg.eigh(sc)
# mse() keeps the leading *rows* of eig_vects, so sort by descending
# eigenvalue and store the eigenvectors as rows.
_order = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[_order]
eig_vects = eig_vects[:, _order].T
if __name__ == "__main__":
    # Plot the reconstruction error returned by mse() for 1..100 retained
    # components.
    X_ticks = list(range(0, 100, 10))
    X = np.array(range(1, 101))
    Y = np.array([mse(DATASET, mean, eig_vects, i) for i in X])
    # Fetch the current Axes once and reuse it.  In current Matplotlib
    # releases every bare plt.axes() call adds a new Axes, so calling it
    # twice put the ticks on a different Axes from the one plotted on.
    ax = plt.gca()
    ax.set_xticks(X_ticks)
    ax.set_xticklabels(X_ticks)
    ax.plot(X, Y, 'ro')
    ax.set_xlabel("No. of Principal Components")
    ax.set_ylabel("Mean Squared Error")
    plt.show()
# View raw paste | Reply