# Author: Not specified
# Language: python
# Description: Not specified
# Timestamp: 2018-04-28 07:07:56 +0000
# View raw paste Reply
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd

def scatter_mat(data):
    """Return the scatter matrix of ``data``.

    ``data`` is transposed before being passed to ``np.cov``, so each row of
    ``data`` is treated as one observation and each column as one variable.
    The covariance estimate is then multiplied by ``len(data) - 1``, undoing
    the ``N - 1`` normalisation that ``np.cov`` applies by default.
    """
    scale = len(data) - 1
    return scale * np.cov(data.T)

def reconstruction_error(eig_vals, d):
    """Return the share of the eigenvalue sum covered by the first ``d`` values.

    The result is a percentage: ``100 * sum(eig_vals[:d]) / sum(eig_vals)``.
    Note that, despite the function name, a larger value means that more of
    the total is captured by the leading ``d`` eigenvalues.
    """
    leading_sum = sum(eig_vals[:d])
    total_sum = sum(eig_vals)
    return (leading_sum * 100.0) / total_sum

def transform(X, m, eig):
    """Project ``X`` onto the rows of ``eig`` and map it back.

    ``X`` is first centred by subtracting ``m``, then multiplied by ``eig`` to
    obtain one coefficient per row of ``eig``.  Those coefficients are
    multiplied by ``eig`` again and ``m`` is added back, giving the
    reconstruction of ``X`` in the original coordinates.
    """
    centred = X - m
    coefficients = np.matmul(eig, centred)
    return m + np.matmul(coefficients, eig)

def mse(data, mean, eig_vects, num_of_comps):
    """Accumulate the squared reconstruction error over every sample.

    The first ``num_of_comps`` rows of ``eig_vects`` are used as the
    projection basis.  Each sample in ``data`` is reconstructed with
    ``transform`` and the squared element-wise differences between the sample
    and its reconstruction are added up.

    The value returned is the running total over all samples; it is not
    divided by the number of samples.
    """
    basis = eig_vects[:num_of_comps]
    total = 0
    for sample in data:
        residual = sample - transform(sample, mean, basis)
        total += sum(residual**2)
    return total

# Load the data: the first CSV column is kept as the class labels and the
# remaining columns are used as the feature matrix (one sample per row).
DATASET = pd.read_csv('dataset.csv', header=None)
CLASSES = np.array(DATASET.iloc[:, 0])
DATASET = np.array(DATASET.iloc[:, 1:])

# Per-feature mean over all samples, computed directly with NumPy.
mean = DATASET.mean(axis=0)
sc = scatter_mat(DATASET)

# The scatter matrix is symmetric, so `eigh` is the appropriate solver: it
# returns real eigenvalues/eigenvectors.  Its eigenvalues come back in
# ascending order, so reorder everything to put the largest first.
eig_vals, eig_vects = np.linalg.eigh(sc)
_order = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[_order]
eig_vects = eig_vects[:, _order]

if __name__ == "__main__":
    # NumPy stores eigenvectors as columns, whereas `mse` selects components
    # by slicing rows, so hand it the transpose.
    components = eig_vects.T

    X_ticks = list(range(0, 100, 10))
    X = np.arange(1, 101)
    # NOTE: `mse` returns the squared error summed over all samples.
    Y = np.array([mse(DATASET, mean, components, i) for i in X])

    # Configure and draw on a single Axes; calling `plt.axes()` repeatedly
    # can create a fresh Axes on each call.
    ax = plt.gca()
    ax.set_xticks(X_ticks)
    ax.set_xticklabels(X_ticks)
    ax.plot(X, Y, 'ro')
    plt.xlabel("No. of Principal Components")
    plt.ylabel("Mean Squared Error")
    plt.show()
 
# View raw paste Reply