Author: Not specified Language: python
Description: Not specified Timestamp: 2017-01-30 22:35:26 +0000
View raw paste Reply
  1. from scipy.io import loadmat
  2. import numpy as np
  3. from sklearn.svm import SVC
  4. from sklearn.svm import LinearSVC
  5.  
  6.  
  7. #MNIST
  8. def mnist():
  9.     mnistMain = loadmat("hw01_data/mnist/train.mat")['trainX']
  10.     np.random.shuffle(mnistMain)
  11.     mnistValidation = mnistMain[:10000]
  12.     mnistTraining = mnistMain[10000:]
  13.     mvalidationLabels = mnistValidation.T[-1].T
  14.     mvalidation = mnistValidation.T[:-1].T
  15.     mnistLabels = mnistTraining.T[-1].T
  16.     mnistTraining = mnistTraining.T[:-1].T
  17.     print("MNIST Datasets Created")
  18.     for trainingSize in [100, 200, 500, 1000, 2000, 5000,10000]:
  19.         classifier = SVC(kernel='linear')
  20.         # print("training Started for " + str(trainingSize))
  21.         classifier.fit(mnistTraining[:trainingSize], mnistLabels[:trainingSize])
  22.         # print("training Ended")
  23.         print(1 - classifier.score(mnistTraining[:trainingSize], mnistLabels[:trainingSize]))
  24.  
  25. def checkAccuracy():
  26.     mnistMain = loadmat("hw01_data/mnist/train.mat")['trainX']
  27.     mnistTest = loadmat("hw01_data/mnist/test.mat")['testX']
  28.  
  29.     np.random.shuffle(mnistMain)
  30.     mnistValidation = mnistMain[:10000]
  31.     mnistTraining = mnistMain[10000:]
  32.     mvalidationLabels = mnistValidation.T[-1].T
  33.     mvalidation = mnistValidation.T[:-1].T
  34.     mnistLabels = mnistTraining.T[-1].T
  35.     mnistTraining = mnistTraining.T[:-1].T
  36.     print("MNIST Datasets Created")
  37.     classifier = SVC(kernel='linear')
  38.     print("training Started for " + str(10000))
  39.     classifier.fit(mnistTraining[:10000], mnistLabels[:10000])
  40.     print("training Ended")
  41.     answers = classifier.predict(mnistTest)
  42.     import csv
  43.     import sys
  44.  
  45.     f = open('mnistout.csv', 'wt')
  46.     try:
  47.         writer = csv.writer(f)
  48.         writer.writerow( ('Id', 'Category') )
  49.         for i in range(10000):
  50.             writer.writerow( (i, str(answers[i])) )
  51.     finally:
  52.         f.close()
  53.     return answers
  54.  
  55. def mnistWithCValueAltering():
  56.     mnistMain = loadmat("hw01_data/mnist/train.mat")['trainX']
  57.     np.random.shuffle(mnistMain)
  58.     mnistValidation = mnistMain[:10000]
  59.     mnistTraining = mnistMain[10000:]
  60.     mvalidationLabels = mnistValidation.T[-1].T
  61.     mvalidation = mnistValidation.T[:-1].T
  62.     mnistLabels = mnistTraining.T[-1].T
  63.     mnistTraining = mnistTraining.T[:-1].T
  64.     print("MNIST Datasets Created")
  65.     trainingSize = 1000
  66.     for c in [1,1e-6,2e-6,3e-6,7e-6,10e-6]:
  67.         classifier = SVC(kernel='linear', C=c)
  68.         print("training Started for c value: " + str(c))
  69.         classifier.fit(mnistTraining[:trainingSize], mnistLabels[:trainingSize])
  70.         print("training Ended")
  71.         print(classifier.score(mvalidation, mvalidationLabels))
  72. def spam():
  73.     spamDict = loadmat("hw01_data/spam/spam_data.mat")
  74.     spamTrain = spamDict['training_data']
  75.     spamLabels = spamDict['training_labels']
  76.     spamFull = spamTrain.T + spamLabels[0]
  77.     spamFull = spamFull.T
  78.     np.random.shuffle(spamFull)
  79.     numVal = int(.2*spamFull.shape[0])
  80.     spamValidation = spamFull[:numVal]
  81.     spamTraining = spamFull[numVal:]
  82.     trainingLength = spamTraining.shape[0]
  83.     spamvalidationLabels = spamValidation.T[-1].T
  84.     spamvalidation = spamValidation.T[:-1].T
  85.     spamLabels = spamTraining.T[-1].T
  86.     spamTraining = spamTraining.T[:-1].T
  87.     print("Spam Datasets Created")
  88.     for trainingSize in [100, 200, 500, 1000, 2000, trainingLength]:
  89.         classifier = SVC(kernel='linear')
  90.         # print("training Started for " + str(trainingSize))
  91.         classifier.fit(spamTraining[:trainingSize], spamLabels[:trainingSize])
  92.         # print("training Ended")
  93.         print(1 - classifier.score(spamTraining[:trainingSize], spamLabels[:trainingSize]))
  94. def spamAc():
  95.     spamDict = loadmat("hw01_data/spam/spam_data.mat")
  96.     spamTrain = spamDict['training_data']
  97.     spamLabels = spamDict['training_labels']
  98.     testData = spamDict['test_data']
  99.     spamFull = np.vstack((spamTrain.T,spamLabels[0]))
  100.     spamFull = spamFull.T
  101.     np.random.shuffle(spamFull)
  102.     numVal = 0
  103.     spamValidation = spamFull[:numVal]
  104.     spamTraining = spamFull[numVal:]
  105.     trainingLength = spamTraining.shape[0]
  106.     spamvalidationLabels = spamValidation.T[-1].T
  107.     spamvalidation = spamValidation.T[:-1].T
  108.     spamLabels = spamTraining.T[-1].T
  109.     spamTraining = spamTraining.T[:-1].T
  110.     print("Spam Datasets Created")
  111.     classifier = SVC(kernel='linear', C=1)
  112.     print("training Started for " + str(trainingLength))
  113.     classifier.fit(spamTraining[:  trainingLength], spamLabels[:trainingLength])
  114.     print("training Ended")
  115.     # print(classifier.score(spamvalidation, spamvalidationLabels))
  116.     answers = classifier.predict(testData)
  117.     import csv
  118.     import sys
  119.  
  120.     f = open('spamout.csv', 'wt')
  121.     try:
  122.         writer = csv.writer(f)
  123.         writer.writerow( ('Id', 'Category') )
  124.         for i in range(len(answers)):
  125.             writer.writerow( (i, str(answers[i])) )
  126.     finally:
  127.         f.close()
  128.     return answers
  129.  
  130. def spamKCross():
  131.     spamDict = loadmat("hw01_data/spam/spam_data.mat")
  132.     spamTrain = spamDict['training_data']
  133.     spamLabels = spamDict['training_labels']
  134.     spamFull = spamTrain.T + spamLabels[0]
  135.     spamFull = spamFull.T
  136.     np.random.shuffle(spamFull)
  137.     k = 5
  138.     klength = int(spamFull.shape[0]/k)
  139.     # spamValidation = spamFull[:numVal]
  140.     kSets = [spamFull[x*klength:((x+1)*klength)] for x in range(5)]
  141.     print("Spam Datasets Created")
  142.     cVal = 1
  143.     while cVal < 100:
  144.         sumScores = 0.0
  145.         print(cVal)
  146.         for kIndex in range(5):
  147.             spamValidation = kSets[kIndex]
  148.             spamTraining = [itTuple[1] for itTuple in enumerate(kSets) if (itTuple[0] != kIndex)]
  149.             spamTraining = np.concatenate((spamTraining[0], spamTraining[1], spamTraining[2], spamTraining[3]))
  150.             spamvalidationLabels = spamValidation.T[-1].T
  151.             spamvalidation = spamValidation.T[:-1].T
  152.             spamLabels = spamTraining.T[-1].T
  153.             spamTraining = spamTraining.T[:-1].T
  154.             classifier = LinearSVC(C=cVal)
  155.             # print("training Started for k: " + str(kIndex))
  156.             classifier.fit(spamTraining, spamLabels)
  157.             # print("fitting")
  158.             sumScores += classifier.score(spamvalidation, spamvalidationLabels)
  159.         print(sumScores/k)
  160.         cVal +=1
  161.     # print('0.865183752418')
  162.  
  163.  
  164. def cifar():
  165.     cfarMain = loadmat("hw01_data/cifar/train.mat")['trainX']
  166.     np.random.shuffle(cfarMain)
  167.     cfarValidation = cfarMain[:5000]
  168.     cfarTraining = cfarMain[5000:]
  169.     cvalidationLabels = cfarValidation.T[-1].T
  170.     cvalidation = cfarValidation.T[:-1].T
  171.     cLabels = cfarTraining.T[-1].T
  172.     cTraining = cfarTraining.T[:-1].T
  173.     print("Cifar Datasets Created")
  174.     for trainingSize in [100, 200, 500, 1000, 2000, 5000]:
  175.         classifier = SVC(kernel='linear')
  176.         # print("training Started for " + str(trainingSize))
  177.         classifier.fit(cTraining[:trainingSize], cLabels[:trainingSize])
  178.         # print("training Ended")
  179.         print(1 - classifier.score(cTraining[:trainingSize], cLabels[:trainingSize]))
  180. mnistWithCValueAltering()
View raw paste Reply