
Stanford CS231n Course Notes -- Linear Classifiers (Assignment 1 Code Implementation)

I recently worked through Stanford's CS231n (Winter 2016) course and got a great deal out of it; it is an excellent introduction to deep learning and convolutional neural networks. While studying, I mainly relied on the course-note translations by several contributors on Zhihu, which are very well done, and I would like to thank them here; most of the resources related to the course can be found in that column, and I recommend reading each translated note in full. Chinese subtitles for the course videos are also being translated, and the first lecture is already finished; anyone interested is welcome to take a look or join in. After finishing the videos and notes I also implemented the three course Assignments. The posts in this series combine summaries of the course notes with my Assignment code.

This post implements the three tasks in Assignment 1: linear_svm, softmax, and linear_classifier. The starter code can be downloaded from the assignment page and already contains the relevant files; we only need to work through svm.ipynb and softmax.ipynb step by step. For the numpy array, vector, and matrix operations used below (for example, how to select elements at specific positions of a matrix), see the numpy tutorial linked from the course.
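One operation worth calling out is integer-array ("fancy") indexing, which both loss implementations below use to grab the correct-class score of every example in a single step. A minimal sketch (the array contents are made up):

import numpy as np

scores = np.array([[3.2, 5.1, -1.7],
                   [1.3, 4.9,  2.0],
                   [2.2, 2.5, -3.1]])   # N by C score matrix
y = np.array([0, 1, 2])                 # correct class index of each row

# picks scores[0, 0], scores[1, 1] and scores[2, 2] in one shot
correct_scores = scores[np.arange(3), y]
print(correct_scores)   # [ 3.2  4.9 -3.1]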
Below I paste my code directly; the relevant comments are included inline:
1. linear_svm.py

import numpy as np
def svm_loss_naive(W, X, y, reg):
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # loop over every example in the training set
    for i in xrange(num_train):
        # class scores: dot product of the example with the weights
        scores = X[i].dot(W)
        # score of the correct class
        correct_class_score = scores[y[i]]
        # loop over the C classes
        for j in xrange(num_classes):
            if j == y[i]:
                continue
            # hinge loss and its gradient; the derivation is in the previous post
            margin = scores[j] - correct_class_score + 1   # note delta = 1
            if margin > 0:
                loss += margin
                dW[:, y[i]] += -X[i, :]   # gradient w.r.t. the correct class column
                dW[:, j] += X[i, :]       # gradient w.r.t. the wrong class column

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train

    # Add regularization to the loss.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W

    return loss, dW


# compute the loss and the gradient with vectorized operations
def svm_loss_vectorized(W, X, y, reg):
    loss = 0.0
    dW = np.zeros(W.shape)   # initialize the gradient as zero

    # scores is an N by C matrix
    scores = X.dot(W)
    num_train = X.shape[0]
    num_classes = W.shape[1]

    # correct-class score of every example: pick element y[i] from row i
    scores_correct = scores[np.arange(num_train), y]             # 1 by N
    scores_correct = np.reshape(scores_correct, (num_train, 1))  # N by 1

    # difference between every score and the correct-class score
    margins = scores - scores_correct + 1.0   # N by C
    # the margin at the correct class is set to 0
    margins[np.arange(num_train), y] = 0.0
    # margins <= 0 contribute nothing (correctly classified)
    margins[margins <= 0] = 0.0

    loss += np.sum(margins) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # compute the gradient
    margins[margins > 0] = 1.0
    row_sum = np.sum(margins, axis=1)            # 1 by N
    margins[np.arange(num_train), y] = -row_sum
    dW += np.dot(X.T, margins) / num_train + reg * W   # D by C

    return loss, dW
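As a quick sanity check (not part of the assignment files), the naive and vectorized versions can be compared on a small random problem; the shapes and regularization strength below are just illustrative:

import numpy as np
from linear_svm import svm_loss_naive, svm_loss_vectorized

np.random.seed(0)
X = np.random.randn(5, 4)               # N = 5 examples, D = 4 features
y = np.random.randint(3, size=5)        # C = 3 classes
W = 0.001 * np.random.randn(4, 3)

loss_naive, grad_naive = svm_loss_naive(W, X, y, 0.1)
loss_vec, grad_vec = svm_loss_vectorized(W, X, y, 0.1)

# both the losses and the gradients should agree up to floating-point error
print(loss_naive - loss_vec)
print(np.linalg.norm(grad_naive - grad_vec))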

2. softmax.py

import numpy as np
# Same idea as the SVM: a naive and a vectorized implementation.
def softmax_loss_naive(W, X, y, reg):    

    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)    # D by C
    dW_each = np.zeros_like(W)
    num_train, dim = X.shape
    num_class = W.shape[1]
    f = X.dot(W)    # N by C
    # For numeric stability, subtract the largest score in each row of f
    # (see the previous post).
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    # normalized probability of each class
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True) # N by C
    y_trueClass = np.zeros_like(prob)
    y_trueClass[np.arange(num_train), y] = 1.0
    for i in xrange(num_train):
        for j in xrange(num_class):    
            loss += -(y_trueClass[i, j] * np.log(prob[i, j]))    
            dW_each[:, j] = -(y_trueClass[i, j] - prob[i, j]) * X[i, :]
        dW += dW_each
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W

    return loss, dW

def softmax_loss_vectorized(W, X, y, reg):
    # Initialize the loss and gradient to zero.    
    loss = 0.0    
    dW = np.zeros_like(W)    # D by C    
    num_train, dim = X.shape

    f = X.dot(W)    # N by C
    # Considering the Numeric Stability
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True)
    y_trueClass = np.zeros_like(prob)
    y_trueClass[np.arange(num_train), y] = 1.0    # N by C
    loss += -np.sum(y_trueClass * np.log(prob)) / num_train + 0.5 * reg * np.sum(W * W)
    dW += -np.dot(X.T, y_trueClass - prob) / num_train + reg * W

    return loss, dW
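To see why the per-row maximum is subtracted before exponentiating, here is a small illustration (the score values are made up):

import numpy as np

f = np.array([123.0, 456.0, 789.0])     # large scores make exp() overflow
p_naive = np.exp(f) / np.sum(np.exp(f))                          # overflows
p_stable = np.exp(f - np.max(f)) / np.sum(np.exp(f - np.max(f)))
print(p_naive)    # overflow: the largest entry becomes nan
print(p_stable)   # a valid probability distribution that sums to 1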

3. linear_classifier.py

from linear_svm import *
from softmax import *

class LinearClassifier(object):    

    def __init__(self):        
        self.W = None    

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, batch_size=200, verbose=True):
        num_train, dim = X.shape
        # assume y takes values 0...K-1 where K is number of classes
        num_classes = np.max(y) + 1  
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)   # D by C

        # Run stochastic gradient descent(Mini-Batch) to optimize W
        loss_history = []
        for it in xrange(num_iters): 
            X_batch = None
            y_batch = None
            # Sample a minibatch of indices. (The assignment hints that sampling
            # with replacement, i.e. replace=True, would be faster.)
            sample_index = np.random.choice(num_train, batch_size, replace=False)
            X_batch = X[sample_index, :]   # batch_size by D
            y_batch = y[sample_index]      # 1 by batch_size
            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            self.W += -learning_rate * grad
            if verbose and it % 100 == 0:
                print 'Iteration %d / %d: loss %f' % (it, num_iters, loss)

        return loss_history

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])    # 1 by N
        y_pred = np.argmax(np.dot(self.W.T, X.T), axis=0)
        return y_pred

    def loss(self, X_batch, y_batch, reg): 
        pass

class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
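A minimal usage sketch on random data (assuming linear_classifier.py is importable directly; in the assignment it lives under cs231n/classifiers, and the feature dimension 3073 corresponds to a CIFAR-10 image plus the bias trick):

import numpy as np
from linear_classifier import LinearSVM

X_train = np.random.randn(100, 3073)        # 100 toy examples
y_train = np.random.randint(10, size=100)   # 10 classes

svm = LinearSVM()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=200, batch_size=50, verbose=False)
y_pred = svm.predict(X_train)
print(np.mean(y_pred == y_train))           # training accuracy on the toy data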

The notebook code below mainly performs hyperparameter tuning, searching over the learning rate and regularization strength.

# softmax.ipynb

from cs231n.classifiers import Softmax
results = {}
best_val = -1
best_softmax = None
learning_rates = [5e-6, 1e-7, 5e-7]
regularization_strengths = [1e4, 5e4, 1e5]
params = [(x,y) for x in learning_rates for y in regularization_strengths ]
for lrate, regular in params:
    softmax = Softmax()
    loss_hist = softmax.train(X_train, y_train, learning_rate=lrate, reg=regular,
                             num_iters=700, verbose=True)
    y_train_pred = softmax.predict(X_train)
    accuracy_train = np.mean( y_train == y_train_pred)
    y_val_pred = softmax.predict(X_val)
    accuracy_val = np.mean(y_val == y_val_pred)
    results[(lrate, regular)] = (accuracy_train, accuracy_val)
    if best_val < accuracy_val:
        best_val = accuracy_val
        best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
                lr, reg, train_accuracy, val_accuracy)

print 'best validation accuracy achieved during cross-validation: %f' % best_val


# svm.ipynb

from cs231n.classifiers import LinearSVM
learning_rates = [1e-7, 5e-5]
regularization_strengths = [5e4, 1e5]

results = {}
best_val = -1   # The highest validation accuracy that we have seen so far.
best_svm = None # The LinearSVM object that achieved the highest validation rate.

iters= 1000
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
                lr, reg, train_accuracy, val_accuracy)

print 'best validation accuracy achieved during cross-validation: %f' % best_val
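After the sweep, the best model is typically evaluated once on the test split (X_test and y_test here are assumed to come from the notebook's data-loading cell):

y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('linear SVM on raw pixels: final test set accuracy %f' % test_accuracy)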

A few screenshots of the training output:
[screenshots of the loss and accuracy logs omitted]