1. 程式人生 > >Bobo老師機器學習筆記第五課-簡單線性迴歸

Bobo老師機器學習筆記第五課-簡單線性迴歸

課程地址:https://coding.imooc.com/class/169.html

最小二乘法的推導部落格點選此處

程式碼實現(參考Bobo實現,如果要看BoBo老師原始碼,請點選此處):

# -*- encoding: utf-8 -*-
"""
實現簡單的線性迴歸,
自己實現SimpleLineRegession1過程中的2個錯誤:
1、deno += (x - x_mean) ** 2 寫成 deno = (x - x_mean) ** 2 這裡要注意: deno是所有計算結果的累計值
2、 方程方式self.a_ * x + self.b_ 寫成 self.a_ * x - self.b_。 計算b的公式b=y_mean - a * x_mean, 但是整個方程是 y = ax+b
"""
import numpy as np


class SimpleLineRegession1(object):
    """Simple (single-feature) linear regression computed with plain loops.

    The model is the line ``y = a_ * x + b_``. This variant deliberately
    avoids NumPy vectorization and accumulates the sums sample by sample.
    """

    def __init__(self):
        """Create an unfitted model.

        Values computed during fitting are stored in attributes whose
        names end with an underscore.
        """
        self.a_ = None  # slope of the fitted line
        self.b_ = None  # intercept of the fitted line

    def fit(self, X_train, y_train):
        """Fit the slope and intercept to the training data.

        :param X_train: one-dimensional NumPy array of feature values
        :param y_train: one-dimensional NumPy array of targets, same
            length as ``X_train``
        :return: this instance, so calls can be chained
            (``model.fit(x, y).predict(x)``)

        If every value in ``X_train`` is identical the denominator is
        zero and the slope is undefined.
        """
        assert X_train.ndim == 1 and y_train.ndim == 1, 'X和Y必須為1維'
        assert len(X_train) == len(y_train), 'X和Y的訓練個數不相同'
        x_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        num = 0.0   # numerator: sum of (x - x_mean) * (y - y_mean)
        deno = 0.0  # denominator: sum of (x - x_mean) ** 2
        for x, y in zip(X_train, y_train):
            # Both values are running totals, hence ``+=`` rather than ``=``.
            num += (x - x_mean) * (y - y_mean)
            deno += (x - x_mean) ** 2
        self.a_ = num / deno
        # The intercept is derived from the means; the line itself is a*x + b.
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def _predict(self, x):
        """Return the prediction ``a_ * x + b_`` for a single value ``x``.

        :param x: a single feature value
        :return: the predicted target for ``x``
        """
        return self.a_ * x + self.b_

    def predict(self, X_test):
        """Predict targets for a one-dimensional array of feature values.

        :param X_test: one-dimensional NumPy array of feature values
        :return: NumPy array with one prediction per element of ``X_test``
        """
        assert X_test.ndim == 1, 'X_test必須是一維陣列'
        assert self.a_ is not None and self.b_ is not None, '在predict之前請先fit'

        # Element-by-element on purpose: this class is the loop-based variant.
        y_predict = [self._predict(x) for x in X_test]
        return np.array(y_predict)

    def __repr__(self):
        return 'SimpleLineRegession1(a=%s, b=%s)' % (self.a_, self.b_)


class SimpleLineRegession2(object):
    """Simple (single-feature) linear regression computed with NumPy vectors.

    The model is the line ``y = a_ * x + b_``. The sums needed for the
    slope are computed as dot products instead of Python loops.
    """

    def __init__(self):
        """Create an unfitted model.

        Values computed during fitting are stored in attributes whose
        names end with an underscore.
        """
        self.a_ = None  # slope of the fitted line
        self.b_ = None  # intercept of the fitted line

    def fit(self, X_train, y_train):
        """Fit the slope and intercept to the training data.

        :param X_train: one-dimensional NumPy array of feature values
        :param y_train: one-dimensional NumPy array of targets, same
            length as ``X_train``
        :return: this instance, so calls can be chained
            (``model.fit(x, y).predict(x)``)

        If every value in ``X_train`` is identical the denominator is
        zero and the slope is undefined.
        """
        assert X_train.ndim == 1 and y_train.ndim == 1, 'X和Y必須為1維'
        assert len(X_train) == len(y_train), 'X和Y的訓練個數不相同'
        x_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        # Centre x once and reuse it in the numerator and the denominator.
        x_centered = X_train - x_mean
        self.a_ = x_centered.dot(y_train - y_mean) / x_centered.dot(x_centered)
        # The intercept is derived from the means; the line itself is a*x + b.
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def _predict(self, x):
        """Return the prediction ``a_ * x + b_`` for ``x``.

        :param x: a single feature value (an array also works, element-wise)
        :return: the predicted target(s) for ``x``
        """
        return self.a_ * x + self.b_

    def predict(self, X_test):
        """Predict targets for a one-dimensional array of feature values.

        :param X_test: one-dimensional NumPy array of feature values
        :return: NumPy array with one prediction per element of ``X_test``
        """
        assert X_test.ndim == 1, 'X_test必須是一維陣列'
        assert self.a_ is not None and self.b_ is not None, '在predict之前請先fit'

        # Apply the line equation to the whole array in one NumPy expression.
        return self.a_ * X_test + self.b_

    def __repr__(self):
        return 'SimpleLineRegession2(a=%s, b=%s)' % (self.a_, self.b_)

測試程式碼:

import numpy as np
from timeit import timeit as timeit
import matplotlib.pyplot as plt
from simplelinerregression import SimpleLineRegession1, SimpleLineRegession2

# Synthetic benchmark data shared by test_reg1 and test_reg2:
# 10000 x values, each a random integer from randint(1.0, 6) plus normal noise.
x = np.random.randint(1.0, 6, 10000) + np.random.normal(size=10000)
# Targets follow the line y = 0.8 * x + 0.4 with additional normal noise, so a
# fitted model is expected to report a slope near 0.8 and an intercept near 0.4.
y = 0.8 * x + 0.4 + np.random.normal(size=len(x))

def test_reg1():
    """Fit and predict with SimpleLineRegession1 on the module-level data.

    Prints the fitted model afterwards. Takes no arguments so it can be
    passed to ``timeit`` by name.
    """
    reg1 = SimpleLineRegession1()
    reg1.fit(x, y)
    reg1.predict(x)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(reg1)

def test_reg2():
    """Fit and predict with SimpleLineRegession2 on the module-level data.

    Prints the fitted model afterwards. Takes no arguments so it can be
    passed to ``timeit`` by name.
    """
    reg2 = SimpleLineRegession2()
    reg2.fit(x, y)
    reg2.predict(x)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(reg2)

def draw_graph():
    """Plot five sample points together with their fitted regression line.

    Fits a SimpleLineRegession1 model to a small hard-coded data set,
    draws the points in green and the fitted line in red, labels the line
    with its rounded equation and shows the figure.
    """
    # Local sample data; these names shadow the module-level x and y.
    x = np.array([1., 2., 3., 4., 5.])
    y = np.array([1., 3., 2., 3.0, 5.0])
    # Draw the sample points once, in green.
    plt.scatter(x, y, color='green')
    plt.axis([0, 6, 0, 6])

    reg1 = SimpleLineRegession1()
    reg1.fit(x, y)
    y_predict = reg1.predict(x)

    # Legend text such as 'y=0.8x+0.4', with both coefficients rounded to 2 places.
    line_mark = 'y=%sx+%s' % (np.round(reg1.a_, 2), np.round(reg1.b_, 2))
    plt.plot(x, y_predict, color='red', label=line_mark)
    plt.legend()
    plt.show()


if __name__ == '__main__':
    # Time three runs of each implementation and print the total seconds,
    # then show the example plot. print(...) with a single argument
    # behaves the same on Python 2 and 3.
    print(timeit('test_reg1()', "from __main__ import test_reg1", number=3))
    print(timeit('test_reg2()', "from __main__ import test_reg2", number=3))
    draw_graph()

執行結果:

由下面的輸出可以看出,向量化的SimpleLineRegession2明顯比使用迴圈的SimpleLineRegession1快(3次執行合計約0.013秒對約0.041秒,約快3倍),而兩者求得的a、b幾乎相同:
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
0.0413969199446
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
0.0128730256884