
A Comparison of Several Common Regression Algorithms

The script below implements logistic regression and trains it with three optimisers: batch gradient descent (grad_desc), stochastic gradient descent (random_grad_desc), and a stochastic variant with a decaying learning rate (improve_random_grad_desc). It then plots the two classes together with the fitted decision boundary.

# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import random

def text2num(string):
    # Flatten a whitespace/newline separated block of numbers into a list of floats.
    str_list = string.replace("\n", " ").split(" ")
    while '' in str_list:
        str_list.remove('')
    num_list = [float(i) for i in str_list]
    return num_list
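
For instance (an illustration with made-up input, not data from the original post), text2num turns a multi-line block of numbers into one flat list:

# Illustration only: text2num on a made-up two-line string.
print(text2num("1.0 2.0 0\n-1.5 0.3 1"))   # -> [1.0, 2.0, 0.0, -1.5, 0.3, 1.0]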

def sigmoid(x):
    # Logistic (sigmoid) function: maps any real-valued score into (0, 1).
    return 1.0 / (1 + np.exp(-x))
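
The sigmoid is what turns a linear score into a probability: the model's prediction for a sample x with weights w is sigmoid(w . x), and 0.5 is the natural decision threshold. A minimal sketch with made-up numbers (w and x below are assumptions, not values from the data file used later):

# Illustration only: predicted probability and class for one made-up sample.
w = np.array([[1.0], [0.5], [-0.5]])   # bias, w1, w2 (made up)
x = np.array([[1.0, 2.0, 3.0]])        # [1, x1, x2], the leading 1 is the bias feature
prob = sigmoid(x.dot(w))               # P(y = 1 | x), about 0.62 here
label = int(prob[0, 0] > 0.5)          # threshold at 0.5 -> class 1
print(prob, label)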

def data_plot(data_list, weight):
    # x_data collects the class-0 samples, y_data the class-1 samples (first two columns only).
    x_data = [list(i[0:2]) for i in data_list if i[2] == 0.0]
    y_data = [list(i[0:2]) for i in data_list if i[2] == 1.0]
    x_data = np.array(x_data)
    y_data = np.array(y_data)
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0, i.e. x2 = (-w0 - w1*x1) / w2.
    linear_x = np.arange(-4, 4, 1)
    linear_y = (-weight[0] - weight[1] * linear_x) / weight[2]
    print(linear_y)
    plt.figure(1)
    plt.scatter(x_data[:, 0], x_data[:, 1], c='r')   # class 0 in red
    plt.scatter(y_data[:, 0], y_data[:, 1], c='g')   # class 1 in green
    print(linear_x)
    print(linear_y.tolist()[0])
    plt.plot(linear_x, linear_y.tolist()[0])
    plt.show()
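
The plotted line is where the predicted probability is exactly 0.5: sigmoid(w0 + w1*x1 + w2*x2) = 0.5 precisely when w0 + w1*x1 + w2*x2 = 0, and solving for x2 gives the expression used above. A quick check with made-up weights (assumed values, illustration only):

# Illustration only: boundary line for assumed weights w = (1, 2, -2).
# sigmoid(1 + 2*x1 - 2*x2) = 0.5  <=>  1 + 2*x1 - 2*x2 = 0  <=>  x2 = (-1 - 2*x1) / (-2)
w0, w1, w2 = 1.0, 2.0, -2.0
x1 = np.arange(-4, 4, 1)
x2 = (-w0 - w1 * x1) / w2
print(list(zip(x1, x2)))   # points on the decision boundary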

def grad_desc(data_mat, label_mat, rate, times):
    # Batch gradient descent: every iteration uses all m samples for one weight update.
    data_mat = np.mat(data_mat)
    label_mat = np.mat(label_mat)
    m, n = np.shape(data_mat)
    weight = np.ones((n, 1))
    for i in range(times):
        h = sigmoid(data_mat * weight)                          # predictions for all samples
        error = h - label_mat                                   # prediction errors
        weight = weight - rate * data_mat.transpose() * error   # full-gradient step
    return weight
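
As a quick sanity check of grad_desc (a hypothetical usage example; the tiny data set below is made up, not from the original article), the returned weights should give low probabilities for the class-0 points and high probabilities for the class-1 points:

# Hypothetical usage of grad_desc on a tiny hand-made data set.
toy_x = [[1, -1.0, -1.2], [1, -0.8, -1.0], [1, 1.1, 1.0], [1, 0.9, 1.3]]   # [1, x1, x2]
toy_y = [[0.0], [0.0], [1.0], [1.0]]
w = grad_desc(toy_x, toy_y, rate=0.1, times=1000)
print(w)                              # fitted weights
print(sigmoid(np.mat(toy_x) * w))     # low for the first two rows, high for the last two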

def random_grad_desc(data_mat, label_mat, rate, times):
    # Stochastic gradient descent: update the weights after every single sample.
    data_mat = np.mat(data_mat)
    m, n = np.shape(data_mat)
    weight = np.ones((n, 1))
    for i in range(times):
        for j in range(m):
            h = sigmoid(data_mat[j] * weight)                          # prediction for sample j
            error = h - label_mat[j]
            weight = weight - rate * data_mat[j].transpose() * error   # single-sample step
    return weight
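
The trade-off versus the batch version: each pass over the data now performs m small updates instead of one large one, so fewer passes are usually needed, at the price of noisier steps. On the same made-up toy data as above (hypothetical illustration, the numbers are assumptions):

# Hypothetical comparison of updates per pass on made-up data (m = 4).
toy_x = [[1, -1.0, -1.2], [1, -0.8, -1.0], [1, 1.1, 1.0], [1, 0.9, 1.3]]
toy_y = [[0.0], [0.0], [1.0], [1.0]]
print(grad_desc(toy_x, toy_y, rate=0.1, times=200))         # 200 passes, 200 weight updates
print(random_grad_desc(toy_x, toy_y, rate=0.1, times=50))   # 50 passes, 50 * 4 = 200 weight updates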

def improve_random_grad_desc(data_mat, label_mat, times):
    # Improved stochastic gradient descent: the learning rate decays over time and each
    # pass visits the samples in a random order without replacement.
    data_mat = np.mat(data_mat)
    m, n = np.shape(data_mat)
    weight = np.ones((n, 1))
    for i in range(times):
        index_data = [k for k in range(m)]             # indices not yet used in this pass
        for j in range(m):
            rate = 0.0001 + 4 / (i + j + 1)            # decaying step size
            index = random.sample(index_data, 1)       # pick one remaining sample at random
            h = sigmoid(data_mat[index] * weight)
            error = h - label_mat[index]
            weight = weight - rate * data_mat[index].transpose() * error
            index_data.remove(index[0])
    return weight
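
Two things distinguish this "improved" variant: the step size 0.0001 + 4 / (i + j + 1) shrinks as training progresses, which helps damp the oscillation of plain stochastic gradient descent, and sampling without replacement still guarantees every sample is visited once per pass. A small sketch of how the step size decays (illustration only, m = 4 is an arbitrary choice):

# Illustration only: step size for the first few updates with m = 4.
for i in range(3):           # pass number
    for j in range(4):       # position within the pass
        print(i, j, 0.0001 + 4 / (i + j + 1))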

def main():
    # Load the training data: three columns per sample (x1, x2, label).
    file = open("/Users/chenzu/Documents/code-machine-learning/data/LR", "rb")
    file_lines = file.read().decode("UTF-8")
    data_list = text2num(file_lines)
    data_len = int(len(data_list) / 3)
    data_list = np.reshape(data_list, (data_len, 3))
    # Prepend a constant 1 to every sample so that weight[0] acts as the bias term.
    data_mat_temp = data_list[:, 0:2]
    data_mat = []
    for i in data_mat_temp:
        data_mat.append([1, i[0], i[1]])
    print(data_mat)
    label_mat = data_list[:, 2:3]
    # Solve for the weights by (improved stochastic) gradient descent.
    weight = improve_random_grad_desc(data_mat, label_mat, 500)
    print(weight)
    data_plot(data_list, weight)

if __name__ == '__main__':
    main()
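
The file path in main() is local to the original author's machine, so a self-contained way to compare the three optimisers is to generate synthetic data. Everything below is a sketch under that assumption: the synthetic blobs, the hyper-parameters and the accuracy check are additions, not part of the original post.

# Sketch: compare the three optimisers on synthetic two-class data (assumed setup).
def compare_on_synthetic_data(n_per_class=100):
    np.random.seed(0)
    # Two Gaussian blobs: class 0 around (-1, -1), class 1 around (1, 1).
    class0 = np.random.randn(n_per_class, 2) - 1
    class1 = np.random.randn(n_per_class, 2) + 1
    features = np.vstack([class0, class1])
    labels = np.vstack([np.zeros((n_per_class, 1)), np.ones((n_per_class, 1))])
    data_mat = [[1, x1, x2] for x1, x2 in features]          # prepend the bias column

    def accuracy(weight):
        preds = sigmoid(np.mat(data_mat) * weight) > 0.5
        return np.mean(np.asarray(preds, dtype=float) == labels)

    for name, weight in [
        ("batch", grad_desc(data_mat, labels, 0.001, 500)),
        ("stochastic", random_grad_desc(data_mat, labels, 0.01, 50)),
        ("improved stochastic", improve_random_grad_desc(data_mat, labels, 50)),
    ]:
        print(name, accuracy(weight))

# compare_on_synthetic_data()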