程式人生 > 線性迴歸的 Python 實現

線性迴歸的python實現

  最近在學習機器學習的一些演算法,從最開始的線性迴歸開始。線性迴歸其實就是用一條直線來模擬你的資料,並且讓所產生的誤差儘可能的小。
#coding=utf-8
import random
import numpy as np
from matplotlib import pyplot as pp

# Build a synthetic housing data set (10,000 samples) from a known linear
# relationship plus noise, so the fitted regression can be sanity-checked.
random.seed(1)  # fixed seed -> reproducible data across runs
house_size = [random.randrange(70, 200) for i in range(10000)]
distance_from_citycenter = [random.randrange(1, 30) for i in range(10000)]
floor = [random.randrange(1, 20) for i in range(10000)]
house_price = []
for i in range(10000):
    # "True" price = size * (random price per unit) - distance penalty
    #                + floor bonus + noise.
    # Fixes vs. original: randrange() needs int bounds (5e4/1e6 are floats,
    # a TypeError since Python 3.12), and the trailing backslash glued this
    # expression onto the append() call below, which was a SyntaxError.
    price = (house_size[i] * random.randrange(50000, 100000)
             + distance_from_citycenter[i] * (-1e4)
             + floor[i] * 1e4
             + random.randrange(1, 1000000))
    house_price.append(price)  # ground-truth prices we will try to recover
# Feature rows [bias, size, distance, floor] — the leading 1 is the intercept term.
x = [[1, house_size[i], distance_from_citycenter[i], floor[i]] for i in range(10000)]
x_matrix = np.array(x)  # (10000, 4) design matrix
y_matrix = np.array(house_price)  # target prices
y_matrix = y_matrix.reshape(len(y_matrix), 1)  # reshape to column vector (10000, 1)
theta = [0 for i in range(4)]  # initial parameters: all zeros
theta_matrix = np.array(theta)
theta_matrix = theta_matrix.reshape(len(theta_matrix), 1)  # (4, 1) column vector
def cost_function(x, theta, y):
    """Mean-squared-error cost J(theta) = sum((x.theta - y)^2) / (2m).

    x: (m, n) design matrix; theta: (n, 1) parameters; y: (m, 1) targets.
    Returns a scalar cost.
    """
    residual = x.dot(theta) - y      # prediction error per sample
    m = len(y)                       # number of samples
    return np.sum(residual ** 2) / (2 * m)
def gradient(x, theta, y):
    """Gradient of the MSE cost with respect to theta.

    x: (m, n) design matrix; theta: (n, 1) parameters; y: (m, 1) targets.
    Returns the (n, 1) gradient vector (1/m) * X^T (X.theta - y),
    used by the gradient-descent loop below.
    """
    y_pred = x.dot(theta)
    diff = y_pred - y  # prediction error per sample
    # Renamed from `gradient`: the original local variable shadowed the
    # function's own name, which is confusing and error-prone.
    grad = (1 / len(y)) * x.T.dot(diff)
    return grad
# Run plain batch gradient descent and report the fitted parameters.
theta_matrix = theta_matrix.astype("float64")  # float dtype so updates keep precision
max_item = 10000          # number of gradient-descent iterations
learning_rate = 0.00001   # step size
for _ in range(max_item):
    step = gradient(x_matrix, theta_matrix, y_matrix) * learning_rate
    theta_matrix = theta_matrix - step
    # To monitor convergence, print cost_function(x_matrix, theta_matrix, y_matrix)
    # every few iterations here.
print(theta_matrix)