1. 程式人生 > >c++實現線性迴歸(高斯消元)(附python實現)

c++實現線性迴歸(高斯消元)(附python實現)

前言

  • 寫這次blog的契機是上次筆試的時候,遇到了這個問題
  • 當時以為numpy庫是可以用的,就先寫了個python版,結果並不能用。。
  • 最後憤然寫了個c++版
  • 不過最後一個小問題導致我差了兩分鐘沒交上去程式碼,所以這一版原始碼只是通過了案例但沒有提交ac。。如果大家發現有什麼bug也歡迎大家幫我指出

題意:

  • 就是給定一個X矩陣,一個y向量,分別表示資料特徵矩陣(n*m),ground truth向量(n*1
  • 輸入就是一個大矩陣,最後一列是y,前面是X,同時原題裡n,m是先不給定的,需要根據矩陣的行數和列數判斷
  • 要求返回係數向量,包含偏置b
    ,所以返回的是一個n+1維的vector
  • 我現在找不到原題了,找了一個類似的簡化的題,記在這裡:連結

思路:

  • 實現的話,其實就是通過高斯消元,解一個線性方程組:
    X T X w =
    X T y X^TXw = X^Ty
  • 這裡把推導過程簡單說一下
  • 首先我們知道線性迴歸的loss function:
    min
    w L = y X w \min_ w L = ||y - Xw||
  • 然後我們求關於w的梯度:
    L = ( y X w ) T ( y X w ) = ( y T y w T X T y y T X w + w T X T X w ) L = (y - Xw)^T(y - Xw) \\ = (y^Ty - w^TX^Ty - y^TXw + w^TX^TXw)
    w L = X T y X T y + 2 X T X w \nabla_w L = -X^Ty - X^Ty + 2X^TXw
  • 令梯度為0,容易推出最後結論:
    X T X w = X T y X^TXw = X^Ty
  • 最後附上矩陣求導參考文獻:連結

c++實現

#include <bits/stdc++.h>
using namespace std;

const double eps = 1e-8;
typedef vector<double> vec;
typedef vector<vec> mat;

vec gauss_jordan(const mat& A, const vec& b){
	int n = A.size();
	mat B(n, vec(n+1));
	for (int i = 0; i < n; i++){
		for (int j = 0; j < n; j++){
			B[i][j] = A[i][j];
		}
	}
	for (int i = 0; i < n; i++){
		B[i][n] = b[i];
	}
	for (int i = 0; i< n; i++){
		int pivot = i;
		for (int j = i; j < n; j++){
			if (abs(B[j][i]) > abs(B[pivot][i]))
				pivot = j;
		}
		swap(B[i], B[pivot]);
	
		if (abs(B[i][i]) < eps)
			return vec();
	
		for (int j = i + 1; j <= n; j++){
			B[i][j] /= B[i][i];
		}
		for (int j = 0; j < n; j++){
			if (i != j){
				for (int k = i + 1; k <= n; k++)
					B[j][k] -= B[j][i] * B[i][k];
			}
		}
	}
	vec x(n);
	for (int i = 0; i < n; i++)
		x[i] = B[i][n];
	return x;
}

mat mul(mat& A, mat& B){
	mat C(A.size(), vec(B[0].size()));
	for (int i = 0; i < A.size(); i++){
		for (int k = 0; k < B.size(); k++){
			for (int j = 0; j < B[0].size(); j++){
				C[i][j] += A[i][k] * B[k][j];
			}
		}
	}
	return C;
}

mat trans(mat& A){
	mat B(A[0].size(), vec(A.size()));
	for (int i = 0; i < A.size(); i++){
		for (int j = 0; j < A[i].size(); j++){
			B[j][i] = A[i][j];
		}
	}
	return B;
}
vec tovec(mat& A){
	vec B(A.size());
	for (int i = 0; i < A.size(); i++){
		B[i] = A[i][0];
	}
	return B;
}

void SplitString(const string& s, vector<double>& v, const string& c)
{
  string::size_type pos1, pos2;
  pos2 = s.find(c);
  pos1 = 0;
  while(string::npos != pos2)
  {
    v.push_back(stod(s.substr(pos1, pos2-pos1)));
 
    pos1 = pos2 + c.size();
    pos2 = s.find(c, pos1);
  }
  if(pos1 != s.length())
    v.push_back(stod(s.substr(pos1)));
}
int main(){
	string s;
	ios::sync_with_stdio(false);
	mat X, Y;
	for (int i = 0; i < 4; i++){
		cin>>s;
		vector<double> v;
		SplitString(s, v, ",");
		vector<double> v1(v.begin(), v.end() - 1);
		v1.insert(v1.begin(), 1.0);
		vector<double> v2(1, *v.rbegin());
		X.push_back(v1);
		Y.push_back(v2);
	}
	for (int i = 0; i < X.size(); i++){
		for (int j = 0; j < X[i].size(); j++){
			cout << X[i][j] << ", ";
		}
		cout << endl;
	}
	for (int i = 0; i < Y.size(); i++){
		for (int j = 0; j < Y[i].size(); j++){
			cout << Y[i][j] << ", ";
		}
		cout << endl;
	}
	auto Xt = trans(X);
	auto A = mul(Xt, X);
	auto B = mul(Xt, Y);
	auto C = tovec(B);
	auto ans = gauss_jordan(A, C);
	for (int i = 0; i < ans.size() - 1; i++){
		cout << fixed<<setprecision(2)<< ans[i] << ",";
	}
	cout << fixed<<setprecision(2) << *ans.rbegin() << endl;
	return 0;
}

python實現

import sys, math
import numpy as np


def solve(X,Y):
    A = np.dot(X.T, X)
    B = np.dot(X.T, Y.T)
    W = np.squeeze(np.linalg.solve(A, B).T).tolist()
    return W


if __name__=='__main__':
    X = []
    Y = []
    while True:
        line = sys.stdin.readline().strip('\r\n')

        if line == '':
            break

        nums = line.split(',')
        nums = [float(n) for n in nums]
        X.append([1.0] + nums[:-1])
        Y.append(nums[-1])

    X = np.array(X, dtype = np.float32)
    Y = np.array(Y, dtype = np.float32, ndmin = 2)
    coefs = solve(X,Y)

    formatted_coefs = []
    for coef in coefs:
        coef = math.floor(coef*100)/100
        formatted_coefs.append('%.2f' %coef)
    print(','.join(formatted_coefs))