基於模型融合的推薦系統實現(2):迭代式SVD分解
阿新 • • 發佈:2018-10-31
SVD演算法的原理網路上也有很多,不再細說了,關鍵是我們得到的資料是不完整的資料,所以要算SVD就必須做一次矩陣補全。補全的方式有很多,這裡推薦使用均值補全的方法(用每一行均值和每一列均值的平均來代替空白處),然後可以計算SVD,作PCA分析,然後就可以得到預測結果。
但是我們這裡有一個極為關鍵的思路:迭代式SVD。我們用第一次SVD預測得到的值來替換原來用均值補全的預測值,然後繼續做SVD分解,直到收斂。這種方法非常有效,最後得到的效果也不錯(RMSE在0.87左右,而第一次迭代的RMSE接近0.98)。
同樣將中間結果儲存到文字檔案裡面,使得程式可以中斷之後繼續計算。
from collections import ChainMap, Counter, defaultdict, namedtuple
from collections.abc import Iterable
from functools import reduce
from itertools import chain, compress, groupby
from queue import PriorityQueue
from statistics import mean

import numpy as np

from code import read_file
from PCA import get_train
def get_mean(train):
    """Compute per-user, per-item and overall mean ratings.

    Args:
        train: mapping ``user_id -> {item_id: rating}``.

    Returns:
        A tuple ``(mean_u, mean_i, all_mean)`` where

        * ``mean_u`` maps each user to the mean of that user's ratings,
        * ``mean_i`` is a ``defaultdict`` mapping each rated item to the mean
          of its ratings (indexing an unrated item yields ``0`` and inserts
          the key, so use ``in`` / ``.get`` to probe without side effects),
        * ``all_mean`` is the mean of the per-item means, or ``0.0`` when
          there are no rated items at all.

    Raises:
        statistics.StatisticsError: if some user has an empty rating dict.
    """
    mean_u = {}
    totals = defaultdict(lambda: 0)
    counts = defaultdict(lambda: 0)
    for u, user_items in train.items():
        mean_u[u] = mean(user_items.values())
        for item, r in user_items.items():
            totals[item] += r
            counts[item] += 1

    # Kept as a defaultdict so existing callers that index unknown items
    # still get 0 instead of a KeyError.
    mean_i = defaultdict(lambda: 0)
    for item, total in totals.items():
        mean_i[item] = total / counts[item]

    # Guard against an empty training set instead of dividing by zero.
    all_mean = sum(mean_i.values()) / len(mean_i) if mean_i else 0.0
    return mean_u, mean_i, all_mean
def construct_matrix(train=None):
    """Build the dense, mean-centred user x item rating matrix.

    User and item IDs are treated as 1-based: user ``u`` is stored in row
    ``u - 1`` and item ``i`` in column ``i - 1``.  Every cell holds a value
    minus the overall mean, rounded to 3 decimals:

    * a rated cell holds the actual rating;
    * an unrated cell holds the average of the user's mean and the item's
      mean;
    * an unrated cell whose item has no ratings at all falls back to the
      user's mean (there is no column mean to average with).

    Rows for user IDs that do not appear in ``train`` are left at zero.

    Args:
        train: mapping ``user_id -> {item_id: rating}``.  When ``None``
            (the default) the data is loaded from ``smaller_train.txt``.
            Loading is done at call time rather than in the signature so
            that importing this module does not read the file.

    Returns:
        ``numpy.ndarray`` of shape ``(max user id, max item id)``; an empty
        ``(0, 0)`` array when ``train`` is empty.
    """
    if train is None:
        # get train data from the smaller data set
        train = get_train(path=r'smaller_train.txt')
    if not train:
        return np.zeros((0, 0))

    mean_u, mean_i, all_mean = get_mean(train)
    row = max(train)
    col = max(max(user_items) for user_items in train.values())

    matrix = np.zeros((row, col))
    for u, user_items in train.items():
        user_mean = mean_u[u]
        for i in range(col):
            item = i + 1
            if item in user_items:
                value = user_items[item]
            elif item in mean_i:
                value = (user_mean + mean_i[item]) / 2
            else:
                # mean_i is a defaultdict: indexing it here would silently
                # invent an item mean of 0, so test membership instead.
                value = user_mean
            matrix[u - 1][i] = round(value - all_mean, 3)
    return matrix
def _rated_mask(train, shape):
    """Return a boolean array of ``shape`` that is True where a rating exists."""
    mask = np.zeros(shape, dtype=bool)
    n_rows, n_cols = shape
    for user_id, user_items in train.items():
        if not 1 <= user_id <= n_rows:
            continue
        for item_id in user_items:
            if 1 <= item_id <= n_cols:
                mask[user_id - 1, item_id - 1] = True
    return mask


def save_svd_predict(k, steps=10):
    """Run iterative rank-``k`` SVD imputation and save the factors to disk.

    Each iteration computes a rank-``k`` approximation of the current matrix,
    writes the factors to ``u{k}.txt``, ``s{k}.txt`` and ``v{k}.txt``, reports
    the error through ``RMES``, and then puts the known (rated) entries back
    into the approximation so that only the unrated cells are re-estimated
    on the next pass.

    If factor files from an earlier run exist, the iteration resumes from
    them; otherwise it starts from the mean-filled matrix returned by
    ``construct_matrix``.

    Args:
        k: number of singular values/vectors to keep; also used in the
            names of the saved files.
        steps: number of SVD iterations to run (default 10).
    """
    train = get_train(path=r'smaller_train.txt')
    initial = construct_matrix(train)
    _, _, all_mean = get_mean(train)

    try:
        n = get_svd_predict(index=k)  # get last result
    except OSError:
        # No saved factors yet (first run): start from the mean-filled matrix.
        n = initial.copy()

    print('svd start')
    # The set of rated cells never changes, so compute it once.
    rated = _rated_mask(train, initial.shape)

    for step in range(steps):
        print(step)
        # Only the first k components are kept, so the reduced SVD yields
        # the same truncated factors without building the full square U/V.
        u, s, v = np.linalg.svd(n, full_matrices=False)
        u = u[:, :k]
        S = np.diag(s[:k])
        v = v[:k, :]
        n = np.dot(u, np.dot(S, v))

        np.savetxt('u{}.txt'.format(k), u)
        np.savetxt('s{}.txt'.format(k), S)
        np.savetxt('v{}.txt'.format(k), v)
        RMES(get_svd_predict(index=k), all_mean)

        # recover value rated
        n[rated] = initial[rated]
    print('svd finished')
def get_svd_predict(index):
    """Rebuild the prediction matrix from saved SVD factors.

    Reads ``u{index}.txt``, ``s{index}.txt`` and ``v{index}.txt`` from the
    current working directory and returns the product ``u . s . v``.

    Args:
        index: value used to format the three file names.

    Returns:
        2-D ``numpy.ndarray`` with the product of the three loaded matrices.

    Raises:
        OSError: if any of the three files cannot be opened.
    """
    # ndmin=2 keeps single-row / single-column files two-dimensional;
    # without it loadtxt squeezes them and the matrix product is wrong or
    # fails when only one component (or one user/item) was saved.
    u = np.loadtxt('u{0}.txt'.format(index), ndmin=2)
    s = np.loadtxt('s{0}.txt'.format(index), ndmin=2)
    v = np.loadtxt('v{0}.txt'.format(index), ndmin=2)
    return np.dot(u, np.dot(s, v))
def svd_predict(u, i, predictions):
    """Look up the predicted value for user ``u`` and item ``i``.

    IDs are 1-based: the value is read from ``predictions[u - 1][i - 1]``.

    Args:
        u: 1-based user ID.
        i: 1-based item ID.
        predictions: 2-D indexable (e.g. a NumPy array or list of lists).

    Returns:
        The stored prediction, or ``None`` when either ID is out of range
        or the lookup is not possible.
    """
    try:
        # IDs below 1 would turn into negative indices and silently wrap
        # around to the last row/column, returning an unrelated prediction.
        if u < 1 or i < 1:
            return None
        return predictions[u - 1][i - 1]
    except (IndexError, KeyError, TypeError):
        return None
def write_ans(w_path, data):
    """Write each value in ``data`` to ``w_path``, one per line.

    Any existing content of the file is replaced.  Values are formatted
    with three digits after the decimal point.

    Args:
        w_path: path of the output file.
        data: iterable of numbers.
    """
    # Opening in 'w' mode already truncates the file, so a separate
    # truncate-then-append pass is unnecessary.
    with open(w_path, 'w') as file:
        file.writelines('{0:.3f}\n'.format(r) for r in data)