1. 程式人生 > >程世東老師TensorFlow實戰——個性化推薦,程式碼學習筆記之③推薦過程






1、首先,拋開這個實戰程式碼過程,先介紹一下 TensorFlow 模型的恢復。這裡我主要參考這篇文章,它對 TensorFlow 模型的儲存與恢復寫得很清楚:https://www.cnblogs.com/hejunlin1992/p/7767912.html


# NOTE(review): this is an illustrative excerpt of the restore pattern used later
# in the file; it appears before the imports and before `load_dir` / `get_tensors`
# are defined, so it is not runnable at this position.
loaded_graph = tf.Graph() # 1. Create a new graph to hold the restored model

with tf.Session(graph=loaded_graph) as sess:  # 2. Open a session bound to that graph
    # 3. Load the saved (already trained) model
    loader = tf.train.import_meta_graph(load_dir + '.meta')# 3.1 The graph structure was saved in the .meta file; importing it rebuilds the network
    loader.restore(sess, load_dir)# 3.2 Load the parameters: restore() brings back the network's saved variable values

    # 4. Get Tensors from loaded model
    # Restoring the network needs more than the graph and the weights: a new feed_dict must be prepared to feed new data to the network.
    # graph.get_tensor_by_name() is used to look up the saved operations and placeholders by name.
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, movie_combine_layer_flat, __ = get_tensors(loaded_graph)  #loaded_graph




import tensorflow as tf
import os
import pickle

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter

import re
from tensorflow.python.ops import math_ops
import random

import matplotlib.pyplot as plt
import time
import datetime

def load_params():
    """
    Load parameters from file.

    Reads and unpickles 'params.p' from the current working directory and
    returns whatever object was stored in it. In this file the result is
    used as the checkpoint path prefix (`load_dir`).

    Raises whatever `open` / `pickle.load` raise (e.g. FileNotFoundError
    when 'params.p' does not exist).
    """
    # NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
    # The context manager guarantees the file handle is closed after loading.
    with open('params.p', mode='rb') as params_file:
        return pickle.load(params_file)
load_dir = load_params()  # checkpoint path prefix; '.meta' is appended to it when the graph is imported

# Load the preprocessed dataset. The context manager closes the file handle,
# which the bare open(...) call previously leaked.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('preprocess.p', mode='rb') as preprocess_file:
    title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = pickle.load(preprocess_file)
embed_dim = 32
                                        # Example rows (from the original notes; presumably the layout of `features`):
                                        # [ [1, 1193, 0, ..., 10,list([ title]),list([ genres])],
                                        #   [2, 1193, 1, ..., 16,list([ ]),list([ ])],
                                        #   [12, 1193, 1, ..., 12,list([ ]),list([ ])],
                                        #   ...,
                                        #   [5938, 2909, 1, ..., 1,list([ ]),list([ ])]
                                        # ]

uid_max = max(features.take(0,1)) + 1 # 6040
                                      # ndarray.take(indices, axis=None, out=None, mode='raise') picks elements along
                                      # `axis`; take(k, 1) therefore returns column k of `features`
gender_max = max(features.take(2,1)) + 1 # 1 + 1 = 2
age_max = max(features.take(3,1)) + 1 # 6 + 1 = 7
job_max = max(features.take(4,1)) + 1# 20 + 1 = 21
movie_id_max = max(features.take(1,1)) + 1 # 3952
movie_categories_max = max(genres2int.values()) + 1 # 18 + 1 = 19
movie_title_max = len(title_set) # 5216
                                 # title_set: per the original notes, the set of words that occur in movie titles

combiner = "sum"

sentences_size = title_count # title_count = 15 per the original notes: fixed length of a movie title field (shorter titles are padded)
# Sliding-window sizes for the text convolution: 2, 3, 4 and 5 words
window_sizes = {2, 3, 4, 5}
filter_num = 8

movieid2idx = {val[0]:i for i, val in enumerate(movies.values)} # maps the first column of each movies row (the movie id) to its row index

# Number of Epochs
num_epochs = 5
# Batch Size
batch_size = 256

dropout_keep = 0.5
# Learning Rate
learning_rate = 0.0001
# Show stats for every n number of batches
show_every_n_batches = 20

#獲取 Tensors
#使用函式 get_tensor_by_name()從 loaded_graph 中獲取tensors,後面的推薦功能要用到
def get_tensors(loaded_graph):
    """Look up the tensors needed for inference in a restored graph.

    Args:
        loaded_graph: object exposing ``get_tensor_by_name(name)`` (a
            ``tf.Graph`` into which the saved model has been imported).

    Returns:
        A 13-tuple, in this order: uid, user_gender, user_age, user_job,
        movie_id, movie_categories, movie_titles, targets, lr,
        dropout_keep_prob, inference, movie_combine_layer_flat,
        user_combine_layer_flat.
    """
    fetch = loaded_graph.get_tensor_by_name

    # Input placeholders: restoring a network needs not only the graph and the
    # weights but also handles for building a fresh feed_dict.
    input_names = (
        "uid:0",
        "user_gender:0",
        "user_age:0",
        "user_job:0",
        "movie_id:0",
        "movie_categories:0",
        "movie_titles:0",
        "targets:0",
        "dropout_keep_prob:0",
    )
    (
        uid_t,
        gender_t,
        age_t,
        job_t,
        movie_id_t,
        categories_t,
        titles_t,
        targets_t,
        keep_prob_t,
    ) = [fetch(tensor_name) for tensor_name in input_names]
    lr_t = fetch("LearningRate:0")

    # The two rating-prediction schemes expose the inference tensor under
    # different names; the alternative one is "inference/inference/BiasAdd:0".
    # The name used here was "MatMul:0" before the inference code changed.
    inference_t = fetch("inference/ExpandDims:0")
    movie_features_t = fetch("movie_fc/Reshape:0")
    user_features_t = fetch("user_fc/Reshape:0")

    return (
        uid_t,
        gender_t,
        age_t,
        job_t,
        movie_id_t,
        categories_t,
        titles_t,
        targets_t,
        lr_t,
        keep_prob_t,
        inference_t,
        movie_features_t,
        user_features_t,
    )

def rating_movie(user_id_val, movie_id_val):
    """Run the restored model's inference tensor for one (user, movie) pair.

    Args:
        user_id_val: user id; row ``user_id_val - 1`` of ``users.values`` is
            used, i.e. ids are assumed to be 1-based and contiguous.
        movie_id_val: movie id, translated to a row of ``movies.values``
            through ``movieid2idx``.

    Returns:
        The list returned by ``sess.run([inference], feed)`` (a one-element
        list holding the inference output).
    """
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        # Step 1: rebuild the network from the .meta file.
        saver = tf.train.import_meta_graph(load_dir + '.meta')
        # Step 2: restore the saved parameters into the session.
        saver.restore(sess, load_dir)

        # Handles for the placeholders and the inference output.
        (uid, user_gender, user_age, user_job, movie_id, movie_categories,
         movie_titles, targets, lr, dropout_keep_prob, inference,
         _, __) = get_tensors(graph)

        movie_row = movies.values[movieid2idx[movie_id_val]]

        # Column 2 of a movie row holds the genre ids, column 1 the title word ids.
        categories = np.zeros([1, 18])
        categories[0] = movie_row[2]
        titles = np.zeros([1, sentences_size])
        titles[0] = movie_row[1]

        user_row = users.values[user_id_val-1]

        feed = {
            uid: np.reshape(user_row[0], [1, 1]),
            user_gender: np.reshape(user_row[1], [1, 1]),
            user_age: np.reshape(user_row[2], [1, 1]),
            user_job: np.reshape(user_row[3], [1, 1]),
            movie_id: np.reshape(movie_row[0], [1, 1]),
            movie_categories: categories,
            movie_titles: titles,
            dropout_keep_prob: 1,  # keep probability 1: no dropout at inference time
        }

        # Get Prediction
        return sess.run([inference], feed)


# Build the movie feature matrix: run every movie through the movie branch of
# the restored network, collect the outputs and persist them to movie_matrics.p.
loaded_graph = tf.Graph()  # 1. Create a new graph to hold the restored model
movie_matrics = []
with tf.Session(graph=loaded_graph) as sess:  # 2. Open a session bound to that graph
    # Load the saved (already trained) model
    loader = tf.train.import_meta_graph(load_dir + '.meta')  # the graph structure is stored in the .meta file
    loader.restore(sess, load_dir)  # restore the saved parameters

    # Get Tensors from loaded model: besides graph and weights, a new feed_dict
    # is needed, so the placeholders are looked up by name.
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, movie_combine_layer_flat, __ = get_tensors(loaded_graph)  #loaded_graph

    for item in movies.values:
        # Example tail of a movie row (from the original notes):
        #        list([106, 2958, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543]),
        #        list([15, 5, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])]

        categories = np.zeros([1, 18])  # one row of 18 zeros
        categories[0] = item.take(2)  # fill that row with the movie's genre list (column 2)
        titles = np.zeros([1, sentences_size])
        titles[0] = item.take(1)      # column 1 holds the title as a list of word ids

        feed = {
            movie_id: np.reshape(item.take(0), [1, 1]),
            movie_categories: categories,  # the training code fed x.take(6,1) here
            movie_titles: titles,  # the training code fed x.take(5,1) here
            dropout_keep_prob: 1}

        # Evaluate the last op of the movie branch to obtain this movie's feature vector.
        movie_combine_layer_flat_val = sess.run([movie_combine_layer_flat], feed)
        # Collect the result; without this the pickled matrix would be empty.
        movie_matrics.append(movie_combine_layer_flat_val)

# Store all movie features in movie_matrics.p (one row of 200 values per movie),
# then read them back. The context managers close (and flush) the file handles.
with open('movie_matrics.p', 'wb') as movie_matrics_file:
    pickle.dump((np.array(movie_matrics).reshape(-1, 200)), movie_matrics_file)
with open('movie_matrics.p', mode='rb') as movie_matrics_file:
    movie_matrics = pickle.load(movie_matrics_file)

# Build the user feature matrix: run every user through the user branch of the
# restored network, collect the outputs and persist them to users_matrics.p.
loaded_graph = tf.Graph()  #
users_matrics = []
with tf.Session(graph=loaded_graph) as sess:  #
    # Load saved model
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    # Get Tensors from loaded model
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, __,user_combine_layer_flat = get_tensors(loaded_graph)  #loaded_graph

    for item in users.values:

        # Columns 0..3 of a user row feed the uid, gender, age and job placeholders.
        feed = {
            uid: np.reshape(item.take(0), [1, 1]),
            user_gender: np.reshape(item.take(1), [1, 1]),
            user_age: np.reshape(item.take(2), [1, 1]),
            user_job: np.reshape(item.take(3), [1, 1]),
            dropout_keep_prob: 1}

        user_combine_layer_flat_val = sess.run([user_combine_layer_flat], feed)
        # Collect the result; without this the pickled matrix would be empty.
        users_matrics.append(user_combine_layer_flat_val)

# Store all user features in users_matrics.p (one row of 200 values per user),
# then read them back. The context managers close (and flush) the file handles.
with open('users_matrics.p', 'wb') as users_matrics_file:
    pickle.dump((np.array(users_matrics).reshape(-1, 200)), users_matrics_file)
with open('users_matrics.p', mode='rb') as users_matrics_file:
    users_matrics = pickle.load(users_matrics_file)

# 開始推薦電影
# 使用生成的使用者特徵矩陣和電影特徵矩陣做電影推薦,這裡有三種方法,都可以在命令列進行呼叫來推薦

# 1、推薦同類型的電影
# 思路是計算當前看的電影特徵向量與整個電影特徵矩陣的餘弦相似度,取相似度最大的top_k個
def recommend_same_type_movie(movie_id_val, top_k = 20):
    """Recommend movies whose feature vectors are most similar to a given movie.

    The given movie's feature vector is multiplied with every row-normalised
    movie feature vector. The ``top_k`` highest scores are kept and turned
    into a probability distribution, from which up to 5 distinct movies are
    drawn at random.

    Args:
        movie_id_val: id of the movie the user watched (a key of ``movieid2idx``).
        top_k: number of most-similar movies that form the candidate pool.

    Returns:
        A set of row indices into ``movie_matrics`` / ``movies_orig``. It holds
        5 entries, or fewer when fewer candidates have a non-zero probability
        (e.g. ``top_k < 5``); previously that case looped forever.

    Side effects:
        Prints the watched movie, a header line, and for every recommendation
        its row index followed by the matching ``movies_orig`` entry.

    Note:
        ``np.random.choice`` rejects negative probabilities, so this assumes
        the ``top_k`` retained scores are non-negative.
    """
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # L2 norm of every movie row. Summing along axis 1 with keep_dims=True
        # keeps a trailing axis of size 1 so the division below broadcasts per row.
        norm_movie_matrics = tf.sqrt(tf.reduce_sum(tf.square(movie_matrics), 1, keep_dims=True))
        normalized_movie_matrics = movie_matrics / norm_movie_matrics

        # Feature vector of the watched movie (movie id -> row index via movieid2idx).
        # The query vector itself is not normalised; that only scales all scores
        # by the same factor, which cancels when p is normalised below.
        probs_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        # Dot product of the query with every normalised movie vector.
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalized_movie_matrics))
        sim = (probs_similarity.eval())  # evaluate the tensor to a numpy array

        # movies_orig holds the raw, human-readable movie records.
        print("您看的電影是:{}".format(movies_orig[movieid2idx[movie_id_val]]))
        print("以下是給您的推薦:")
        p = np.squeeze(sim)  # drop the leading axis of size 1

        # argsort is ascending, so [:-top_k] selects everything except the
        # top_k largest scores; zero those out.
        p[np.argsort(p)[:-top_k]] = 0
        p = p / np.sum(p)  # normalise the remaining scores into probabilities

        # Only entries with non-zero probability can ever be drawn, so cap the
        # target size to avoid an endless loop when there are fewer than 5.
        num_results = min(5, int(np.count_nonzero(p)))
        results = set()
        while len(results) != num_results:
            # Draw one row index with probability p; the set removes repeats.
            c = np.random.choice(len(p), 1, p=p)[0]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])
        return results
#recommend_same_type_movie(1401, 20)輸出
#您看的電影是:[1401 'Ghosts of Mississippi (1996)' 'Drama']
# 以下是給您的推薦:
# 3385
# [3454 'Whatever It Takes (2000)' 'Comedy|Romance']
# 707
# [716 'Switchblade Sisters (1975)' 'Crime']
# 2351
# [2420 'Karate Kid, The (1984)' 'Drama']
# 2189
# [2258 'Master Ninja I (1984)' 'Action']
# 2191
# [2260 'Wisdom (1986)' 'Action|Crime']

def recommend_your_favorite_movie(user_id_val, top_k = 10):
    """Recommend movies a given user is predicted to like.

    The user's feature vector is multiplied with every movie feature vector
    (no normalisation). The ``top_k`` highest scores are kept and turned into
    a probability distribution, from which up to 5 distinct movies are drawn
    at random.

    Args:
        user_id_val: user id; row ``user_id_val - 1`` of ``users_matrics`` is
            used, i.e. ids are assumed to be 1-based and contiguous.
        top_k: number of best-scoring movies that form the candidate pool.

    Returns:
        A set of row indices into ``movie_matrics`` / ``movies_orig``. It holds
        5 entries, or fewer when fewer candidates have a non-zero probability.

    Side effects:
        Prints a header line and, for every recommendation, its row index
        followed by the matching ``movies_orig`` entry.

    Note:
        ``np.random.choice`` rejects negative probabilities, so this assumes
        the ``top_k`` retained scores are non-negative.
    """

    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Here the query is a *user* feature vector, and no cosine normalisation is applied.
        probs_embeddings = (users_matrics[user_id_val-1]).reshape([1, 200])

        # The product plays the role of the predicted score for every movie.
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(movie_matrics))
        sim = (probs_similarity.eval())

        print("以下是給您的推薦:")
        p = np.squeeze(sim)
        # Zero everything except the top_k largest scores, then normalise.
        p[np.argsort(p)[:-top_k]] = 0
        p = p / np.sum(p)

        # Only entries with non-zero probability can ever be drawn, so cap the
        # target size to avoid an endless loop when there are fewer than 5.
        num_results = min(5, int(np.count_nonzero(p)))
        results = set()
        while len(results) != num_results:
            c = np.random.choice(len(p), 1, p=p)[0]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])

        return results
# recommend_your_favorite_movie(234, 10)
# 以下是給您的推薦:
# 1642
# [1688 'Anastasia (1997)' "Animation|Children's|Musical"]
# 994
# [1007 'Apple Dumpling Gang, The (1975)' "Children's|Comedy|Western"]
# 667
# [673 'Space Jam (1996)' "Adventure|Animation|Children's|Comedy|Fantasy"]
# 1812
# [1881 'Quest for Camelot (1998)' "Adventure|Animation|Children's|Fantasy"]
# 1898
# [1967 'Labyrinth (1986)' "Adventure|Children's|Fantasy"]
def recommend_other_favorite_movie(movie_id_val, top_k = 20):
    """Recommend what people who like a given movie also like.

    Steps:
      1. Score every user against the given movie's feature vector and keep
         the ``top_k`` best-scoring users.
      2. Score every movie for each of those users and take each user's
         single best movie.
      3. Draw up to 5 distinct movies at random from those favourites.

    Args:
        movie_id_val: id of the movie the user watched (a key of ``movieid2idx``).
        top_k: number of users considered fans of the movie.

    Returns:
        A set of row indices into ``movie_matrics`` / ``movies_orig``. It holds
        5 entries, or fewer when the selected users share fewer than 5
        distinct favourite movies (previously that case looped forever).

    Side effects:
        Prints the watched movie, the selected users, a header line, and for
        every recommendation its row index followed by the ``movies_orig`` entry.
    """
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Feature vector of the given movie.
        probs_movie_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        probs_user_favorite_similarity = tf.matmul(probs_movie_embeddings, tf.transpose(users_matrics))
        # Row indices (0-based, into users_matrics) of the top_k users scoring
        # highest for this movie.
        favorite_user_idx = np.argsort(probs_user_favorite_similarity.eval())[0][-top_k:]

        print("您看的電影是:{}".format(movies_orig[movieid2idx[movie_id_val]]))
        # NOTE(review): assumes users_orig is a numpy array row-aligned with
        # users_matrics, as movies_orig is with movie_matrics - confirm.
        print("喜歡看這個電影的人是:{}".format(users_orig[favorite_user_idx]))

        # argsort already yields row indices, so index directly; subtracting 1
        # (as if these were 1-based user ids) selected the wrong rows.
        probs_users_embeddings = (users_matrics[favorite_user_idx]).reshape([-1, 200])
        # Scores of every movie for each selected user.
        probs_similarity = tf.matmul(probs_users_embeddings, tf.transpose(movie_matrics))
        sim = (probs_similarity.eval())
        # Best-scoring movie (row index) per selected user.
        p = np.argmax(sim, 1)
        print("喜歡看這個電影的人還喜歡看:")

        # Cap the target size by the number of distinct favourites so the
        # sampling loop always terminates.
        num_results = min(5, len(np.unique(p)))
        results = set()
        while len(results) != num_results:
            # Pick a random selected user and take that user's favourite movie;
            # len(p) (not top_k) keeps the index valid when fewer users exist.
            c = p[random.randrange(len(p))]
            results.add(c)
        for val in (results):
            print(val)
            print(movies_orig[val])
        return results
#recommend_other_favorite_movie(1401, 20)
# 您看的電影是:[1401 'Ghosts of Mississippi (1996)' 'Drama']
# 喜歡看這個電影的人是:[[5782 'F' 35 0]
 # [5767 'M' 25 2]
 # [3936 'F' 35 12]
 # [3595 'M' 25 0]
 # [1696 'M' 35 7]
 # [2728 'M' 35 12]
 # [763 'M' 18 10]
 # [4404 'M' 25 1]
 # [3901 'M' 18 14]
 # [371 'M' 18 4]
 # [1855 'M' 18 4]
 # [2338 'M' 45 17]
 # [450 'M' 45 1]
 # [1130 'M' 18 7]
 # [3035 'F' 25 7]
 # [100 'M' 35 17]
 # [567 'M' 35 20]
 # [5861 'F' 50 1]
 # [4800 'M' 18 4]
 # [3281 'M' 25 17]]
# 喜歡看這個電影的人還喜歡看:
# 1779
# [1848 'Borrowers, The (1997)' "Adventure|Children's|Comedy|Fantasy"]
# 1244
# [1264 'Diva (1981)' 'Action|Drama|Mystery|Romance|Thriller']
# 1812
# [1881 'Quest for Camelot (1998)' "Adventure|Animation|Children's|Fantasy"]
# 1742
# [1805 'Wild Things (1998)' 'Crime|Drama|Mystery|Thriller']
# 2535
# [2604 'Let it Come Down: The Life of Paul Bowles (1998)' 'Documentary']
