1. 程式人生 > >基於社交網路的使用者與基於物品的協同過濾推薦演算法-java

基於社交網路的使用者與基於物品的協同過濾推薦演算法-java

完整工程+資料來源:https://github.com/scnuxiaotao/recom_sys
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;

/**
 * Item-based collaborative filtering on implicit feedback: only the fact that a user has a
 * record for an item is used, never the rating value.
 *
 * <p>{@link #_Run()} drives the whole pipeline: size the matrices from the two id files,
 * load the rating file into {@link #train}, compute the item-item cosine similarity, sort each
 * item's neighbours, score the items a user has no record for, keep the best {@link #N} per
 * user and write them to {@link #road3}.
 *
 * <p>All state lives in mutable static fields without any synchronization, so the class is
 * meant to be driven from a single thread, one run at a time.
 */
public class itemcf {

    /** Upper bound on how many users {@link #_Run()} echoes to the console. */
    private static final int PREVIEW_USERS = 200;

    static int usersum = 20836;      // number of users; replaced by the line count of road4
    static int itemsum = 200;        // number of items; replaced by the line count of road2
    static int N = 3;                // number of recommendations kept per user
    static int[][] train;            // training matrix [item][user]; 1 = the user has a record for the item
    static int[][] test;             // test matrix [item][user]; allocated but never filled by this class
    static double[][] trainuseritem; // predicted interest [user][item]
    static int[][] recommend;        // [user][rank] -> 1-based item number, 0 = empty slot
    static simi[][] simiItem;        // per item row: every item, ordered by descending similarity
    static double[][] itemsim;       // unsorted item-item similarity matrix
    static String road = "data/6 總評論情感分析結果/酒店-評論(已轉化).txt";  // ratings, one "user::item::rate" record per line
    static String road2 = "data/10 推薦/(已轉化)天河酒店ID.txt";            // "hotelName::itemNumber" per line
    static String road3 = "data/10 推薦/物品推薦.txt";                      // output written by out_txt
    public static String road4 = "data/10 推薦/(已轉化)使用者ID.txt";       // its line count is used as the user count

    /** One neighbour entry: a similarity value together with the item it refers to. */
    public static class simi {
        double value; // similarity score
        int num;      // 0-based index of the similar item
    }

    /**
     * Runs the complete recommendation pipeline and writes the result file.
     *
     * @throws IOException if an id file cannot be read or the result file cannot be written
     */
    public static void _Run() throws IOException {
        get_user_hotel_num();
        System.out.println("usersum: " + usersum);
        System.out.println("itemsum: " + itemsum);

        train = new int[itemsum][usersum];
        test = new int[itemsum][usersum];
        trainuseritem = new double[usersum][itemsum];
        recommend = new int[usersum][N];
        simiItem = new simi[itemsum][itemsum];
        itemsim = new double[itemsum][itemsum];

        // Number of most-similar neighbours consulted when scoring an item for a user.
        final int k = 8;

        for (int i = 0; i < itemsum; ++i) {
            for (int j = 0; j < itemsum; ++j) {
                simiItem[i][j] = new simi();
            }
        }

        System.out.println("1.訓練集");
        SplitData(8, 1);

        System.out.println("2.計算物品之間相似性,得到相似性矩陣");
        for (int i = 0; i < itemsum; i++) {
            // An item must never be its own neighbour.
            itemsim[i][i] = 0;
            // The measure is symmetric, so each pair is computed once and mirrored.
            for (int j = i + 1; j < itemsum; j++) {
                double similarity = Simility(train[i], train[j]);
                itemsim[i][j] = similarity;
                itemsim[j][i] = similarity;
            }
        }

        System.out.println("3.物品相似度由高到低排序");
        sort();

        System.out.println("4.得到使用者對物品興趣程度的矩陣");
        for (int user = 0; user < usersum; user++) {
            for (int item = 0; item < itemsum; item++) {
                // Only items the user has no record for are candidates for recommendation.
                if (train[item][user] == 0) {
                    getUserLikeItem(user, item, k);
                }
            }
        }

        System.out.println("5.通過物品興趣程度,推薦前N個");
        getRecommend();

        // Console preview; bounded by the real user count so small data sets do not overrun.
        int preview = Math.min(PREVIEW_USERS, usersum);
        for (int i = 0; i < preview; i++) {
            System.out.println("user" + (i + 1));
            for (int j = 0; j < N; j++) {
                if (recommend[i][j] != 0) {
                    System.out.print(recommend[i][j] + " ");
                }
            }
            System.out.println();
        }

        System.out.println("6.輸出到txt");
        out_txt(road2, road3);
    }

    /**
     * Sets {@link #itemsum} to the number of lines in {@link #road2} and {@link #usersum} to
     * the number of lines in {@link #road4}.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road2);
        usersum = countLines(road4);
    }

    /** Counts the lines of a text file, closing it even when reading fails. */
    private static int countLines(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            int lines = 0;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        } finally {
            reader.close();
        }
    }

    /**
     * Writes one line per user to {@code r2}: {@code user<n>} followed by {@code ::<hotelName>}
     * for every filled slot of {@link #recommend}, terminated by CRLF.
     *
     * @param r1 hotel id file, one {@code hotelName::itemNumber} record per line
     * @param r2 output file; overwritten
     * @throws IOException if {@code r1} cannot be read or {@code r2} cannot be written
     * @throws NumberFormatException if an item number in {@code r1} is not an integer
     */
    public static void out_txt(String r1, String r2) throws IOException {
        // Read the names first so a bad input file does not leave a truncated output behind.
        String[] hotel = loadHotelNames(r1);

        FileWriter fw = new FileWriter(r2);
        try {
            for (int i = 0; i < usersum; i++) {
                fw.write("user" + (i + 1));
                for (int j = 0; j < N; j++) {
                    int id = recommend[i][j];
                    // 0 marks an empty slot; ids without a known name are left out.
                    if (id != 0 && id < hotel.length && hotel[id] != null) {
                        fw.write("::" + hotel[id]);
                    }
                }
                fw.write("\r\n");
            }
        } finally {
            fw.close();
        }
    }

    /**
     * Reads {@code hotelName::itemNumber} records into an array indexed by item number.
     * The array grows as needed, so item numbers are not limited to a fixed maximum.
     */
    private static String[] loadHotelNames(String path) throws IOException {
        String[] names = new String[Math.max(itemsum, 0) + 1];
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue; // blank or incomplete line
                }
                int id = Integer.parseInt(fields[1].trim());
                if (id < 0) {
                    continue;
                }
                if (id >= names.length) {
                    String[] bigger = new String[Math.max(id + 1, names.length * 2)];
                    System.arraycopy(names, 0, bigger, 0, names.length);
                    names = bigger;
                }
                names[id] = fields[0];
            }
        } finally {
            reader.close();
        }
        return names;
    }

    /**
     * Loads the rating file {@link #road} into {@link #train}, marking {@code train[item-1][user-1]}
     * with 1 for every {@code user::item::rate} record whose ids lie in
     * {@code 1..usersum} / {@code 1..itemsum}. Records outside that range, and lines whose first
     * two fields are not integers, are skipped.
     *
     * <p>The train/test split is currently disabled: every record goes to the training matrix and
     * both parameters are ignored. I/O failures are printed to stderr and otherwise ignored, as
     * this method has always been best-effort.
     *
     * @param m unused (intended: 1/m of the records form the test set)
     * @param k unused (intended: selects which slice becomes the test set)
     * @return always 1
     */
    public static int SplitData(int m, int k) {
        try {
            BufferedReader reader = new BufferedReader(new FileReader(road));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] fields = line.split("::");
                    if (fields.length < 2) {
                        continue;
                    }
                    int user;
                    int item;
                    try {
                        user = Integer.parseInt(fields[0].trim());
                        item = Integer.parseInt(fields[1].trim());
                    } catch (NumberFormatException malformed) {
                        continue; // not a data record
                    }
                    // Ids in the file are 1-based, the matrix is 0-based.
                    if (user >= 1 && user <= usersum && item >= 1 && item <= itemsum) {
                        train[item - 1][user - 1] = 1;
                    }
                }
            } finally {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 1;
    }

    /**
     * Cosine similarity of two items over binary user vectors:
     * {@code |users of both| / sqrt(|users of A| * |users of B|)}.
     *
     * @param ItemA user vector of the first item; a value &gt; 0 means the user has a record
     * @param ItemB user vector of the second item
     * @return the similarity, or 0 when either item has no users
     */
    public static double Simility(int[] ItemA, int[] ItemB) {
        // Never read past the shorter vector, even if usersum is out of step with the arrays.
        int limit = Math.min(usersum, Math.min(ItemA.length, ItemB.length));

        int comUser = 0; // users with a record for both items
        int countIa = 0; // users with a record for ItemA
        int countIb = 0; // users with a record for ItemB
        for (int u = 0; u < limit; u++) {
            boolean inA = ItemA[u] > 0;
            boolean inB = ItemB[u] > 0;
            if (inA) {
                countIa++;
            }
            if (inB) {
                countIb++;
            }
            if (inA && inB) {
                comUser++;
            }
        }

        if (countIa == 0 || countIb == 0) {
            return 0;
        }
        // Multiply as double so large user counts cannot overflow int.
        return comUser / Math.sqrt((double) countIa * countIb);
    }

    /**
     * Sorts {@code simiItem[x][start..end]} in place by descending similarity value
     * (quicksort with the first element as pivot).
     *
     * @param x     row of {@link #simiItem} to sort
     * @param start first index of the range, inclusive
     * @param end   last index of the range, inclusive
     */
    public static void quickSort(int x, int start, int end) {
        if (start >= end) {
            return;
        }
        simi[] row = simiItem[x];
        double pivot = row[start].value;
        int i = start;
        int j = end;
        do {
            while (row[i].value > pivot && i < end) {
                i++;
            }
            while (row[j].value < pivot && j > start) {
                j--;
            }
            if (i <= j) {
                simi swapped = row[i];
                row[i] = row[j];
                row[j] = swapped;
                i++;
                j--;
            }
        } while (i <= j);

        if (start < j) {
            quickSort(x, start, j);
        }
        if (i < end) {
            quickSort(x, i, end);
        }
    }

    /**
     * Copies {@link #itemsim} into {@link #simiItem} and sorts every row by descending similarity.
     *
     * @return always 1
     */
    public static int sort() {
        for (int i = 0; i < itemsum; i++) {
            for (int j = 0; j < itemsum; ++j) {
                simiItem[i][j].num = j;
                simiItem[i][j].value = itemsim[i][j];
            }
            quickSort(i, 0, itemsum - 1);
        }
        return 1;
    }

    /**
     * Adds to {@code trainuseritem[i][j]} the similarity of every item among the {@code k} nearest
     * neighbours of item {@code j} that user {@code i} has a record for.
     *
     * <p>The score is accumulated into the matrix, so calling this twice for the same pair
     * doubles the contribution.
     *
     * @param i 0-based user index
     * @param j 0-based item index
     * @param k number of neighbours to consult; capped at the number of available neighbours
     * @return the accumulated interest of user {@code i} in item {@code j}
     */
    public static double getUserLikeItem(int i, int j, int k) {
        simi[] neighbours = simiItem[j];
        int consulted = Math.min(k, neighbours.length);
        for (int x = 0; x < consulted; x++) {
            simi neighbour = neighbours[x];
            if (train[neighbour.num][i] > 0) {
                trainuseritem[i][j] += neighbour.value;
            }
        }
        return trainuseritem[i][j];
    }

    /**
     * Fills {@link #recommend} with, per user, the items of highest predicted interest in
     * descending order. Item numbers are stored 1-based; slots for which no item with a
     * non-zero score is left stay untouched.
     *
     * @return always 1
     */
    public static int getRecommend() {
        // There can never be more picks than items; this keeps the scan inside the arrays.
        int slots = Math.min(N, itemsum);
        for (int user = 0; user < usersum; user++) {
            boolean[] taken = new boolean[itemsum];
            double[] interest = trainuseritem[user];

            for (int x = 0; x < slots; x++) {
                // Start from the first item not picked yet ...
                int best = 0;
                while (best < itemsum && taken[best]) {
                    best++;
                }
                // ... and replace it by any unpicked item with a strictly higher score.
                for (int j = 0; j < itemsum; j++) {
                    if (!taken[j] && interest[best] < interest[j]) {
                        best = j;
                    }
                }
                taken[best] = true;
                if (interest[best] != 0) {
                    recommend[user][x] = best + 1;
                }
            }
        }
        return 1;
    }

}
package WjPack;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Social (followed-user) recommender: for every user, the users they follow are compared by
 * the Euclidean distance of their ratings on commonly rated hotels; when a followed user rated
 * every shared hotel identically (distance 0), the hotels only that followed user rated are
 * recommended.
 *
 * <p>The rating file {@link #road} is loaded once when the class is initialised. {@link #run()}
 * then writes one {@code user<id>::<hotel>::<hotel>...} line per user that received at least one
 * recommendation to {@link #road4}.
 *
 * <p>All state lives in mutable static fields without any synchronization, so the class is
 * meant to be driven from a single thread.
 */
public class new_ojld_dis {

    static String road_main = "data";
    static String road = road_main + "/6 總評論情感分析結果/酒店-評論(已轉化).txt"; // ratings, "user::item::rate" per line
    static String road2 = road_main + "/10 推薦/(已轉化)使用者-關注.txt";         // follows, "user::followedUser" per line
    static String road3 = road_main + "/10 推薦/(已轉化)天河酒店ID.txt";          // "hotelName::itemNumber" per line
    static String road4 = road_main + "/10 推薦/使用者推薦.txt";                  // output written by run()
    static String road5 = road_main + "/10 推薦/(已轉化)使用者ID.txt";            // its line count is used as the user count

    static int usersum = 20836;  // number of users; replaced by the line count of road5
    static int itemsum = 200;    // number of items; replaced by the line count of road3

    /** Ratings per user: user id -> (item id -> rating). */
    static Map<String,HashMap<String,Integer>> score = new HashMap<String,HashMap<String,Integer>>();
    static Set<String> userSet = new HashSet<String>();
    static Set<String> filmSet = new HashSet<String>();

    /** Output writer used by {@link #run()}. */
    static FileWriter txtw;

    /** Recommendations collected for the user currently being processed, as {@code ::name} pieces. */
    static String tjhotel = "";

    /** User ids in order of first appearance in the rating file, each exactly once. */
    static ArrayList<String> arr;

    /** Hotel names indexed by item number; grown on demand by {@link #get_hotelid(String)}. */
    static String[] hotel = new String[201];

    static {
        arr = new ArrayList<String>();
        try {
            score = get_score_from_road();
        } catch (IOException e) {
            // The class stays usable (with no ratings) so callers can repoint the paths and
            // reload, but the failure must not go unnoticed.
            System.err.println("new_ojld_dis: could not load ratings from " + road + ": " + e);
        }
    }

    /**
     * Computes the recommendations for every known user and writes them to {@link #road4}.
     *
     * @throws IOException if the hotel or follow file cannot be read or the output cannot be written
     */
    public static void run() throws IOException {
        // Load the names first so a bad input file does not leave a truncated output behind.
        get_hotelid(road3);

        txtw = new FileWriter(road4);
        try {
            for (int m = 0; m < arr.size(); ++m) {
                String user = arr.get(m);
                tjhotel = "";
                outNearbyUserList(user);

                if (tjhotel.length() > 1) {
                    txtw.write("user" + user + tjhotel + "\r\n");
                }
            }
        } finally {
            txtw.close();
        }
    }

    /**
     * Refreshes {@link #usersum} and {@link #itemsum} from the id files and prints both.
     * A read failure is reported on stderr and leaves the previous values in place.
     */
    public static void init() {
        try {
            get_user_hotel_num();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.out.println(usersum);
        System.out.println(itemsum);
    }

    /**
     * Sets {@link #itemsum} to the number of lines in {@link #road3} and {@link #usersum} to
     * the number of lines in {@link #road5}.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road3);
        usersum = countLines(road5);
    }

    /** Counts the lines of a text file, closing it even when reading fails. */
    private static int countLines(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            int lines = 0;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the rating file {@link #road} ({@code user::item::rate} per line) into a
     * user -> (item -> rating) map.
     *
     * <p>Side effects: calls {@link #init()}, and appends every user id that is new within this
     * call to {@link #arr}, in order of first appearance. Records of one user are merged even
     * when they are not contiguous in the file. Lines with fewer than three fields or a
     * non-integer rating are reported on stderr and skipped.
     *
     * @return the ratings of all users found in the file; empty for an empty file
     * @throws IOException if the rating file cannot be read
     */
    public static Map<String,HashMap<String,Integer>> get_score_from_road() throws IOException {
        init();
        Map<String,HashMap<String,Integer>> result = new HashMap<String,HashMap<String,Integer>>();

        BufferedReader reader = new BufferedReader(new FileReader(road));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 3) {
                    if (line.trim().length() > 0) {
                        System.err.println("new_ojld_dis: skipping malformed rating line: " + line);
                    }
                    continue;
                }
                String username = fields[0];
                String filmname = fields[1];
                Integer rating;
                try {
                    rating = Integer.valueOf(fields[2].trim());
                } catch (NumberFormatException malformed) {
                    System.err.println("new_ojld_dis: skipping malformed rating line: " + line);
                    continue;
                }

                HashMap<String,Integer> ratings = result.get(username);
                if (ratings == null) {
                    ratings = new HashMap<String,Integer>();
                    result.put(username, ratings);
                    arr.add(username);
                }
                ratings.put(filmname, rating);
            }
        } finally {
            reader.close();
        }
        return result;
    }

    /**
     * Compares {@code user} with every user they follow (according to {@link #road2}) and lets
     * the comparison append the resulting recommendations to {@link #tjhotel}.
     *
     * <p>The follow file is re-read on every call.
     *
     * @param user id of the user to recommend for
     * @throws IOException if the follow file cannot be read
     */
    public static void outNearbyUserList(String user) throws IOException {
        Set<String> followed = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new FileReader(road2));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length >= 2 && fields[0].equals(user)) {
                    followed.add(fields[1]);
                }
            }
        } finally {
            reader.close();
        }

        // Walk arr (not the set) so recommendations are appended in a stable, file-defined order.
        for (int m = 0; m < arr.size(); ++m) {
            String candidate = arr.get(m);
            if (candidate.equals(user) || !followed.contains(candidate)) {
                continue;
            }
            // Called for its side effect on tjhotel; the distance itself is not needed here.
            getOSScore(user, candidate);
        }
    }

    /**
     * Euclidean distance between the ratings of two users over the items both have rated.
     *
     * <p>When the distance is exactly 0, i.e. every shared item has the same rating, the name
     * of each hotel rated only by {@code user2} is appended to {@link #tjhotel} as
     * {@code ::name}. Item numbers without a known hotel name are left out.
     *
     * @return the distance, or 100 when the users share no rated item or either has no ratings
     * @throws NumberFormatException if an item id of {@code user2} is not an integer
     */
    private static Double getOSScore(String user1, String user2) throws NumberFormatException, IOException {
        HashMap<String,Integer> user1Score = score.get(user1);
        HashMap<String,Integer> user2Score = score.get(user2);
        double totalscore = 100;
        if (user1Score == null || user2Score == null) {
            return totalscore;
        }

        // Accumulate over ALL shared items; the result must not depend on map iteration order.
        double squaredSum = 0;
        boolean anyShared = false;
        for (Map.Entry<String,Integer> rated : user1Score.entrySet()) {
            Integer theirs = user2Score.get(rated.getKey());
            if (theirs == null) {
                continue;
            }
            double difference = rated.getValue() - theirs;
            squaredSum += difference * difference;
            anyShared = true;
        }
        if (anyShared) {
            totalscore = Math.sqrt(squaredSum);
        }

        if (totalscore == 0) {
            for (String film : user2Score.keySet()) {
                if (user1Score.containsKey(film)) {
                    continue;
                }
                int id = Integer.parseInt(film);
                if (id >= 0 && id < hotel.length && hotel[id] != null) {
                    tjhotel += "::" + hotel[id];
                }
            }
        }
        return totalscore;
    }

    /**
     * Loads {@code hotelName::itemNumber} records into {@link #hotel}, indexed by item number.
     * The array grows as needed; blank or incomplete lines and negative numbers are skipped.
     *
     * @param r1 path of the hotel id file
     * @throws IOException if the file cannot be read
     * @throws NumberFormatException if an item number is not an integer
     */
    public static void get_hotelid(String r1) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(r1));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue;
                }
                int id = Integer.parseInt(fields[1].trim());
                if (id < 0) {
                    continue;
                }
                if (id >= hotel.length) {
                    String[] bigger = new String[Math.max(id + 1, hotel.length * 2)];
                    System.arraycopy(hotel, 0, bigger, 0, hotel.length);
                    hotel = bigger;
                }
                hotel[id] = fields[0];
            }
        } finally {
            reader.close();
        }
    }

}

這是課程設計時寫的程式碼,可以直接使用。雖然沒寫什麼註釋,但並不難看懂,先了解一下原理再看程式碼就差不多了~

PS:因為抓到的使用者ID和酒店ID都是類似434132這麼長的編號,為了方便用陣列儲存,我事先把它們全部轉化為從1開始的編號。比如說有兩個ID 4654654和32131321,那我就轉成1和2,等推薦處理完之後,再把1和2轉回4654654和32131321。