1. 程式人生 > >hadoop入門6:hadoop查詢兩兩之間有共同好友,及他倆的共同好友都是誰

hadoop入門6:hadoop查詢兩兩之間有共同好友,及他倆的共同好友都是誰

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

該資料可以看作好友,例如:A有B,C,D,F,E,O好友;B有A,C,E,K好友,以此類推;

需求:找出哪些使用者兩兩之間有共同好友,以及他們的共同好友分別是誰。例如:A和B的共同好友是C、E

編碼思路:

       第一步:把好友當作key,value是好友列表中包含該好友的所有使用者。例如:好友列表中有B的使用者是A、E、F、J

       第二步:以第一步的結果為輸入,先將每個好友對應的使用者列表排序(避免A-B和B-A被當成兩個不同的對),再用雙重for迴圈把使用者兩兩拼接成「使用者-使用者」作為key、該好友作為value輸出;reduce階段把同一對使用者的所有value彙總,就得到他們的全部共同好友

具體程式碼實現:

第一步:

package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step one of the common-friends computation: inverts the friend lists.
 *
 * <p>Each input line has the form {@code user:friend1,friend2,...}. The job
 * outputs one record per friend whose key is the friend and whose value is
 * the comma-separated list of users that list that friend.
 */
public class commonFriendStepOne {
	/**
	 * Emits one {@code (friend, user)} pair for every friend found on an
	 * input line.
	 */
	static class commonFriendStepOneMapper extends Mapper<LongWritable, Text, Text, Text>{
		// Reused across calls to avoid allocating a new Text per record.
		private final Text k = new Text();
		private final Text v = new Text();

		/**
		 * @param key     offset of the line in the input (unused)
		 * @param value   one line of the form {@code user:friend1,friend2,...}
		 * @param context sink for the emitted {@code (friend, user)} pairs
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Split the record on the colon: [0] is the user, [1] the friend list.
			String[] splits = value.toString().split(":");
			// String.split drops trailing empty strings, so a blank line or a
			// user without friends ("A:") yields fewer than two parts. Skip such
			// records instead of failing on splits[1].
			if (splits.length < 2 || StringUtils.isBlank(splits[0]) || StringUtils.isBlank(splits[1])) {
				return;
			}
			// The user name is the same for every pair emitted from this line.
			v.set(splits[0].trim());
			for (String friend : splits[1].split(",")) {
				// Ignore empty entries such as the middle one in "B,,C".
				if (StringUtils.isBlank(friend)) {
					continue;
				}
				k.set(friend.trim());
				context.write(k, v);
			}
		}
	}
	
	/**
	 * Joins all users that share the same friend into one comma-separated
	 * value.
	 */
	static class commonFriendStepOneReducer extends Reducer<Text, Text, Text, Text>{
		private final Text v = new Text();

		/**
		 * @param key     the friend
		 * @param values  every user whose friend list contains {@code key}
		 * @param context sink for the {@code (friend, "user1,user2,...")} record
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Build the joined string while iterating instead of buffering the
			// values in an intermediate list first.
			StringBuilder users = new StringBuilder();
			boolean first = true;
			for (Text value : values) {
				if (!first) {
					users.append(',');
				}
				users.append(value.toString());
				first = false;
			}
			v.set(users.toString());
			context.write(key, v);
		}
	}
	
	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input directory, {@code args[1]} the
	 *             output directory
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: commonFriendStepOne <input path> <output path>");
			System.exit(2);
		}

		// To target a YARN cluster explicitly, set "mapreduce.framework.name" to
		// "yarn" and "yarn.resourcemanager.hostname" to the resource manager host
		// on this configuration.
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(commonFriendStepOne.class);
		
		// Mapper and reducer implementations used by this job.
		job.setMapperClass(commonFriendStepOneMapper.class);
		job.setReducerClass(commonFriendStepOneReducer.class);
		
		// The map output types equal the final output types (Text/Text), so
		// setting the final output types alone is sufficient.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		
		// Input directory of the job.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		// Output directory of the job.
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// waitForCompletion is used rather than submit() because it reports
		// whether the job succeeded, which becomes the process exit code.
		boolean res = job.waitForCompletion(true);
		
		System.exit(res?0:1);
	}
}

結果:

第二步:

程式碼實現

package com.zsy.mr.commonfriend;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step two of the common-friends computation: pairs up the users that share
 * a friend and collects all shared friends per pair.
 *
 * <p>Each input line is expected to have the form
 * {@code friend<TAB>user1,user2,...}. The job outputs one line per user pair
 * of the form {@code userA-userB:friend1,friend2,...}.
 */
public class commonFriendStepTwo {

	/**
	 * Emits {@code ("userA-userB", friend)} for every pair of users that list
	 * the same friend.
	 */
	static class commonFriendStepTwoMapper extends Mapper<LongWritable, Text, Text, Text>{
		// Reused across calls to avoid allocating a new Text per record.
		private final Text k = new Text();
		private final Text v = new Text();

		/**
		 * @param key     offset of the line in the input (unused)
		 * @param value   one line of the form {@code friend<TAB>user1,user2,...}
		 * @param context sink for the emitted {@code (pair, friend)} records
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String[] splits = value.toString().split("\t");
			// Blank lines or lines without a tab-separated user list cannot be
			// paired; skip them instead of failing on splits[1].
			if (splits.length < 2 || StringUtils.isBlank(splits[1])) {
				return;
			}
			// The shared friend is the same for every pair emitted from this line.
			v.set(splits[0]);
			// All users that have this friend.
			String[] names = splits[1].split(",");
			// Sort so a pair is always written in one order: A-B and B-A are the
			// same pair and must end up under the same key.
			Arrays.sort(names);
			// Enumerate every unordered pair (i < j) of users.
			for (int i = 0; i < names.length-1; i++) {
				for (int j = i+1; j < names.length; j++) {
					k.set(names[i]+"-"+names[j]);
					context.write(k, v);
				}
			}
		}
	}
	
	/**
	 * Joins all friends shared by one user pair into a single output line.
	 */
	static class commonFriendStepTwoReducer extends Reducer<Text, Text, Text, NullWritable>{
		private final Text k = new Text();

		/**
		 * @param key     the user pair, formatted {@code userA-userB}
		 * @param value   every friend shared by the two users
		 * @param context sink for the {@code "userA-userB:friend1,friend2,..."} line
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, NullWritable>.Context context)
				throws IOException, InterruptedException {
			// Build the output line while iterating instead of buffering the
			// values in an intermediate list first.
			StringBuilder line = new StringBuilder(key.toString()).append(':');
			boolean first = true;
			for (Text text : value) {
				if (!first) {
					line.append(',');
				}
				line.append(text.toString());
				first = false;
			}
			k.set(line.toString());
			context.write(k, NullWritable.get());
		}
	}
	
	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input directory, {@code args[1]} the
	 *             output directory
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: commonFriendStepTwo <input path> <output path>");
			System.exit(2);
		}

		// To target a YARN cluster explicitly, set "mapreduce.framework.name" to
		// "yarn" and "yarn.resourcemanager.hostname" to the resource manager host
		// on this configuration.
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(commonFriendStepTwo.class);
		
		// Mapper and reducer implementations used by this job.
		job.setMapperClass(commonFriendStepTwoMapper.class);
		job.setReducerClass(commonFriendStepTwoReducer.class);
		
		// The map output value type (Text) differs from the final output value
		// type (NullWritable), so the map output types are set explicitly.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		// Final (reduce) output types.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		
		// Input directory of the job.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		// Output directory of the job.
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// waitForCompletion is used rather than submit() because it reports
		// whether the job succeeded, which becomes the process exit code.
		boolean res = job.waitForCompletion(true);
		
		System.exit(res?0:1);
	}
}

結果:

這樣就可以找到正確結果