參考:http://eric-gcm.iteye.com/blog/1807468

math.txt:

張三    88
李四 99
王五 66
趙六 77

china.txt:

張三    78
李四 89
王五 96
趙六 67

english.txt:

張三    80
李四 82
王五 84
趙六 86

Java 程式碼:

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser; public class Score { public static class Map extends
Mapper<LongWritable, Text, Text, IntWritable> { // 實現map函式
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException { // 將輸入的純文字檔案的資料轉化成String
String line = value.toString(); // 將輸入的資料首先按行進行分割
StringTokenizer tokenizerArticle = new StringTokenizer(line, "\n"); // 分別對每一行進行處理
while (tokenizerArticle.hasMoreElements()) { // 每行按空格劃分
StringTokenizer tokenizerLine = new StringTokenizer(
tokenizerArticle.nextToken()); String strName = tokenizerLine.nextToken();// 學生姓名部分
String strScore = tokenizerLine.nextToken();// 成績部分
Text name = new Text(strName);
int scoreInt = Integer.parseInt(strScore); // 輸出姓名和成績
context.write(name, new IntWritable(scoreInt));
}
}
} public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> { // 實現reduce函式
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0;
int count = 0;
Iterator<IntWritable> iterator = values.iterator(); while (iterator.hasNext()) { sum += iterator.next().get();// 計算總分
count++;// 統計總的科目數
}
int average = (int) sum / count;// 計算平均成績
context.write(key, new IntWritable(average));
}
} public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); // 這句話很關鍵
conf.set("mapred.job.tracker", "172.16.11.74:9001"); String[] ioArgs = new String[] { "score_in", "score_out" };
String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
.getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Score Average <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Score Average");
job.setJarByClass(Score.class); // 設定Map、Combine和Reduce處理類
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class); // 設定輸出型別
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class); // 將輸入的資料集分割成小資料塊splites,提供一個RecordReder的實現
job.setInputFormatClass(TextInputFormat.class); // 提供一個RecordWriter的實現,負責資料輸出
job.setOutputFormatClass(TextOutputFormat.class); // 設定輸入和輸出目錄
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Score

執行結果:

張三    82
李四 90
王五 82
趙六 76

具體打包執行步驟:

參考博文:http://www.cnblogs.com/-wangjiannan/p/3590324.html