1. 程式人生 > >大資料MapReduce原理之WordCount程式

大資料MapReduce原理之WordCount程式

Map Reduce

WordCount

用IDEA建立一個maven工程wordcountmr(單詞計數程式),並在pom.xml中引入Hadoop依賴包:

    <dependencies>
       <dependency>
           <groupId>org.apache.hadoop</groupId>
           <artifactId>hadoop-client</artifactId>
           <version>2.9.1</version>
       </dependency>
    </dependencies>

新建類別WordCountMR(檔案WordCountMR.java):

package com.cniao5;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.
Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class WordCountMR{ public static class WordCountMapper extends
Mapper<LongWritable, Text, Text, LongWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split("\t"); for(String word:words) { context.write(new Text(word), new LongWritable(1)); } } } public static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable>{ @Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { int count = 0; for(LongWritable value:values){ count += value.get(); } context.write(key, new LongWritable(count)); } } public static void main( String[] args ) throws IOException, ClassNotFoundException, InterruptedException { String input = args[0]; String output = args[1]; Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJobName("wordcount"); job.setJarByClass(WordCountMR.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.setInputPaths(job,new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); System.exit(job.waitForCompletion(true)?0:1); } }

打包jar包:View - Tool Windows - Maven Projects,雙擊package完成打包,在target資料夾下生成wordcountmr-1.0-SNAPSHOT.jar。將此jar包拷貝到Linux機器上執行。在hdfs上新建一個input資料夾,下面放一個要計數的檔案(自己寫若干的單詞,空格隔開)。執行 hadoop jar ./wordcountmr-1.0-SNAPSHOT.jar com.cniao5.WordCountMR /input /output ,等待程式結束,輸出在output下。