1. 程式人生 > MapReduce 之自定義全域性計數器,將資訊輸出到控制檯

MapReduce 之 ---自定義全域性計數器,將資訊輸出到控制檯

package jishuqi;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache
.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /* * 統計不合法的資料條數 通過全域性計數器 全域性計數器都在控制檯輸出 * ------用於統計資料中特定的資訊 */ public class MissFileds { static class MyMapper extends Mapper<LongWritable, Text, NullWritable, NullWritable>{ /** * LongWritable key,偏移量 * Text value,一行內容 Context context 上下文物件 傳輸 job執行過程中上文傳引數 */
@Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, NullWritable, NullWritable>.Context context) throws IOException, InterruptedException { //取出你每一條資料 String[] split = value.toString().split
(","); //進行判斷 如果長度為3 證明 資料完美 否則資料殘缺 if(split.length<3){ //將殘缺的資料記錄在全域性計數器中 //取出全域性計數器 Counter counter = context.getCounter(MissCounter.Miss_Field_Lines); /* Increment this counter by the given value * @param incr the value to increase this counter by */ //void increment(long incr); 引數代表需要增加的值 類似於 +=incr counter.increment(1L); } } } //Driver public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { System.setProperty("HADOOP_USER_NAME", "hadoop"); Configuration conf=new Configuration(); conf.set("fs.defaultFS", "hdfs://hadoop02:9000"); Job job=Job.getInstance(conf); job.setJarByClass(MissFileds.class); //只需一個mapper就可以了 job.setMapperClass(MyMapper.class); //設定map的輸出 job.setOutputKeyClass(NullWritable.class); //沒有資料輸出 job.setOutputValueClass(NullWritable.class); //如果不需要reducetask 這裡請設定為0 否則預設執行一個reducetask任務 job.setNumReduceTasks(0); //可以指定多個輸入路徑 FileInputFormat.setInputPaths(job, new Path("/friendout_01")); //輸出路徑需要嗎? 不設定 報錯Output directory not set. FileOutputFormat.setOutputPath(job, new Path("/counter_out01")); job.waitForCompletion(true); } }

自定義一個列舉類(MissCounter),用來定義全域性計數器的名稱:

package jishuqi;
/**
 * Names of the custom global counters used by the jobs in this package.
 *
 * @author Administrator
 */
public enum MissCounter {

    /** Counter for input lines that are missing fields. */
    Miss_Field_Lines;
}