MapReduce 之自定義全域性計數器:將資訊輸出到控制檯
阿新 • • 發佈:2018-12-24
package jishuqi;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache .hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
 * Counts the number of invalid (incomplete) records using a global counter.
 * Global counters are reported on the console when the job finishes.
 * ------ used to collect statistics about specific conditions in the data.
 */
public class MissFileds {

    /** Minimum number of comma-separated fields a record must have to be considered complete. */
    private static final int EXPECTED_FIELDS = 3;

    /**
     * Map-only task that emits no output; it only increments
     * {@link MissCounter#Miss_Field_Lines} for each record that has too few fields.
     */
    static class MyMapper extends Mapper<LongWritable, Text, NullWritable, NullWritable> {

        /**
         * Inspects one input line and counts it if fields are missing.
         *
         * @param key     offset of the line within the input
         * @param value   content of one line
         * @param context job context, used here to obtain the global counter
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the record into its comma-separated fields.
            String[] fields = value.toString().split(",");

            // Fewer than EXPECTED_FIELDS fields means the record is incomplete.
            if (fields.length < EXPECTED_FIELDS) {
                // Fetch the global counter and add 1 to it (increment(n) behaves like "+= n").
                Counter counter = context.getCounter(MissCounter.Miss_Field_Lines);
                counter.increment(1L);
            }
        }
    }

    /**
     * Driver: configures and submits the map-only counting job, then waits for it.
     * Returns normally when the job succeeds; terminates the JVM with exit status 1
     * when the job fails so that calling scripts can detect the failure.
     *
     * @param args command-line arguments (unused)
     * @throws IOException            propagated from job setup or submission
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop02:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(MissFileds.class);

        // A mapper alone is sufficient for this job.
        job.setMapperClass(MyMapper.class);

        // Output key/value types; the mapper emits no data.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // No reduce phase is needed; set to 0, otherwise one reduce task runs by default.
        job.setNumReduceTasks(0);

        // More than one input path may be specified here.
        FileInputFormat.setInputPaths(job, new Path("/friendout_01"));

        // An output path is still required: without it the job fails with
        // "Output directory not set."
        FileOutputFormat.setOutputPath(job, new Path("/counter_out01"));

        // Run in verbose mode and propagate failure through the process exit status
        // instead of silently discarding the result.
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
    }
}
自定義一個列舉類(MissCounter.java),作為上面 Mapper 所使用的全域性計數器:
package jishuqi;
/**
 * Enum that names the global counters of the job.
 * Each constant identifies one counter; the mapper in {@code MissFileds}
 * passes it to {@code context.getCounter(...)}.
 *
 * @author Administrator
 */
public enum MissCounter {

    /** Counts input lines that have fewer fields than expected. */
    Miss_Field_Lines;
}