1.MapReduce操作不需要reduce階段

 1 import org.apache.hadoop.conf.Configuration;
2 import org.apache.hadoop.fs.FileSystem;
3 import org.apache.hadoop.fs.Path;
4 import org.apache.hadoop.io.LongWritable;
5 import org.apache.hadoop.io.NullWritable;
6 import org.apache.hadoop.io.Text;
7 import org.apache.hadoop.mapreduce.Job;
8 import org.apache.hadoop.mapreduce.Mapper;
9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
11
12 import java.io.IOException;
13
14 public class WordCount03 {
15 public static class MyMapper extends Mapper<LongWritable, Text,Text, NullWritable>{
16 @Override
17 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
18 String line = value.toString();
19 String s = line.split(",")[3];
20 if(s.equals("男")){
21 context.write(new Text(s),NullWritable.get());
22 }
23 }
24 }
25 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
26 Job job= Job.getInstance();
27 job.setNumReduceTasks(0);
28 /**
29 * 有些情況下,不需要reduce(聚合程式),
30 * 在不需要聚合操作的時候,可以不需要reduce
31 * 而reduce預設為1,需要手動設定為0,
32 * 如果沒有設定為0,會產生預設的reduce,只不過reduce不處理任何資料
33 */
34 job.setJobName("mr03程式");
35 job.setJarByClass(WordCount03.class);
36 job.setMapOutputKeyClass(Text.class);
37 job.setMapOutputValueClass(NullWritable.class);
38 Path in = new Path("/word");
39 FileInputFormat.addInputPath(job,in);
40 Path out = new Path("/output");
41 FileSystem fs = FileSystem.get(new Configuration());
42 if(fs.exists(out)){
43 fs.delete(out);
44 }
45 FileOutputFormat.setOutputPath(job,out);
46 job.waitForCompletion(true);
47 }
48 }

注意:

有些情況下,不需要reduce(聚合程式),
在不需要聚合操作的時候,可以不需要reduce
而reduce預設為1,需要手動設定為0,
如果沒有設定為0,會產生預設的reduce,只不過reduce不處理任何資料


2.MapReduce中join操作(資料拼接)
  1 import org.apache.hadoop.conf.Configuration;
2 import org.apache.hadoop.fs.FileSystem;
3 import org.apache.hadoop.fs.Path;
4 import org.apache.hadoop.io.LongWritable;
5 import org.apache.hadoop.io.NullWritable;
6 import org.apache.hadoop.io.Text;
7 import org.apache.hadoop.mapreduce.InputSplit;
8 import org.apache.hadoop.mapreduce.Job;
9 import org.apache.hadoop.mapreduce.Mapper;
10 import org.apache.hadoop.mapreduce.Reducer;
11 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
12 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
13 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
14
15 import java.io.IOException;
16 import java.util.ArrayList;
17
18 public class WordCount04 {
19 public static class JoinMapper extends Mapper<LongWritable,Text,Text,Text>{
20 @Override
21 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
22 //1.獲取資料的路徑 InputSplit
23 //context 上面是hdfs 下面如果有reduce就是reduce 沒有就是hdfs
24 InputSplit inputSplit = context.getInputSplit();
25 FileSplit fs=(FileSplit)inputSplit;
26 String url = fs.getPath().toString();
27 //2.判斷
28 if(url.contains("students")){//true當前資料為students.txt
29 String id = value.toString().split(",")[0];
30 //為了方便reduce資料的操作 針對於不同的資料 打一個標籤
31 String line = "*" + value.toString();
32 context.write(new Text(id),new Text(line));
33 }else {//false 當前資料為score.txt
34 //以學號作為k 也是兩張資料的關聯條件
35 String id = value.toString().split(",")[0];
36 //為了方便reduce資料的操作 針對於不同的資料 打一個標籤
37 String line = "#" + value.toString();
38 context.write(new Text(id),new Text(line));
39 }
40 }
41 }
42 public static class JoinReduce extends Reducer<Text,Text,Text,NullWritable>{
43 @Override
44 protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
45 //資料在迴圈之外儲存
46 String stuInfo="";
47 ArrayList<String> scores = new ArrayList<String>();
48 //提取資料
49 for (Text value : values) {
50 //獲取一行一行的資料(所有資料包含students.txt和score.txt)
51 String line = value.toString();
52 if(line.startsWith("*")){//true 為學生資料
53 stuInfo= line.substring(1);
54 }else {//false 為學生成績資料
55 scores.add(line.substring(1));
56 }
57 }
58 /**
59 * 求的是 兩張表的拼接
60 */
61 //資料拼接
62 for (String score : scores) {
63 String subject = score.split(",")[1];
64 String s = score.split(",")[2];
65 String end=stuInfo+","+subject+","+s;
66 context.write(new Text(end),NullWritable.get());
67 }
68 /**
69 * 求的是 兩張表的拼接 拼接過程中對成績求和
70 */
71 // long sum=0l;
72 // for (String s : scores) {
73 // Integer sc =Integer.valueOf( s.split(",")[2]);
74 // sum+=sc;
75 // }
76 // String end=stuInfo+","+sum;
77 // context.write(new Text(end),NullWritable.get());
78 }
79 }
80 public static void main(String[] args) throws Exception {
81 Job job = Job.getInstance();
82 job.setJobName("Join MapReduce");
83 job.setJarByClass(WordCount04.class);
84
85 job.setMapperClass(JoinMapper.class);
86 job.setMapOutputKeyClass(Text.class);
87 job.setMapOutputValueClass(Text.class);
88
89 job.setReducerClass(JoinReduce.class);
90 job.setOutputKeyClass(Text.class);
91 job.setOutputValueClass(NullWritable.class);
92 //指定路徑
93 FileInputFormat.addInputPath(job,new Path("/word"));
94 Path path = new Path("/output");
95 FileSystem fs = FileSystem.get(new Configuration());
96 if(fs.exists(path)){
97 fs.delete(path);
98 }
99 FileOutputFormat.setOutputPath(job,new Path("/output"));
100 job.waitForCompletion(true);
101 System.out.println("join 正在執行");
102 }
103 }