Hadoop學習記錄(三、MapReduce)
阿新 • • 發佈:2018-12-06
1.將一個日誌檔案上傳到hdfs上
2. 編寫mapReduce程式碼
2.1新建一個maven專案,新增依賴
<dependencies> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>1.2.1</version> </dependency> </dependencies>
2.2編寫HotSearch類
import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; /** * mapReduce功能演示:《我是歌手》熱搜榜 * * @author lrn * @createTime : 2018/11/30 19:03 */ public class HotSearch { public static class HotSearchMap extends Mapper<Object, Text, Text, IntWritable> { @Override protected void map(Object key, Text value, Context context) throws IOException, InterruptedException { // 文中的一行資料 String currentLine = value.toString(); // 如果當前行中出現歌手的名字,則對應歌手的統計數量+1 if (currentLine.contains("黃致列")) { context.write(new Text("黃致列"), new IntWritable(1)); } else if (currentLine.contains("李玟") || currentLine.contains("COCO")) { context.write(new Text("李玟"), new IntWritable(1)); } else if (currentLine.contains("張信哲")) { context.write(new Text("張信哲"), new IntWritable(1)); } else if (currentLine.contains("趙傳")) { context.write(new Text("趙傳"), new IntWritable(1)); } else if (currentLine.contains("老狼")) { context.write(new Text("老狼"), new IntWritable(1)); } } } public static class HotSearchReduce extends Reducer<Text, IntWritable, Text, IntWritable> { @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { int count = 0; // 對map方法中輸出的統計資料進行彙總 for (IntWritable intWritable : values) { count += intWritable.get(); } // 輸出該reduce的彙總資料 context.write(key, new IntWritable(count)); } } public static void main(String[] args) throws Exception { // 取得一個任務物件 Job job = Job.getInstance(); job.setJarByClass(HotSearch.class); job.setMapperClass(HotSearchMap.class); job.setReducerClass(HotSearchReduce.class); job.setOutputKeyClass(Text.class); 
job.setOutputValueClass(IntWritable.class); // 設定任務的輸入檔案或路徑 FileInputFormat.addInputPath(job, new Path(args[0])); // 設定任務的輸出路徑 FileOutputFormat.setOutputPath(job, new Path(args[1])); // 啟動任務 job.waitForCompletion(true); } }
2.3打包
依次執行 mvn clean、mvn install、mvn package,打成 jar 包
3.hdfs執行
3.1將jar包傳到Linux上
3.2啟動hdfs
在sbin目錄下執行
./start-dfs.sh
3.3啟動yarn
./start-yarn.sh
3.4執行mapReduce
./hadoop jar /tmp/mapReduce-1.0-SNAPSHOT.jar HotSearch /input/IAMSinger.txt /output2
命令解讀:./hadoop jar + jar 包在 Linux 上的路徑 + jar 包 main 方法所在類(全限定類名) + hdfs 上的待分析檔案路徑 + hdfs 分析結果輸出路徑
File Output Format對應的Bytes若為0,則表示無輸出內容
3.5檢視分析結果
./hdfs dfs -cat /output2/part-r-00000