1. 程式人生 > >mapreduce典型應用案例之倒排索引

mapreduce典型應用案例之倒排索引

一、倒排索引的介紹

通俗的講,就是根據單詞找到包含這個單詞的所有文件。

二、mapreduce實現框架

1、首先要確定map、reduce、combiner中的key和value是什麼型別
2、然後確定key和value具體是什麼?
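Map : key為「單詞-檔名」,value為空
combiner : key為單詞,value為「檔名=次數」
reduce: key為單詞,value為相同單詞的所有「檔名=次數」以逗號拼接而成
注意:Hadoop 不保證 combiner 一定會被執行(可能執行 0 次、1 次或多次),因此較穩健的做法是讓 map 直接輸出 key=單詞、value=「檔名=1」,combiner 與 reduce 只對相同檔名的次數做累加,而不在 combiner 中改變 key。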
原理圖

三、mapreduce程式碼實現

1、準備資料

a.txt i love beijing and love china
b.txt i love beijing and not like New York
c.txt i dot like anycity
d.txt you like where
e.txt love familiy and love china

2、具體程式碼實現

package com.qyl.master;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.
mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class MyMapReduce { public static class MyMapper extends Mapper<LongWritable,Text,Text,Text>{ private Text okey=new Text(); private Text ovalue=new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String filename = ((FileSplit) context.getInputSplit()).getPath().getName(); String[] strs = value.toString().split(" "); for(String s:strs){ okey.set(s+"-"+filename); context.write(okey,ovalue); } } } public static class MyCombiner extends Reducer<Text,Text,Text,Text>{ private Text okey=new Text(); private Text ovalue=new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { int count=0; for(Text text:values){ count++; } String strs[]=key.toString().split("-"); okey.set(strs[0]); ovalue.set(strs[1]+"="+count); context.write(okey,ovalue); } } public static class MyReduce extends Reducer<Text,Text,Text,Text>{ private Text ovalue=new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { StringBuilder sb=new StringBuilder(); for(Text text:values){ sb.append(text.toString()).append(","); } sb.delete(sb.length()-1,sb.length()); ovalue.set(sb.toString()); context.write(key,ovalue); } } public static void main(String[] args) { Configuration conf=new Configuration(); try { Job job=Job.getInstance(conf); job.setJarByClass(MyMapReduce.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReduce.class); job.setCombinerClass(MyCombiner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Path inPath =new Path("C:\\data"); FileInputFormat.addInputPath(job, inPath); Path outpath=new Path("C:\\data\\result"); 
if(outpath.getFileSystem(conf).exists(outpath)){ outpath.getFileSystem(conf).delete(outpath, true); } FileOutputFormat.setOutputPath(job, outpath); job.waitForCompletion(true); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }

3、結果

New	b.txt=1
York	b.txt=1
and	b.txt=1,e.txt=1,a.txt=1
anycity	c.txt=1
beijing	b.txt=1,a.txt=1
china	a.txt=1,e.txt=1
dot	c.txt=1
familiy	e.txt=1
i	c.txt=1,a.txt=1,b.txt=1
like	b.txt=1,c.txt=1,d.txt=1
love	e.txt=2,b.txt=1,a.txt=2
not	b.txt=1
where	d.txt=1
you	d.txt=1