一、SQL中的多表查詢
1、笛卡爾積:舉例
2、根據連線條件的不同:
(*)等值連線
(*)不等值連線
(*)外連線
(*)自連線
二、多表查詢:等值連線
查詢員工資訊:部門名稱 員工姓名
select d.dname,e.ename
from emp e,dept d
where e.deptno=d.deptno;
三、多表查詢:自連線:就是通過表的別名,將同一張表視為多張表
查詢員工資訊:老闆姓名 員工姓名
條件:員工的老闆號 = 老闆的員工號(即 e.mgr = b.empno)
select b.ename,e.ename
from emp e,emp b
where e.mgr=b.empno;
===========================================================
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map side of the employee self-join that pairs each boss with the employees
 * reporting to them.
 *
 * <p>Every employee record is emitted twice so that the same table can play
 * both roles of the join:
 * <ul>
 *   <li>as a boss row: key = the record's own employee number, value = the name
 *       prefixed with {@code "*"};</li>
 *   <li>as an employee row: key = the record's manager number, value = the plain
 *       name.</li>
 * </ul>
 */
public class SelfJoinMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    /** Key emitted for records whose manager field cannot be parsed (the top boss). */
    private static final long NO_MANAGER_KEY = -1L;

    /** Fields needed to read employee number (0), name (1) and manager number (3). */
    private static final int MIN_FIELDS = 4;

    /**
     * Emits the boss-role and employee-role pairs for one input line.
     *
     * @param key1    input key supplied by the framework; not used here
     * @param value1  one comma-separated employee record,
     *                e.g. {@code 7698,BLAKE,MANAGER,7839,1981/5/1,2850,,30}
     * @param context sink for the emitted pairs
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if the employee number (field 0) is not numeric
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String[] words = value1.toString().split(",");
        if (words.length < MIN_FIELDS) {
            // Blank or truncated line: count it and move on instead of failing the task.
            context.getCounter("SelfJoin", "MALFORMED_RECORDS").increment(1);
            return;
        }

        String name = words[1];

        // 1. Boss-table role: keyed by this record's own employee number.
        context.write(new LongWritable(Long.parseLong(words[0])), new Text("*" + name));

        // 2. Employee-table role: keyed by the manager number. Only the parse is
        //    guarded, so an I/O failure in context.write is no longer swallowed
        //    and followed by a second write under the fallback key.
        long managerId;
        try {
            managerId = Long.parseLong(words[3]);
        } catch (NumberFormatException ignored) {
            // Empty or non-numeric manager field: treated as the top boss.
            managerId = NO_MANAGER_KEY;
        }
        context.write(new LongWritable(managerId), new Text(name));
    }
}
-----------------------------------------------------------------------------------------------------------
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reduce side of the employee self-join.
 *
 * <p>For one key it receives a mix of values: a value starting with {@code "*"}
 * carries the boss name, every other value is the name of an employee. The
 * output is written only when both a boss name and at least one employee name
 * were seen.
 */
public class SelfJoinReducer extends Reducer<LongWritable, Text, Text, Text> {

    /** Prefix that marks a value as the boss name. */
    private static final String BOSS_MARKER = "*";

    /**
     * Combines the values of one key into {@code bossName -> (emp;emp;...)}.
     *
     * @param k3      the join key
     * @param v3      boss-marked and plain employee names sharing this key
     * @param context sink for the joined pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<Text> v3, Context context)
            throws IOException, InterruptedException {
        String bossName = "";
        StringBuilder empNameList = new StringBuilder();

        for (Text v : v3) {
            String str = v.toString();
            // The marker must be the first character: a '*' elsewhere in an
            // employee name must not turn that value into the boss name.
            if (str.startsWith(BOSS_MARKER)) {
                bossName = str.substring(BOSS_MARKER.length());
            } else {
                // Prepend, so the list keeps the same (reverse-arrival) order
                // and trailing ';' as before.
                empNameList.insert(0, str + ";");
            }
        }

        // Emit only when both sides of the join are present.
        if (bossName.length() > 0 && empNameList.length() > 0) {
            context.write(new Text(bossName), new Text("(" + empNameList + ")"));
        }
    }
}
---------------------------------------------------------------------------------------
package demp.selfjoin;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the self-join job: wires {@code SelfJoinMapper} and
 * {@code SelfJoinReducer} into a single job and runs it.
 */
public class SelfJoinMain {

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception                if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: SelfJoinMain <input path> <output path>");
        }

        // A job = mapper + reducer.
        Job job = Job.getInstance(new Configuration());
        // Entry point used to locate the job jar.
        job.setJarByClass(SelfJoinMain.class);

        // Mapper and its output types (k2, v2).
        job.setMapperClass(SelfJoinMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer and its output types (k4, v4).
        job.setReducerClass(SelfJoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run the job; report a failed job through a non-zero exit status
        // instead of discarding the result.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}
===============================================================
package demp.mutiltable;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map side of the department/employee equi-join on department number.
 *
 * <p>Both tables arrive through the same input and are told apart by the number
 * of comma-separated fields: a 3-field line is a department record, a line with
 * at least 8 fields is an employee record. Department names are prefixed with
 * {@code "*"} so the reducer can distinguish them from employee names.
 */
public class MutilTableQueryMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    /** Number of fields in a department record. */
    private static final int DEPT_FIELD_COUNT = 3;

    /** Index of the department number inside an employee record. */
    private static final int EMP_DEPTNO_INDEX = 7;

    /**
     * Emits {@code (deptno, "*" + dname)} for a department line and
     * {@code (deptno, ename)} for an employee line.
     *
     * @param key1    input key supplied by the framework; not used here
     * @param value1  one comma-separated department or employee record
     * @param context sink for the emitted pair
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if the department number field is not numeric
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String[] words = value1.toString().split(",");

        if (words.length == DEPT_FIELD_COUNT) {
            // Department table: department number, department name.
            context.write(new LongWritable(Long.parseLong(words[0])),
                    new Text("*" + words[1]));
        } else if (words.length > EMP_DEPTNO_INDEX) {
            // Employee table: the employee's department number, employee name.
            context.write(new LongWritable(Long.parseLong(words[EMP_DEPTNO_INDEX])),
                    new Text(words[1]));
        } else {
            // Blank or truncated line: neither table shape fits. Count it rather
            // than failing with ArrayIndexOutOfBoundsException on words[7].
            context.getCounter("MutilTableQuery", "MALFORMED_RECORDS").increment(1);
        }
    }
}
-------------------------------------------------------------------------------------------------------
package demp.mutiltable;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reduce side of the department/employee equi-join.
 *
 * <p>For one key it receives a mix of values: a value starting with {@code "*"}
 * carries the department name, every other value is an employee name.
 */
public class MutilTableQueryReducer extends Reducer<LongWritable, Text, Text, Text> {

    /** Prefix that marks a value as the department name. */
    private static final String DEPT_MARKER = "*";

    /**
     * Combines the values of one key into {@code dname -> emp;emp;...}.
     *
     * <p>The pair is written unconditionally, so a key with no department name
     * or with no employees still produces a line (with an empty name or an
     * empty list respectively).
     *
     * @param k3      the join key
     * @param v3      department-marked and plain employee names sharing this key
     * @param context sink for the joined pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<Text> v3, Context context)
            throws IOException, InterruptedException {
        String dname = "";
        StringBuilder empNameList = new StringBuilder();

        for (Text t : v3) {
            String str = t.toString();
            // The marker must be the first character: a '*' elsewhere in an
            // employee name must not turn that value into the department name.
            if (str.startsWith(DEPT_MARKER)) {
                dname = str.substring(DEPT_MARKER.length());
            } else {
                // Prepend, so the list keeps the same (reverse-arrival) order
                // and trailing ';' as before.
                empNameList.insert(0, str + ";");
            }
        }

        context.write(new Text(dname), new Text(empNameList.toString()));
    }
}
---------------------------------------------------------------------------------------------
package demp.mutiltable;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the department/employee join job: wires
 * {@code MutilTableQueryMapper} and {@code MutilTableQueryReducer} into a
 * single job and runs it.
 */
public class MutilTableQueryMain {

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws Exception                if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: MutilTableQueryMain <input path> <output path>");
        }

        // A job = mapper + reducer.
        Job job = Job.getInstance(new Configuration());
        // Entry point used to locate the job jar.
        job.setJarByClass(MutilTableQueryMain.class);

        // Mapper and its output types (k2, v2).
        job.setMapperClass(MutilTableQueryMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer and its output types (k4, v4).
        job.setReducerClass(MutilTableQueryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run the job; report a failed job through a non-zero exit status
        // instead of discarding the result.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}