程式人生 > Spark 中各種連線操作以及實用方法

spark中各種連線操作以及實用方法

// Left-hand pair RDD shared by all of the examples in this script:
// String key -> Double value, for the keys "123", "456" and "789".
val a = sc.parallelize(Array(("123", 4.0), ("456", 9.0), ("789", 9.0)))

// Right-hand pair RDD. It has no entry for key "456", so the examples below
// show how each operation treats a key that exists on only one side.
// Every value is written as a Double literal: mixing `10` (Int) with `8.0`
// (Double) widens the element type to (String, AnyVal) and prints `10`
// instead of `10.0`.
val b = sc.parallelize(Array(("123", 8.0), ("789", 10.0)))

// Inner join: only keys present in both `a` and `b` survive.
val c = a.join(b)
c.foreach(println)
/*
Sample output (line order is not guaranteed):
(123,(4.0,8.0))
(789,(9.0,10.0))
*/

// cogroup: one record per key found in either RDD; the values of each side
// are delivered as a collection, which is empty when that side lacks the key.
val d = a.cogroup(b)
d.foreach(println)
/*
Sample output (line order is not guaranteed):
(456,(CompactBuffer(9.0),CompactBuffer()))
(123,(CompactBuffer(4.0),CompactBuffer(8.0)))
(789,(CompactBuffer(9.0),CompactBuffer(10.0)))
*/

// Left outer join: every key of `a` is kept; the value coming from `b` is
// wrapped in an Option and is None when `b` has no such key.
val e = a.leftOuterJoin(b)
e.foreach(println)
/*
Sample output (line order is not guaranteed):
(456,(9.0,None))
(123,(4.0,Some(8.0)))
(789,(9.0,Some(10.0)))
*/

// Full outer join: keys from both sides are kept and both values are Options.
val f = a.fullOuterJoin(b)
f.foreach(println)
/*
Sample output (line order is not guaranteed):
(456,(Some(9.0),None))
(123,(Some(4.0),Some(8.0)))
(789,(Some(9.0),Some(10.0)))
*/

// Cartesian product: every element of `a` paired with every element of `b`,
// regardless of key (3 x 2 = 6 records).
val g = a.cartesian(b)
g.foreach(println)
/*
Sample output (line order is not guaranteed):
((123,4.0),(123,8.0))
((123,4.0),(789,10.0))
((456,9.0),(123,8.0))
((456,9.0),(789,10.0))
((789,9.0),(123,8.0))
((789,9.0),(789,10.0))
*/

/*val h = a.coalesce(6,true)

h.foreach(println)

a.dependencies.foreach(println)*/

// keyBy: turns each element of `a` into (key, element), where the key is
// produced by the supplied function. The function here ignores its argument,
// so every record receives the same constant key ("haha", 234).
val i = a.keyBy(_ => ("haha", 234))
i.foreach(record => println(record))
/*
Sample output:
((haha,234),(123,4.0))
((haha,234),(456,9.0))
((haha,234),(789,9.0))
*/