程式人生 > Spark SQL 筆記(7)—— DataFrame API操作案例

Spark SQL 筆記(7)—— DataFrame API操作案例

1 測試資料

stu.txt

1|Anaa|111111|[email protected]
2|Bob|22222|[email protected]
3|Candy|333333|[email protected]
4|Dany|44444|[email protected]
5|Elf|55555|[email protected]
6|Frank|6666|[email protected]
7|George|777777|[email protected]@.com
8|Hlnk|888888|[email protected]
9||999999|[email protected]
10||101010|[email protected]
11|NULL|121212|[email protected]

2 測試程式碼

package com.tzb.demo2

import org.apache.spark.sql.SparkSession


object DataFrameTest {

  /**
   * Demo of common DataFrame API operations.
   *
   * Reads pipe-delimited student records ("id|name|phone|email") from a
   * local text file into a DataFrame, shows a selection of DataFrame
   * operations (mostly left commented out for reference), then performs
   * a self-join on `id`.
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .appName("DataFrameAPITest")
      .master("local[2]")
      .getOrCreate()

    try {
      import spark.implicits._

      // Parse the source file into a DataFrame of Stu rows.
      // split with limit = -1 keeps trailing empty fields, and the length
      // guard drops malformed rows instead of throwing
      // ArrayIndexOutOfBoundsException at runtime.
      def loadStuDF() = spark.sparkContext
        .textFile("file:///d://stu.txt")
        .map(_.split("\\|", -1))
        .filter(_.length >= 4)
        .map(f => Stu(f(0).trim.toInt, f(1), f(2), f(3)))
        .toDF()

      val stuDF = loadStuDF()

      // show() prints only the first 20 rows by default
      // stuDF.show()
      // stuDF.take(10).foreach(println)
      stuDF.first()
      stuDF.head(3)

      // stuDF.select("email").show(20, false)
      // stuDF.select("name", "email").show(20, false)
      // stuDF.filter("name=''").show()
      // stuDF.filter("name='' OR name='NULL'").show()
      // Students whose name starts with 'H'
      // stuDF.filter("SUBSTR(name,0,1)='H'").show()
      // stuDF.sort(stuDF("name")).show()
      // stuDF.sort(stuDF("name").desc).show()
      // stuDF.sort(stuDF("name").asc, stuDF("id").desc).show()

      // Second DataFrame built from the same parse pipeline (previously
      // duplicated inline) used to demonstrate a self-join on id.
      // Columns are qualified via col() to disambiguate the duplicate names.
      val stuDF2 = loadStuDF()
      stuDF.join(stuDF2, stuDF.col("id") === stuDF2.col("id")).show()
    } finally {
      // Guarantee the SparkSession is released even if an action fails.
      spark.stop()
    }
  }

  /** One student record: id|name|phone|email. */
  case class Stu(id: Int, name: String, phone: String, email: String)
}