程式人生 > Spark 基本操作(二)

spark 基本操作(二)

select ray oca uil top main taf str afr

1. DataFrame 基本操作

 /** Demo of basic DataFrame operations: load JSON, inspect schema,
   * select/cast columns, filter rows, and aggregate with groupBy.
   * Expected console output is shown in comments after each action.
   */
 def main(args: Array[String]): Unit = {
   // Local session for the demo; "local[*]" uses all available cores.
   val spark = SparkSession.builder()
     .appName("test")
     .master("local[*]")
     .getOrCreate()
   // Brings the $"col" column syntax into scope.
   import spark.implicits._

   // Read the sample file into a DataFrame (JSON source, one object per line).
   val df = spark.read.format("json").load("people.json")

   df.show()
   // +----+-------+
   // | age|   name|
   // +----+-------+
   // |null|Michael|
   // |  30|   Andy|
   // |  19| Justin|
   // +----+-------+

   df.printSchema()
   // root
   //  |-- age: long (nullable = true)
   //  |-- name: string (nullable = true)

   // Project a single column.
   df.select($"name").show()
   // +-------+
   // |   name|
   // +-------+
   // |Michael|
   // |   Andy|
   // | Justin|
   // +-------+

   // Cast age from long to string; the schema reflects the new type.
   df.select($"name", $"age".cast("string").as("age")).printSchema()
   // root
   //  |-- name: string (nullable = true)
   //  |-- age: string (nullable = true)

   // Column arithmetic — note null propagates through the addition.
   df.select($"name", ($"age" + 1).as("age")).show()
   // +-------+----+
   // |   name| age|
   // +-------+----+
   // |Michael|null|
   // |   Andy|  31|
   // | Justin|  20|
   // +-------+----+

   // Row filtering; null ages do not satisfy the predicate.
   df.filter($"age" > 21).show()
   // +---+----+
   // |age|name|
   // +---+----+
   // | 30|Andy|
   // +---+----+

   // Aggregation: count rows per distinct age (null forms its own group).
   df.groupBy("age").count().show()
   // +----+-----+
   // | age|count|
   // +----+-----+
   // |  19|    1|
   // |null|    1|
   // |  30|    1|
   // +----+-----+

   spark.stop()
 }

spark 基本操作(二)