1. 程式人生 > 用 Scala 統計單詞個數一步一步詳解

用 Scala 統計單詞個數一步一步詳解

// Sample input: each element is one line of text.
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")

// Step 1a: split every line on single spaces, producing one Array[String] per line.
val lines_1 = lines.map(_.split(" "))

// Step 1b: flatten the per-line arrays into a single list of words.
val lines_2 = lines_1.flatten

// Steps 1a + 1b in one go: flatMap splits each line and flattens the result.
val words = lines.flatMap(_.split(" "))

// Step 2: pair every word with the count 1.
// The placeholder form below is shorthand for words.map(x => (x, 1)).
val wordToOne = words.map((_, 1))

// Step 3: group the (word, 1) pairs by the word itself.
val grouped = wordToOne.groupBy(_._1)

// Step 4: total per group. Every pair carries a 1, so the group's size is its sum.
val grouped_1 = grouped.map(t => (t._1, t._2.size))

// Step 5: convert the Map to a List so it can be sorted.
val result = grouped_1.toList

// Step 6: sort ascending by count, then reverse to get the most frequent word first.
val result_1 = result.sortBy(_._2).reverse


合併為一句


// The whole pipeline as a single expression. Each variant below gets its own name so
// that all of them can live in one scope (re-declaring the same val only works in the REPL).

// Variant 1: count each group by taking its size inside map.
val wordCounts = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).map(t => (t._1, t._2.size)).toList.sortBy(_._2).reverse

// Variant 2: same result, using mapValues so only the grouped values are transformed.
// NOTE: on Scala 2.13+ Map.mapValues is deprecated in favour of .view.mapValues(...).
val wordCountsViaMapValues = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).mapValues(_.size).toList.sortBy(_._2).reverse

// Variant 3: same result, but actually summing the 1s of every pair with foldLeft
// instead of relying on the group size.
val wordCountsViaFold = lines.flatMap(_.split(" ")).map((_, 1)).groupBy(_._1).mapValues(_.foldLeft(0)(_ + _._2)).toList.sortBy(_._2).reverse

從本地讀取檔案

package main.count

import scala.io.Source
/**
 * Reads a text file from the local file system and prints how often each word occurs,
 * most frequent word first.
 */
object WordCounts {

  /** Input file used when no path is passed on the command line. */
  private val DefaultPath = "d:\\words.txt"

  /**
   * Counts the words in the given lines.
   *
   * Every line is split on single spaces; the resulting words are grouped and counted.
   *
   * @param lines the lines of text to analyse
   * @return (word, count) pairs sorted by count in descending order
   */
  def countWords(lines: Seq[String]): List[(String, Int)] =
    lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(_._1)
      .map(t => (t._1, t._2.size))
      .toList
      .sortBy(_._2)
      .reverse

  /**
   * Entry point: reads the input file, counts its words and prints the result.
   *
   * @param args optional; args(0) is the path of the file to read,
   *             falling back to [[DefaultPath]] when absent
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse(DefaultPath)
    val source = Source.fromFile(path)
    // getLines() is lazy, so materialise the lines before the source is closed,
    // and close the source even if reading fails.
    val lines =
      try source.getLines().toList
      finally source.close()

    println(countWords(lines))
  }
}