1. 程式人生 > >spark入門(4)----scala掃盲(1)

spark入門(4)----scala掃盲(1)

scala方法和函式區別

注意:方法的返回值型別可以不寫,編譯器可以自動推斷出來,但是對於遞迴方法,必須顯式指定返回型別

  // Define a method: m2 takes a two-argument Int function and applies it to (2, 6).
    // The result type is omitted here and left for the compiler to infer.
    def m2(f:(Int,Int)=>Int) = f(2,6)

    // Define a function value that subtracts its second argument from its first.
    val f2 = (x:Int,y:Int) => x-y

    // Pass the function value to the method: f2(2, 6) evaluates to -4.
    val a = m2(f2)

    println("the result is: "+a)

    // Convert the method into a function value: the trailing "_" turns m2 into a function.
    val f1 = m2 _

    // Calling the function value gives the same result as calling the method directly.
    println(f1(f2))

陣列、對映、元組、集合

// Create a fixed-length array of 8 Ints; every element starts as 0.
    val arr1 = new Array[Int](8)
    // Printing an array directly shows its type tag and hash code, not its elements.
    println(arr1)
    // Convert the array to a buffer to make its contents visible;
    // toBuffer converts the array into an array buffer.
    println(arr1.toBuffer)

    // Note: WITHOUT `new`, this calls Array's apply method, whose arguments are
    // the element values themselves rather than a length.
    // So this creates a fixed-length array of length 1 holding the value 10.
    val arr2 = Array[Int](10)
    println(arr2.toBuffer)

    // Define a fixed-length array with 3 elements.
    val arr3 = Array("hadoop", "storm", "spark")
    // Use () to access an element by index.
    println(arr3(2))

    //////////////////////////////////////////////////
    // Variable-length array (array buffer).
    // ArrayBuffer lives in scala.collection.mutable and has to be imported.
    import scala.collection.mutable.ArrayBuffer
    val ab = ArrayBuffer[Int]()
    // += appends a single element at the end.
    ab += 1
    // += with several arguments appends them all.
    ab += (2, 3, 4, 5)
    // ++= appends every element of an array.
    ab ++= Array(6, 7)
    // ++= also appends every element of another array buffer.
    ab ++= ArrayBuffer(8, 9)
    // Insert elements at a given position: put -1 and 0 at index 0.
    // insertAll takes the index and a collection; insert(index, elem) handles a single element.
    ab.insertAll(0, Seq(-1, 0))
    // remove(index, count) deletes elements: drop 2 elements starting at index 8.
    ab.remove(8, 2)
    // Print the array buffer: ArrayBuffer(-1, 0, 1, 2, 3, 4, 5, 6, 9)
    println(ab)

遍歷陣列

// Initialise an array.
    val arr = Array(1,2,3,4,5,6,7,8)
    // Enhanced for loop: iterate over the elements directly.
    for(i <- arr)
      println(i)

    // `until` builds a Range of the valid indices (0 up to, but excluding, arr.length);
    // `reverse` flips that Range, so the elements are printed from last to first.
    for(i <- (0 until arr.length).reverse)
      println(arr(i))
  }

陣列轉換

yield關鍵字將原始的陣列進行轉換會產生一個新的陣列,原始的陣列不變

  // Define an array.
    val arr = Array(1,2,3,4,5,6,7,8)
    // A for comprehension with `yield` builds a new array: keep the even
    // elements and double them. The original array is left unchanged.
    val res = for( e <- arr if e%2==0 )
      yield e*2
    println(res.toBuffer)

    // The same transformation written with filter and map, which is more convenient.
    val res2 = arr.filter(_%2==0).map(_*2)
    println(res2.toBuffer)

陣列轉換

  // Define an array.
    val arr = Array(1,2,3,4,5,6,7,8)
    // A for comprehension with `yield` builds a new array: keep the even
    // elements and double them. The original array is left unchanged.
    val res = for( e <- arr if e%2==0 )
      yield e*2
    println(res.toBuffer)

    // The same transformation written with filter and map, which is more convenient.
    val res2 = arr.filter(_%2==0).map(_*2)
    println(res2.toBuffer)

陣列常用演算法

 // Sample array for the common array algorithms below.
 val arr = Array(2,5,1,4,3)
    // Sum of all elements.
    println(arr.sum)
    // Largest element.
    println(arr.max)
    // sorted returns a new array in ascending order; arr itself is not modified.
    println(arr.sorted.toBuffer)

雜湊表(對映)

注意:在Scala中,有兩種Map,一個是immutable包下的Map,該Map中的內容不可變;另一個是mutable包下的Map,該Map中的內容可變

// First way to build a Map: key -> value pairs.
    val scores1 = Map("tom"->85,"jetty"->99,"kitty"->90)
    println(scores1)

    // Second way: (key, value) tuples.
    val scores = Map(("tom",85),("jetty",99),("kitty",90))
    println(scores)


    // Look up a value by key; this throws NoSuchElementException if the key is absent.
    println(scores("jetty"))

    // getOrElse returns the supplied default (0 here) when the key is absent.
    val o1= scores.getOrElse("tian",0)
    println(o1)

    // Updating a value in place requires a scala.collection.mutable.Map.
    val scores2 = scala.collection.mutable.Map("tom"->80,"jim"->40)
    scores2("jim")=50
    println(scores2)

常用函式

// Create a List.
    val lst0 = List(1,7,9,8,0,3,5,4,6,2)
    // map: multiply every element by 10, producing a new list.
    val tem = lst0.map(_*10)
    println(tem)

    // filter: keep only the even elements, producing a new list.
    val tem1 = lst0.filter( x=> x%2==0)
    println(tem1)

    // sorted: a new list in ascending order.
    val tem2 = lst0.sorted
    println(tem2)

    // reverse: a new list in the opposite order.
    println(lst0.reverse)

    // grouped(4): split into chunks of 4 elements; the result is an Iterator[List[Int]].
    val tem3 = lst0.grouped(4)
    println(tem3.toList)

    // Convert the Iterator to a List straight away.
    val tem4 = lst0.grouped(4).toList
    println(tem4)

    // flatten: merge the nested lists back into a single List.
    println(tem4.flatten)

    val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
    // Word count: split each line on spaces and flatten, pair every word with 1,
    // group the pairs by word, then sum the 1s within each group.
    val line = lines.flatMap(_.split(" ")).map((_,1))
      .groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2))

    // Sort by count ascending, then reverse so the most frequent word comes first.
    println(line.toList.sortBy(_._2).reverse)


    // Parallel sum: .par converts the list to a parallel collection and reduce
    // combines the elements with +.
    println(lst0.par.reduce(_+_))

    // Reduction: reduce
    // applies a binary operation to all elements in no particular order.
//    reduce()
    // reduceLeft applies it in a fixed order.
//    reduceLeft()



    // Folding: takes an initial value (no particular order).
//    fold()()

    // Folding: takes an initial value (fixed order).
//    foldLeft()


    // aggregate: starting from 0, the first function adds the sum of each inner
    // list to the running total; the second function merges two partial totals.
    val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
    val result = arr.aggregate(0)(_+_.sum,_+_)
    println(result)


    val l1 = List(5,6,4,7)
    val l2 = List(1,2,3,4)
    // Union: on a List this is plain concatenation, so duplicates are kept
    // (4 appears twice in the result).
    val temp1 = l1.union(l2)
//    val temp1 = l1 union l2
    println(temp1)

    // Intersection: the elements of l1 that also occur in l2.
    val temp2 = l1.intersect(l2)
    println(temp2)

    // Difference: the elements of l1 that do not occur in l2.
    val temp3 = l1.diff(l2)
    println(temp3)
################################

    val lines = List("wo shi ni hao","wo shi shi tian jun","ha ha ha ha")
//    val line = lines.map(_.split(" ")).flatten
    // Split every line on spaces and flatten the pieces into one list of words.
    val line = lines.flatMap(_.split(" "))
    // Pair each word with 1 and group the pairs by word.
    val words = line.map((_,1)).groupBy(_._1)

    // Approach 1: the count is the size of each group; sort by count, highest first.
    val total = words.map(t=>(t._1,t._2.size))
    val result1 = total.toList.sortBy(_._2).reverse
    println(result1)

    // Approach 2: mapValues, taking the size of each group.
    val result2 = words.mapValues(_.size)
    println(result2)

    // Approach 3: mapValues, summing the 1s in each group with foldLeft.
    val total3=words.mapValues(_.foldLeft(0)(_+_._2))
    println(total3)
List((ha,4), (shi,3), (wo,2), (hao,1), (ni,1), (jun,1), (tian,1))
Map(tian -> 1, ha -> 4, jun -> 1, shi -> 3, ni -> 1, wo -> 2, hao -> 1)
Map(tian -> 1, ha -> 4, jun -> 1, shi -> 3, ni -> 1, wo -> 2, hao -> 1)

scala和java混合開發的pom檔案

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <groupId>tianjun.cmcc.org</groupId>
  <artifactId>mytest</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>

  <name>A Camel Scala Route</name>
  <url>http://www.myorganization.org</url>

  <!-- Use UTF-8 for source files and for generated reports -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  </properties>

  <!-- Central version management: pins JUnit 4.11 and imports the Camel parent POM.
       NOTE(review): the version-less Camel and Log4j dependencies declared in this POM
       presumably take their versions from this import; confirm against camel-parent 2.18.1. -->
  <dependencyManagement>
    <dependencies>
      <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
      </dependency>
      <dependency>
        <groupId>org.apache.camel</groupId>
        <artifactId>camel-parent</artifactId>
        <version>2.18.1</version>
        <scope>import</scope>
        <type>pom</type>
      </dependency>
    </dependencies>
  </dependencyManagement>

  <!-- Project dependencies: Camel, the Scala runtime, logging, testing and utilities -->
  <dependencies>
    <dependency>
      <groupId>org.apache.camel</groupId>
      <artifactId>camel-core</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.camel</groupId>
      <artifactId>camel-scala</artifactId>
    </dependency>

    <!-- scala: the _2.11 suffix of scala-xml must match the 2.11.x scala-library -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>2.11.7</version>
    </dependency>
    <dependency>
      <groupId>org.scala-lang.modules</groupId>
      <artifactId>scala-xml_2.11</artifactId>
      <version>1.0.4</version>
    </dependency>

    <!-- logging -->
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-api</artifactId>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <scope>runtime</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-slf4j-impl</artifactId>
      <scope>runtime</scope>
    </dependency>

    <!-- testing -->
    <dependency>
      <groupId>org.apache.camel</groupId>
      <artifactId>camel-test</artifactId>
      <scope>test</scope>
    </dependency>
    <!-- JUnit's coordinates are junit:junit (matching the dependencyManagement entry);
         it is only needed on the test classpath -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-collections/commons-collections -->
    <dependency>
      <groupId>commons-collections</groupId>
      <artifactId>commons-collections</artifactId>
      <version>3.2.2</version>
    </dependency>
  </dependencies>



  <!-- Build configuration: "install" is the default goal, and main/test sources are
       read from the Scala source folders.
       NOTE(review): with sourceDirectory set to src/main/scala, Java files kept under
       src/main/java are presumably not picked up; verify the layout for a mixed
       Java/Scala project. -->
  <build>
    <defaultGoal>install</defaultGoal>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>

    <plugins>

      <!-- the Maven compiler plugin will compile Java source files -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.5.1</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
      <!-- copy resources using UTF-8 encoding -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <version>3.0.1</version>
        <configuration>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>

      <!-- the Maven Scala plugin will compile Scala source files -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.2.2</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- configure the eclipse plugin to generate eclipse project descriptors for a Scala project -->
      <!--<plugin>-->
        <!--<groupId>org.apache.maven.plugins</groupId>-->
        <!--<artifactId>maven-eclipse-plugin</artifactId>-->
        <!--<version>2.10</version>-->
        <!--<configuration>-->
          <!--<projectnatures>-->
            <!--<projectnature>org.scala-ide.sdt.core.scalanature</projectnature>-->
            <!--<projectnature>org.eclipse.jdt.core.javanature</projectnature>-->
          <!--</projectnatures>-->
          <!--<buildcommands>-->
            <!--<buildcommand>org.scala-ide.sdt.core.scalabuilder</buildcommand>-->
          <!--</buildcommands>-->
          <!--<classpathContainers>-->
            <!--<classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer>-->
            <!--<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>-->
          <!--</classpathContainers>-->
          <!--<excludes>-->
            <!--<exclude>org.scala-lang:scala-library</exclude>-->
            <!--<exclude>org.scala-lang:scala-compiler</exclude>-->
          <!--</excludes>-->
          <!--<sourceIncludes>-->
            <!--<sourceInclude>**/*.scala</sourceInclude>-->
            <!--<sourceInclude>**/*.java</sourceInclude>-->
          <!--</sourceIncludes>-->
        <!--</configuration>-->
      <!--</plugin>-->

      <!-- allows the route to be run via 'mvn exec:java' -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.5.0</version>
        <configuration>
          <mainClass>tianjun.cmcc.org.MyRouteMain</mainClass>
        </configuration>
      </plugin>
    </plugins>
  </build>

</project>