1. 程式人生 > >spark叢集與spark HA高可用快速部署 spark研習第一季

spark叢集與spark HA高可用快速部署 spark研習第一季

1、spark 部署

標籤: spark

0 apache spark專案架構

spark SQL  --  spark streaming --  MLlib  --  GraphX

0.1 hadoop快速搭建,主要利用hdfs儲存框架

下載hadoop-2.6.0,解壓,到etc/hadoop/目錄下

0.2 快速配置檔案

cat core-site.xml

  1. <configuration>
  2. <property>
  3. <name>fs.defaultFS</name>
  4. <value>hdfs://worker1:9000</value>
  5. </property>
  6. <property>
  7. <name>hadoop.tmp.dir</name>
  8. <value>/opt/hadoop-2.6.0/tmp</value>
  9. </property>
  10. <property>
  11. <name>hadoop.native.lib</name>
  12. <value>true</value>
  13. <description>Should native hadoop libraries, if present, be used.</description>
  14. </property>
  15. </configuration>

cat hdfs-site.xml

  1. <configuration>
  2. <property>
  3. <name>dfs.replication</name>
  4. <value>2</value>
  5. </property>
  6. <property>
  7. <name>dfs.namenode.secondary.http-address</name>
  8. <value>worker1:50090</value>
  9. <description>The secondary namenode http server address and port.</description>
  10. </property>
  11. <property>
  12. <name>dfs.namenode.name.dir</name>
  13. <value>/opt/hadoop-2.6.0/dfs/name</value>
  14. </property>
  15. <property>
  16. <name>dfs.datanode.data.dir</name>
  17. <value>/opt/hadoop-2.6.0/dfs/data</value>
  18. </property>
  19. <property>
  20. <name>dfs.namenode.checkpoint.dir</name>
  21. <value>file:///opt/hadoop-2.6.0/dfs/namesecondary</value>
  22. <description>Determines where on the local filesystem the DFS secondary name node should store the temporary images to merge. If this is a comma-delimited list of directories then the image is replicated in all of the directories for redundancy.</description>
  23. </property>
  24. </configuration>

cat hadoop-env.sh

  1. export JAVA_HOME=/opt/jdk
  2. export HADOOP_HOME=/opt/hadoop-2.6.0
  3. export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_HOME/lib/native"
  4. export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

cat mapred-site.xml

  1. <configuration>
  2. <property>
  3. <name>mapreduce.framework.name</name>
  4. <value>yarn</value>
  5. </property>
  6. </configuration>

cat yarn-site.xml

  1. <configuration>
  2. <!-- Site specific YARN configuration properties -->
  3. <property>
  4. <name>yarn.resourcemanager.hostname</name>
  5. <value>worker1</value>
  6. </property>
  7. <property>
  8. <name>yarn.nodemanager.aux-services</name>
  9. <value>mapreduce_shuffle</value>
  10. </property>
  11. </configuration>

0.3 hadoop hdfs快速啟動測試

  1. $ sbin/start-dfs.sh //開啟程序
  2. jps
  3. 5212 NameNode
  4. 5493 SecondaryNameNode
  5. 5909 Jps
  6. 5336 DataNode
  7. //如果沒有DataNode,檢視log/下的最新啟動情況
  8. 可能是因為hostname沒有改為worker1引起,每次重啟虛擬機器會遇到這種情況。
  9. 再次sbin/start-dfs.sh 如果namenode沒有起來
  10. $ bin/hdfs namenode -format //格式化
  11. 最後瀏覽器檢視worker1:50070

1. Spark安裝及配置

1.1 執行環境配置

A.下載及配置JDK,Scala,sbt,Maven  到/opt 目錄下

JDK jdk-7u79-linux-x64.gz 
Scala http://downloads.typesafe.com/scala/2.10.5/scala-2.10.5.tgz 
Maven apache-maven-3.2.5-bin.tar.gz 
SBT sbt-0.13.7.tgz 
解壓 tar zxf jdk-7u79-linux-x64.gz 
tar zxf scala-2.10.5.tgz

B.配置
  1. vi ~/.bash_profile ##vi /etc/profile 以下皆替換
  2. export JAVA_HOME=/opt/jdk
  3. export PATH=$JAVA_HOME/bin:$PATH
  4. export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
  5. export SCALA_HOME=/opt/scala-2.10.5
  6. export PATH=$PATH:$SCALA_HOME/bin
  7. $ source /etc/profile //source ~/.bash_profile
C.測試
  1. $ java -version
  2. java version "1.7.0_79"
  3. Java(TM) SE Runtime Environment (build 1.7.0_79-b15)
  4. Java HotSpot(TM) 64-Bit Server VM (build 24.79-b02, mixed mode)
  5. $ scala -version
  6. Scala code runner version 2.10.5 -- Copyright 2002-2013, LAMP/EPFL
D.Maven,sbt配置
  1. export MAVEN_HOME=/opt/apache-maven-3.2.5
  2. export SBT_HOME=/opt/sbt
  3. export PATH=$PATH:$SCALA_HOME/bin:$MAVEN_HOME/bin:$SBT_HOME/bin
  4. $source /etc/profile
  5. $ mvn --version
  6. Apache Maven 3.2.5 (12a6b3acb947671f09b81f49094c53f426d8cea1; 2014-12-15T01:29:23+08:00)
  7. Maven home: /opt/apache-maven-3.2.5
  8. Java version: 1.7.0_79, vendor: Oracle Corporation
  9. Java home: /opt/jdk
  10. Default locale: en_US, platform encoding: UTF-8
  11. OS name: "linux", version: "2.6.32-504.el6.x86_64", arch: "amd64", family: "unix"
  12. $ sbt --version //warning '--'
  13. sbt launcher version 0.13.7

1.2 Spark配置

A.下載Hadoop,Spark
  1. $ tar zxf spark-1.4.0-bin-hadoop2.6.tgz
  2. $ tar zxf hadoop-2.6.0.tar.gz
  3. $ ll 檢視
B. 配置Hadoop,Spark的安裝目錄
  1. vi ~/.bash_profile
  2. export JAVA_HOME=/opt/jdk
  3. export PATH=$JAVA_HOME/bin:$PATH
  4. export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
  5. export SCALA_HOME=/opt/scala-2.10.5
  6. export SPARK_HOME=/opt/spark-1.4.0-bin-hadoop2.6
  7. export HADOOP_HOME=/opt/hadoop-2.6.0
  8. export HADOOP_CONF_DIR=/opt/hadoop-2.6.0/etc/hadoop
  9. export MAVEN_HOME=/opt/apache-maven-3.2.5