
One-Click Hadoop Deployment with a Shell Script

Test Environment

Linux distribution: CentOS 7
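
You can confirm the release on the target machine with:

cat /etc/redhat-release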


Features

1. One-click Java setup
2. One-click standalone Hadoop install
3. One-click pseudo-distributed Hadoop install
4. Hadoop cluster deployment
5. Pseudo-distributed Hadoop initialization
6. Passwordless SSH login across the cluster (run as the hadoop user)


About the Script

Setting up a Hadoop environment by hand is tedious and error-prone, so I wrote a shell script that does the whole deployment in one step. Without further ado, the code is below.

The cluster-deployment path has not been fully tested yet; I will keep updating it as testing progresses.

If you run into any problems, or have ideas for improvement, please point them out in the comments.
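
To run it, save the script on the machine you are configuring and execute it as root. A minimal session might look like the following (install.sh is the name the script itself assumes when it copies itself to the datanodes during cluster deployment):

chmod +x install.sh
./install.sh     # an interactive menu appears; e.g. enter 3 for a pseudo-distributed install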


Script Code

#!/bin/bash

# Download links and host configuration.
# Note: localIP assumes the network interface is named ens33 (a common
# default on CentOS 7 VMs); adjust the grep pattern if yours differs.
JDKLINK='http://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.rpm'
HADOOPLINK='https://archive.apache.org/dist/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz'
localIP=$(ip a | grep ens33 | awk '$1~/^inet.*/{print $2}' | awk -F '/' '{print $1}')
ip_arrays=()

# Prepare the install environment
installWget(){
    echo 'Preparing the install environment....'
    # install wget if it is not already present
    command -v wget >/dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo 'Installing wget'
        yum -y install wget
    fi
}

# Download and install the JDK with wget
installJDK(){
    ls /usr/local | grep 'jdk.*rpm$'
    if [ $? -ne 0 ]; then
        echo 'Downloading the JDK.......'
        # Oracle requires the license-acceptance cookie for direct downloads
        wget --no-check-certificate --no-cookies --header "Cookie: oraclelicense=accept-securebackup-cookie" \
            $JDKLINK
        mv $(ls | grep 'jdk.*rpm$') /usr/local
    fi
    chmod 751 /usr/local/$(ls /usr/local | grep 'jdk.*rpm$')
    rpm -ivh /usr/local/$(ls /usr/local | grep 'jdk.*rpm$')
}

# Configure the JDK environment variables in /etc/profile
pathJDK(){
    # PATH setting
    grep -q "export PATH=" /etc/profile
    if [ $? -ne 0 ]; then
        # no PATH line yet: append one at the end of the file
        echo 'export PATH=$PATH:$JAVA_HOME/bin'>>/etc/profile
    else
        # a PATH line exists: append to it
        sed -i '/^export PATH=.*/s/$/:\$JAVA_HOME\/bin/' /etc/profile
    fi
    grep -q "export JAVA_HOME=" /etc/profile
    if [ $? -ne 0 ]; then
        # insert the JAVA_HOME block before the PATH line
        # (pick the installed jdk directory, skipping .rpm/.gz files)
        filename="$(ls /usr/java | grep '^jdk.*[^rpm | gz]$' | sed -n '1p')"
        sed -i "/^export PATH=.*/i\export JAVA_HOME=\/usr\/java\/$filename" /etc/profile
        sed -i '/^export PATH=.*/i\export JRE_HOME=$JAVA_HOME/jre' /etc/profile
        sed -i '/^export PATH=.*/i\export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar' /etc/profile
    else
        # replace the existing JAVA_HOME
        filename="$(ls /usr/java | grep '^jdk.*[^rpm | gz]$' | sed -n '1p')"
        sed -i "s/^export JAVA_HOME=.*/export JAVA_HOME=\/usr\/java\/$filename/" /etc/profile
    fi
    source /etc/profile
}

# Download and unpack Hadoop with wget (standalone)
wgetHadoop(){
    ls /usr/local | grep 'hadoop.*gz$'
    if [ $? -ne 0 ]; then
        echo 'Downloading the Hadoop package...'
        wget $HADOOPLINK
        mv $(ls | grep 'hadoop.*gz$') /usr/local
    fi
    tar -zxvf /usr/local/$(ls /usr/local | grep 'hadoop.*gz$') -C /usr/local
    mv /usr/local/$(ls /usr/local | grep 'hadoop.*[^gz]$') /usr/local/hadoop
}

# Configure the Hadoop environment variables in /etc/profile
pathHadoop(){
    # PATH setting
    grep -q "export PATH=" /etc/profile
    if [ $? -ne 0 ]; then
        # no PATH line yet: append one at the end of the file
        echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin'>>/etc/profile
    else
        # a PATH line exists: append to it
        sed -i '/^export PATH=.*/s/$/:\$HADOOP_HOME\/bin:\$HADOOP_HOME\/sbin/' /etc/profile
    fi
    # HADOOP_HOME setting
    grep -q "export HADOOP_HOME=" /etc/profile
    if [ $? -ne 0 ]; then
        # insert HADOOP_HOME on the line before PATH
        sed -i '/^export PATH=.*/i\export HADOOP_HOME=\/usr\/local\/hadoop' /etc/profile
    else
        # update the existing HADOOP_HOME
        sed -i 's/^export HADOOP_HOME=.*/export HADOOP_HOME=\/usr\/local\/hadoop/' /etc/profile
    fi
    source /etc/profile
}

# Create the hadoop user and set permissions
hadoopUserAdd(){
    echo 'Creating the hadoop user....'
    useradd hadoop
    echo 'Set a password for the hadoop user....'
    passwd hadoop
    gpasswd -a hadoop root
    chmod 771 /usr
    chmod 771 /usr/local
    chown -R hadoop:hadoop /usr/local/hadoop
}

# Standalone Hadoop installation
installHadoop(){
    installWget
    wgetHadoop
    pathHadoop
    hadoopUserAdd
}

# Pseudo-distributed configuration: writes the four site files,
# the slaves file and JAVA_HOME in hadoop-env.sh
setHadoop(){
echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file.
-->
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>Directory where Hadoop stores its runtime files</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
        <description>Address and port of the HDFS namenode</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/core-site.xml
echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- HDFS settings. The default block replication must be changed here:
     HDFS keeps 3 replicas of each block by default, but a pseudo-distributed
     setup has only one datanode, so the replica count must be set to 1 or
     Hadoop will report errors. -->
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
        <description>Number of replicas HDFS keeps of each block (default is 3)</description>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/hadoopdata/namenode</value>
        <description>Directory where the namenode stores its data</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/hadoopdata/datanode</value>
        <description>Directory where the datanode stores blocks</description>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
        <description>Disable permission checking</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/hdfs-site.xml
echo '<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- Configuration properties for MapReduce jobs -->
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>Run MapReduce on YARN</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/mapred-site.xml
echo '<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>How data is fetched during the MapReduce shuffle</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/yarn-site.xml
echo 'localhost'>$HADOOP_HOME/etc/hadoop/slaves
# comment out the stock JAVA_HOME in hadoop-env.sh and insert the real path
sed -i 's/export JAVA_HOME=.*/\#&/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
sed -i "/#export JAVA_HOME=.*/a export JAVA_HOME=$JAVA_HOME" $HADOOP_HOME/etc/hadoop/hadoop-env.sh
chown -R hadoop:hadoop $HADOOP_HOME
}

# Fully distributed (cluster) configuration: $1 = namenode IP, $2 = secondarynamenode IP
setHadoop2(){
echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop/tmp</value>
        <description>Directory where Hadoop stores its runtime files</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://'$1':9000</value>
        <description>Address and port of the HDFS namenode</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/core-site.xml
echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- HDFS settings for the cluster -->
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
        <description>Number of replicas HDFS keeps of each block (default is 3)</description>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/hadoopdata/namenode</value>
        <description>Directory where the namenode stores its data</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/hadoopdata/datanode</value>
        <description>Directory where the datanode stores blocks</description>
    </property>
    <property>
        <name>dfs.secondary.http.address</name>
        <value>'$2':50090</value>
        <description>Node running the secondarynamenode; a different node from the namenode</description>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
        <description>Disable permission checking</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/hdfs-site.xml
echo '<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!-- Configuration properties for MapReduce jobs -->
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>Run MapReduce on YARN</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/mapred-site.xml
echo '<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>'$1'</value>
        <description>IPC address of the YARN resourcemanager</description>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>How data is fetched during the MapReduce shuffle</description>
    </property>
</configuration>'>$HADOOP_HOME/etc/hadoop/yarn-site.xml
# rebuild the slaves file from the collected datanode IPs
rm -rf $HADOOP_HOME/etc/hadoop/slaves
touch $HADOOP_HOME/etc/hadoop/slaves
int=0
while(( ${int}<${#ip_arrays[*]} ))
do
    echo "${ip_arrays[$int]}">>$HADOOP_HOME/etc/hadoop/slaves
    if [ $? -ne 0 ]
    then
        echo 'Failed to write the slaves file'
        break
    fi
    let "int++"
done
# comment out the stock JAVA_HOME in hadoop-env.sh and insert the real path
sed -i 's/export JAVA_HOME=.*/\#&/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
sed -i "/#export JAVA_HOME=.*/a export JAVA_HOME=$JAVA_HOME" $HADOOP_HOME/etc/hadoop/hadoop-env.sh
chown -R hadoop:hadoop $HADOOP_HOME
}

# Stop the firewall
stopFirewalld(){
    systemctl stop firewalld
    systemctl disable firewalld
}

# Validate an IPv4 address; returns 0 if valid, 1 if not
checkIPAddr(){
    # the address must be four dot-separated numeric fields
    echo $1|grep "^[0-9]\{1,3\}\.\([0-9]\{1,3\}\.\)\{2\}[0-9]\{1,3\}$" > /dev/null
    if [ $? -ne 0 ]
    then
        return 1
    fi
    ipaddr=$1
    # split on "." and check each octet
    a=`echo $ipaddr|awk -F . '{print $1}'`
    b=`echo $ipaddr|awk -F . '{print $2}'`
    c=`echo $ipaddr|awk -F . '{print $3}'`
    d=`echo $ipaddr|awk -F . '{print $4}'`
    for num in $a $b $c $d
    do
        # each octet must lie between 0 and 255
        if [ $num -gt 255 ] || [ $num -lt 0 ]
        then
            return 1
        fi
    done
    return 0
}

# Read the cluster IPs from the console
ipInput(){
    echo "This machine's IP address is: $localIP"
    int=0
    echo 'When you are done, enter 0 as the IP to finish'
    while read -p "Enter the IP of machine `expr ${int} + 1`: " ip
    do
        if [ "$ip" == "0" ]
        then
            break
        fi
        checkIPAddr $ip
        if [ $? -eq 0 ]
        then
            ip_arrays[$int]=$ip
        else
            echo 'Invalid IP, starting the input over....'
            ipInput
        fi
        let "int++"
    done
}

# Push the SSH keys to the other hosts with scp
scpOutput(){
    int=0
    while(( ${int}<${#ip_arrays[*]} ))
    do
        scp -r ~/.ssh ${ip_arrays[$int]}:~/
        let "int++"
    done
}

# Passwordless SSH login for this machine
setSSH(){
    echo '---------------Configuring passwordless SSH--------------'
    echo '------Press Enter at every prompt to generate the keys---'
    ssh-keygen -t rsa
    echo '------Keys generated; distributing the public key--------'
    echo 'Answer the prompts as follows:'
    echo '----------------------------------------------------------'
    echo 'Are you sure you want to continue connecting (yes/no)?'
    echo '------------------type "yes"------------------------------'
    echo "hadoop@localhost's password:"
    echo '--------------enter the hadoop user password--------------'
    ssh-copy-id localhost
}

# Choose this machine's role from the console
nameOrData(){
    echo '--------------------------'
    echo '1. namenode'
    echo '2. datanode'
    read -p 'Select the role of this machine [1-2]: ' n
    case $n in
        1) return 0
        ;;
        2) return 1
        ;;
        *) echo 'Invalid input!!!'
           nameOrData
        ;;
    esac
}

# Configure the /etc/hosts file
setHosts(){
    echo 'Configuring the /etc/hosts file'
    echo "This machine's IP address is: $localIP"
    read -p 'Enter the hostname of this machine: ' hostname
    echo -e "$localIP\t$hostname">>/etc/hosts
    echo 'Enter the hostnames of the other machines as prompted'
    echo '-----------------------------------'
    int=0
    while(( ${int}<${#ip_arrays[*]} ))
    do
        echo "IP: ${ip_arrays[$int]}"
        read -p "Enter its hostname: " hostname
        echo -e "${ip_arrays[$int]}\t$hostname">>/etc/hosts
        echo '-----------------------------------'
        let "int++"
    done
}

# 1. One-click Java setup
javaInstall(){
    echo 'Checking the local environment'
    java -version
    if [ $? -ne 0 ]; then
        installWget
        echo 'Configuring the JDK, please wait......'
        installJDK
        pathJDK
        java -version
        if [ $? -eq 0 ]; then
            echo 'JDK configured'
        else
            echo 'Install failed; retry, or install manually'
        fi
    else
        echo 'Java is already configured'
    fi
}

# 2. One-click standalone Hadoop install
hadoopInstall(){
    javaInstall
    echo 'Checking the local environment'
    hadoop version
    if [ $? -ne 0 ]; then
        installHadoop
        hadoop version
        if [ $? -eq 0 ]; then
            echo 'Standalone Hadoop configured'
        else
            echo 'Install failed; retry, or install manually'
        fi
    else
        echo 'Hadoop is already configured'
    fi
}

# 3. One-click pseudo-distributed Hadoop install
hadoopInstall2(){
    javaInstall
    echo 'Checking the local environment'
    hadoop version
    if [ $? -ne 0 ]; then
        installHadoop
        hadoop version
        if [ $? -eq 0 ]; then
            echo 'Standalone Hadoop configured; configuring pseudo-distributed mode'
            setHadoop
            stopFirewalld
            echo 'Done.... switch to the hadoop user and initialize'
            su hadoop
        else
            echo 'Install failed; retry, or install manually'
        fi
    else
        echo 'Standalone Hadoop is already installed; configuring pseudo-distributed mode'
        setHadoop
        stopFirewalld
        echo 'Done.... switch to the hadoop user and initialize'
        su hadoop
    fi
}

# 4. Hadoop cluster deployment
hadoopInstall3(){
    nameOrData
    role=$?    # save the return value; $? is overwritten by the first test below
    if [ $role -eq 0 ]
    then
        # collect the datanode IPs
        echo 'Enter the datanode IPs'
        ipInput
        # namenode setup
        # 1. install standalone Hadoop
        hadoopInstall
        # 2. write the cluster configuration files
        echo 'Writing the configuration files'
        setHadoop2 ${localIP} ${ip_arrays[0]}
        echo 'Configuration written'
        # 3. stop the firewall
        stopFirewalld
        echo 'Firewall stopped'
        # push the configuration and packages to each datanode
        int=0
        while(( ${int}<${#ip_arrays[*]} ))
        do
            echo "Sending the configuration and packages to datanode `expr ${int} + 1`"
            echo "IP: ${ip_arrays[${int}]}"
            echo "You will be prompted for the remote host's root password"
            # send the packages with scp
            scp $(pwd)/install.sh ${ip_arrays[$int]}:/usr/local
            scp /usr/local/$(ls /usr/local | grep 'jdk.*rpm$') ${ip_arrays[$int]}:/usr/local
            scp -r /usr/local/hadoop ${ip_arrays[$int]}:/usr/local
            echo "${ip_arrays[$int]} upload finished....."
            let "int++"
        done
        setHosts
        echo 'Log in to each datanode and run this script (saved under /usr/local) to finish the datanode setup'
    elif [ $role -eq 1 ]
    then
        # install Java
        javaInstall
        # configure the Hadoop environment variables
        echo 'Configuring the environment variables'
        pathHadoop
        echo 'Environment variables configured'
        # add the hadoop user
        hadoopUserAdd
        # stop the firewall
        stopFirewalld
        echo 'Firewall stopped'
        source /etc/profile
        echo 'Verifying the installation.....'
        java -version
        if [ $? -ne 0 ]; then
            echo 'Run source /etc/profile manually,'
            echo 'then java -version to check the JDK install'
        fi
        hadoop version
        if [ $? -ne 0 ]; then
            echo 'Run source /etc/profile manually,'
            echo 'then hadoop version to check the Hadoop install'
        fi
        echo 'datanode setup finished'
    else
        echo 'An error occurred!!!'
    fi
}

# 6. Passwordless SSH across the cluster (run as the hadoop user)
setSSHS(){
    # passwordless login on this machine
    echo 'Setting up passwordless login locally....'
    setSSH
    # read the other machines' IPs
    echo 'Setting up the other hosts....'
    echo "Enter the other hosts' IPs"
    ipInput
    # send the keys to the other hosts with scp
    echo 'Sending the keys to the other hosts...'
    scpOutput
}

# Console menu
consoleInput(){
    echo '1. One-click Java setup'
    echo '2. One-click standalone Hadoop install'
    echo '3. One-click pseudo-distributed Hadoop install'
    echo '4. Hadoop cluster deployment'
    echo '5. Hadoop initialization (run on the namenode)'
    echo '6. Passwordless SSH across the cluster (run as the hadoop user)'
    echo 'Choose an option [1-6]'
    read aNum
    case $aNum in
        1) javaInstall
        ;;
        2) hadoopInstall
        ;;
        3) hadoopInstall2
        ;;
        4) hadoopInstall3
        ;;
        5) echo 'Initializing Hadoop'
           hdfs namenode -format
        ;;
        6) setSSHS
        ;;
        *) echo 'No such option, please try again!!! Press Ctrl+C to quit'
           consoleInput
        ;;
    esac
}

echo '------------------Welcome to the one-click installer------------------'
echo 'To make sure the install goes smoothly, run this script as root'
echo 'The script can also install from local packages:'
echo 'to use them, place the packages under /usr/local'
echo 'the Hadoop package must be a .tar.gz whose name starts with hadoop'
echo 'the JDK package must be an .rpm whose name starts with jdk'
echo '----------------------------------------------------'
consoleInput
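
After a pseudo-distributed install (option 3) and namenode formatting (option 5), the script leaves starting the daemons to you. A typical follow-up as the hadoop user, using the standard helper scripts that Hadoop 2.7.3 ships in sbin (which the script adds to PATH), might be:

start-dfs.sh     # starts the NameNode, DataNode and SecondaryNameNode daemons
start-yarn.sh    # starts the ResourceManager and NodeManager daemons
jps              # lists the running Java processes to confirm everything is up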