heartbeat+drbd監控腳本(自動發現問題,並郵件通知)
阿新 • • 發佈:2018-06-01
heartbeat+drbd監控腳本:監控heartbeat+drbd。生產環境有一些服務器是用heartbeat+drbd做的高可用,但是有時候由於各種原因會產生問題(比如腦裂),而且沒有被及時發現。所以寫了這個腳本,對heartbeat、drbd服務狀態以及drbd分區掛載目錄進行監控,現分享如下。
腳本的執行原理可能並不好(先判斷當前服務器是哪個主機名,然後將另外一臺的IP、端口、密碼賦值給變量,再將本機和遠程的heartbeat、drbd和掛載目錄狀態輸出到不同的文件,最後進行相應的邏輯「與」、「或」運算,如果有問題則郵件通知),希望高人指點:
#!/bin/bash
# author:  gxm
# date:    2018-05-15
# version: 1.1
#
# Monitors a two-node heartbeat + DRBD high-availability pair.
#
# The script works out which of the two nodes it is running on, collects the
# heartbeat status, /proc/drbd and `df -h` output from both the local node and
# the peer (over ssh, driven by expect), and then checks that:
#   * heartbeat is running on both nodes,
#   * the DRBD mount point is mounted on exactly one node,
#   * DRBD roles are Primary/Secondary and both disks are UpToDate.
# Every failed check is logged to $DRBD_HALOG and the log is mailed to $MAILTO.
#
# Before running, adjust the "Configuration" section and the per-host peer
# settings (OTHER_HOST / OTHER_PROT / OTHER_USER / OTHER_PASSWD) further down.
# Must be run as root on CentOS 6 or 7.

# ----------------------------- Configuration ------------------------------
HOSTNAME1="drbd1.db.com"
HOSTNAME2="drbd2.db.com"
MOUNTDIR="/store"
# NOTE(review): the published copy of this script had every e-mail address
# replaced by the placeholder text "[email protected]"; the values below are
# examples and must be set to real addresses.
MAILFROM="monitor@example.com"
SMTPSERVER="smtp.163.com"
SMTPUSER="monitor@example.com"
# NOTE(review): credentials are hard-coded in this file; restrict its
# permissions or load them from a protected file instead.
SMTPPASSWD="123456"
# Recipient of the alert mails.
MAILTO="admin@example.com"

CURRDATE=$(date "+%Y-%m-%d %H:%M:%S")
DRBD_HALOG=/var/log/drbd_ha.log
# NOTE(review): fixed file names under /tmp, written as root.
CURRENTHOST_HEARTBEAT_STATUS=/tmp/currenthost_heartbeat_status.txt
OTHERHOST_HEARTBEAT_STATUS=/tmp/otherhost_heartbeat_status.txt
CURRENTHOST_DRBD_DETAILED=/tmp/currenthost_drbd_detailed.txt
OTHERHOST_DRBD_DETAILED=/tmp/otherhost_drbd_detailed.txt
CURRENTHOST_DISK=/tmp/currenthost_disk.txt
OTHERHOST_DISK=/tmp/otherhost_disk.txt

# ------------------------------- Logging ----------------------------------

# Log the abort message and terminate the script with status 1.
function force_exit() {
  echo "$CURRDATE: 腳本意外退出!" | tee -a "$DRBD_HALOG"
  echo
  exit 1
}

# Print an informational message ($1) and append it to the log.
function output_notify() {
  echo "$CURRDATE:$1" | tee -a "$DRBD_HALOG"
}

# Print an error message ($1) framed by marker lines and append it to the log.
function output_error() {
  echo "$CURRDATE:[ERROR] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" | tee -a "$DRBD_HALOG"
  echo "$CURRDATE:[ERROR] $1" | tee -a "$DRBD_HALOG"
  echo "$CURRDATE:[ERROR] <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" | tee -a "$DRBD_HALOG"
}

# Print message $2 with an ANSI colour chosen by $1 and append it to the log.
#   0: white on red     1: white on yellow   2: black on white
#   3: blue text        4: red text
# Any other selector prints nothing. Always returns 0.
function echo_colour() {
  local sgr
  case "$1" in
    0) sgr='41;37' ;;
    1) sgr='43;37' ;;
    2) sgr='47;30' ;;
    3) sgr='34' ;;
    4) sgr='31' ;;
    *) return 0 ;;
  esac
  echo -e "$CURRDATE:\033[${sgr}m ${2} \033[0m" | tee -a "$DRBD_HALOG"
  return 0
}

# --------------------------- Environment checks ---------------------------

# Abort unless the script is running as root.
function check_user_root() {
  if [ "$(id -u)" -eq 0 ]; then
    output_notify "當前是root賬號,正在執行腳本"
  else
    output_error "當前是非root賬號,退出腳本"
    force_exit
  fi
}

# Set SYS_RELEASE to el5, el7 or el6 based on the kernel release string.
# Anything that is neither el5 nor el7 is treated as el6.
function check_os() {
  # The kernel release is used rather than `uname -a` so that a host name
  # containing "el5"/"el7" cannot influence the result.
  case "$(uname -r)" in
    *el5*) SYS_RELEASE="el5" ;;
    *el7*) SYS_RELEASE="el7" ;;
    *)     SYS_RELEASE="el6" ;;
  esac
}

# Install the mailx client and add the SMTP settings to /etc/mail.rc.
# (Deliberately not named "mailx": a function of that name would shadow the
# real command.)
function install_mailx() {
  yum -y install mailx || return 1
  # Append the SMTP block only once, so repeated installs do not duplicate it.
  if ! grep -qF -- "set smtp-auth-user=${SMTPUSER}" /etc/mail.rc 2>/dev/null; then
    cat >> /etc/mail.rc << EOF
set from=$MAILFROM
set smtp=$SMTPSERVER
set smtp-auth-user=$SMTPUSER
set smtp-auth-password=$SMTPPASSWD
set smtp-auth=login
EOF
  fi
}

# Abort on unsupported OS releases; install mailx when it is missing.
function check_mailx_program() {
  check_os
  case "$SYS_RELEASE" in
    el6 | el7) ;;
    *)
      # The later checks have no code path for other releases, so stop here
      # instead of continuing with unset status variables.
      output_error "此腳本只適用於centos6和7版本"
      force_exit
      ;;
  esac
  if ! command -v mailx > /dev/null 2>&1; then
    echo "現在安裝mailx工具!"
    install_mailx
  fi
}

# ---------------------------------- Mail ----------------------------------

# Print usage for sendmail() and terminate the script with status 1.
function sendmailhelp() {
  echo "eg: $0 [Subject] [address] [content_file] [file]"
  echo ""
  exit 1
}

# Send a mail.
#   $1 subject   $2 recipient address   $3 file used as the mail body
#   $4 attachment (optional)
# Exits the script via sendmailhelp when the subject or recipient is missing.
function sendmail() {
  if [ -z "$1" ] || [ -z "$2" ]; then
    sendmailhelp
  fi
  local mail_to=$2
  echo " Send Mail to ${mail_to}"
  # The subject is quoted so that it may contain spaces.
  if [ -z "$4" ]; then
    mail -s "$1" "$mail_to" < "$3"
  else
    mail -s "$1" -a "$4" "$mail_to" < "$3"
  fi
}

# ------------------------------ Data collection ---------------------------

# Run command $1 on the peer node over ssh, answering the host-key and
# password prompts through expect. The session transcript goes to stdout.
# NOTE(review): $1 and $OTHER_PASSWD are placed inside an expect (Tcl)
# double-quoted string, so characters special to Tcl ($ [ ] " \) in either
# would be interpreted by expect.
function ssh_otherhost() {
  /usr/bin/expect << EOF
spawn ssh -t -p "$OTHER_PROT" $OTHER_USER@$OTHER_HOST "$1"
expect {
  "*\(yes\/no\)?" { send "yes\r"; exp_continue }
  "*password:" { send "$OTHER_PASSWD\r" }
}
expect eof
EOF
}

# Run command $1 locally and on the peer, storing the output in files.
#   $1 command line   $2 file for local output   $3 file for peer output
function outtxt() {
  # $1 is intentionally unquoted: it holds a command plus its arguments.
  $1 > "$2"
  ssh_otherhost "$1" > "$3"
}

# --------------------------------- Checks ---------------------------------

# Search file $1 for extended regex $2.
# On a match log $3 as a notice and return 0; otherwise log $4 as an error
# and return 1.
function server_code() {
  if grep -Eq -- "$2" "$1"; then
    output_notify "$3"
    return 0
  fi
  output_error "$4"
  return 1
}

# Check whether mount point $2 appears in the saved `df -h` output in file $1.
# On a match log $3 and return 0; otherwise log $4 and return 1. Both
# outcomes are logged as notices because "not mounted" is the expected state
# on the standby node.
function disk_mount_code() {
  # Compare the last column exactly, so that /store does not also match
  # /store2. A trailing carriage return (present in the transcript captured
  # through `ssh -t`) is stripped before comparing.
  if awk -v mp="$2" '
       { last = $NF; sub(/\r$/, "", last); if (last == mp) found = 1 }
       END { exit !found }
     ' "$1"; then
    output_notify "$3"
    return 0
  fi
  output_notify "$4"
  return 1
}

# Verify the DRBD roles: one node must report Primary/Secondary and the other
# Secondary/Primary. Returns 0 when they do, 1 otherwise.
function drbd_status() {
  local currenthost_drbd otherhost_drbd
  # Third whitespace-separated field of the line containing "ro:".
  currenthost_drbd=$(awk '/ro:/ { print $3 }' "$CURRENTHOST_DRBD_DETAILED")
  otherhost_drbd=$(awk '/ro:/ { print $3 }' "$OTHERHOST_DRBD_DETAILED")
  if { [[ $currenthost_drbd == "ro:Secondary/Primary" ]] &&
       [[ $otherhost_drbd == "ro:Primary/Secondary" ]]; } ||
     { [[ $currenthost_drbd == "ro:Primary/Secondary" ]] &&
       [[ $otherhost_drbd == "ro:Secondary/Primary" ]]; }; then
    output_notify "drbd主從狀態正常"
    return 0
  fi
  output_error "drbd主從狀態異常,請詳細檢查或參考${DRBD_HALOG}日誌"
  return 1
}

# Verify the DRBD disk state: both nodes must report ds:UpToDate/UpToDate.
# Returns 0 when they do, 1 otherwise.
function drbd_status_update() {
  local currenthost_drbd_update otherhost_drbd_update
  # Fourth whitespace-separated field of the line containing "ro:".
  currenthost_drbd_update=$(awk '/ro:/ { print $4 }' "$CURRENTHOST_DRBD_DETAILED")
  otherhost_drbd_update=$(awk '/ro:/ { print $4 }' "$OTHERHOST_DRBD_DETAILED")
  if [[ $currenthost_drbd_update == "ds:UpToDate/UpToDate" ]] &&
     [[ $otherhost_drbd_update == "ds:UpToDate/UpToDate" ]]; then
    output_notify "drbd同步狀態正常"
    return 0
  fi
  output_error "drbd同步狀態異常,請詳細檢查或參考${DRBD_HALOG}日誌"
  return 1
}

# ------------------------------- Main flow --------------------------------

check_user_root
check_mailx_program

# Work out which node this is and load the connection details of the peer.
CURRENT_HOSTNAME=$(hostname)
if [ "$CURRENT_HOSTNAME" = "$HOSTNAME1" ]; then
  output_notify "當前服務器主機名為$CURRENT_HOSTNAME"
  OTHER_HOST="192.168.40.52"
  OTHER_PROT="22"
  OTHER_USER="root"
  OTHER_PASSWD="123456"
elif [ "$CURRENT_HOSTNAME" = "$HOSTNAME2" ]; then
  output_notify "當前服務器主機名為$CURRENT_HOSTNAME"
  OTHER_HOST="192.168.40.54"
  OTHER_PROT="22"
  OTHER_USER="root"
  OTHER_PASSWD="123456"
else
  # Without peer settings none of the remote checks can work, so stop here.
  output_error "您主機名不符合要求"
  force_exit
fi

# expect is required to answer the ssh prompts non-interactively.
if [ ! -e /usr/bin/expect ]; then
  echo "現在安裝expect工具!"
  yum -y install expect
fi

# The heartbeat status command and its "running" marker differ between the
# init-script (el6) and systemd (el7) releases; check_mailx_program has
# already rejected every other release.
check_os
if [ "$SYS_RELEASE" = el7 ]; then
  heartbeat_status_cmd="systemctl status heartbeat"
  heartbeat_running_pattern="Active.*active.*running"
else
  heartbeat_status_cmd="/etc/init.d/heartbeat status"
  heartbeat_running_pattern="is running"
fi

# Collect the state of both nodes into the status files.
outtxt "$heartbeat_status_cmd" "$CURRENTHOST_HEARTBEAT_STATUS" "$OTHERHOST_HEARTBEAT_STATUS"
outtxt "cat /proc/drbd" "$CURRENTHOST_DRBD_DETAILED" "$OTHERHOST_DRBD_DETAILED"
outtxt "df -h" "$CURRENTHOST_DISK" "$OTHERHOST_DISK"

# heartbeat must be running on both nodes.
server_code "$CURRENTHOST_HEARTBEAT_STATUS" "$heartbeat_running_pattern" \
  "當前服務器heartbeat服務運行正常" "當前服務器heartbeat服務異常,請檢查"
currenthost_heartbeat_code=$?
server_code "$OTHERHOST_HEARTBEAT_STATUS" "$heartbeat_running_pattern" \
  "另一臺服務器heartbeat服務運行正常" "另一臺服務器heartbeat服務異常,請檢查"
otherhost_heartbeat_code=$?

if [ "$currenthost_heartbeat_code" -eq 0 ] && [ "$otherhost_heartbeat_code" -eq 0 ]; then
  echo_colour 1 "恭喜,兩臺服務器heartbeat服務均運行正常"
else
  output_error "heartbeat服務異常,請詳細檢查或參考${DRBD_HALOG}日誌"
  sendmail "heartbeat服務異常,詳細見郵件正文" "$MAILTO" "$DRBD_HALOG"
fi

# The DRBD mount point must be mounted on exactly one of the two nodes.
disk_mount_code "$CURRENTHOST_DISK" "$MOUNTDIR" \
  "當前服務器${MOUNTDIR}掛載了" "當前服務器${MOUNTDIR}沒掛載"
currenthost_disk_code=$?
disk_mount_code "$OTHERHOST_DISK" "$MOUNTDIR" \
  "另一臺服務器${MOUNTDIR}掛載了" "另一臺服務器${MOUNTDIR}沒掛載"
otherhost_disk_code=$?

# Both codes are 0 or 1, so "exactly one mounted" means they differ.
if [ "$currenthost_disk_code" -ne "$otherhost_disk_code" ]; then
  echo_colour 1 "恭喜,${MOUNTDIR}掛載正常"
else
  output_error "${MOUNTDIR}掛載異常,請詳細檢查或參考${DRBD_HALOG}日誌"
  sendmail "${MOUNTDIR}掛載異常,詳細見郵件正文" "$MAILTO" "$DRBD_HALOG"
fi

# DRBD roles and synchronisation state must both be healthy.
drbd_status
drbd_status_code=$?
drbd_status_update
drbd_status_update_code=$?

if [ "$drbd_status_code" -eq 0 ] && [ "$drbd_status_update_code" -eq 0 ]; then
  echo_colour 1 "恭喜,兩臺服務器drbd運行正常"
else
  output_error "drbd運行不正常,請詳細檢查或參考${DRBD_HALOG}日誌"
  sendmail "drbd服務異常,詳細見郵件正文" "$MAILTO" "$DRBD_HALOG"
fi
運行結果:
heartbeat+drbd監控腳本(自動發現問題,並郵件通知)