1. 程式人生 > >saltstack一些簡單總結--利用saltstack的event實現自己的功能(2)

saltstack一些簡單總結--利用saltstack的event實現自己的功能(2)

當saltstack的master上連線的minion較多時,下面這個程式可以分析哪些minion的任務執行成功、哪些執行失敗,以及哪些沒有返回

import salt.utils.event
import re
import signal, time
import sys
import os
def single_handler(target):
    """Replace the current process with ``salt <target> state.sls os``.

    Does not return on success, because the process image is replaced by
    /usr/bin/salt. ``target`` is passed to salt as its target argument.
    """
    salt_argv = ['salt', target, 'state.sls', 'os']
    os.execv('/usr/bin/salt', salt_argv)
    
def _rerun_in_child(target):
    """Fork once and re-run the state on ``target`` in the child process.

    The parent returns right after the fork. The child calls
    single_handler(), which only comes back if the exec failed; in that case
    the child reports the error and exits instead of falling back into the
    caller's loop and forking further re-runs of its own.
    """
    # Flush first so the child does not inherit output that is still
    # buffered in the parent and emit it a second time.
    sys.stdout.flush()
    try:
        pid = os.fork()
    except OSError:
        print('we exec. ' + target + ' error!')
        return
    if pid != 0:
        return
    try:
        single_handler(target)
    except OSError:
        print('we exec. ' + target + ' error!')
        sys.stdout.flush()
    finally:
        # Only reached when the exec did not replace this process.
        os._exit(1)


def handler(num1, num2):
    """SIGCLD handler: report the job outcome and re-run it where needed.

    Prints the minions that did not return and the minions whose job failed
    for the recorded job id, starts one child per such minion to run the
    state again, then terminates this process with status 0.

    num1 and num2 are the signal number and the interrupted stack frame
    supplied by the signal module; neither is used.
    """
    # A re-run child that dies quickly raises SIGCLD again, which would
    # re-enter this handler and repeat every re-run; restore the default
    # disposition so the work below happens only once.
    signal.signal(signal.SIGCLD, signal.SIG_DFL)

    # The signal may arrive before any job event was recorded; fall back to
    # empty lists instead of failing with KeyError.
    not_returned = record.get(jid, [])
    failed = failedrecord.get(jid, [])

    print('We are in signal handler')
    print('Job Not Ret: ' + str(not_returned))
    print(' Job Failed: ' + str(failed))
    print('all done...')
    for item in failed:
        _rerun_in_child(item)
    for item in not_returned:
        print('fork ok ' + item)
        _rerun_in_child(item)
    sys.stdout.flush()
    os._exit(0)



# Everything printed by this script, and by the salt processes it starts,
# is collected in this file.
fd = open('/tmp/record', 'w+')

# handler() runs when a child process terminates, i.e. once the salt command
# forked below has finished.
signal.signal(signal.SIGCLD, handler)

# Redirect at file-descriptor level (rather than rebinding sys.stdout) so
# that exec'ed children inherit the redirection as well.
os.dup2(fd.fileno(), sys.stdout.fileno())
os.dup2(fd.fileno(), sys.stderr.fileno())

try:
    pid = os.fork()
except OSError:
    print('first fork error!')
    # os._exit() does not flush stdio buffers; flush so the message is kept.
    sys.stdout.flush()
    os._exit(1)

if pid == 0:
    # Child: wait a moment before running the command, so the parent has
    # time to start listening for the job's events.
    time.sleep(2)
    try:
        os.execl('/usr/bin/salt', 'salt', '*', 'state.sls', 'os')
    except OSError:
        print('exec error!')
        sys.stdout.flush()
        os._exit(1)
# One process executes the command after a short sleep while this process
# listens on the master event bus. Listening this way also makes it possible
# to filter events by function name.
event = salt.utils.event.MasterEvent('/var/run/salt/master')
flag = False  # becomes True once the first "new job" event has been seen
reg = re.compile('salt/job/([0-9]+)/new')
reg1 = reg  # replaced by the per-job return-tag pattern once jid is known
record = {}        # jid -> minions that have not returned yet
failedrecord = {}  # jid -> minions whose return reported success == False
jid = 0

for eachevent in event.iter_events(tag='salt/job', full=True):
    eachevent = dict(eachevent)
    data = dict(eachevent['data'])
    result = reg.findall(eachevent['tag'])
    if not flag and result:
        # First "new job" event: remember the job and print its summary.
        flag = True
        jid = result[0]
        print("   job_id: " + jid)
        print("  Command: " + data['fun'] + ' ' + str(data['arg']))
        print("    RunAs: " + data['user'])
        print("exec_time: " + data['_stamp'])
        print("host_list: " + str(data['minions']))
        sys.stdout.flush()
        record[jid] = list(data['minions'])
        failedrecord[jid] = []
        # Capture the whole minion id after "/ret/": ids are not limited to
        # digits and dots (hostnames are common), so a character class of
        # [0-9.] would never mark such minions as returned.
        reg1 = re.compile('salt/job/' + jid + '/ret/(.+)$')
    else:
        result = reg1.findall(eachevent['tag'])
        if result:
            minion = result[0]
            # A duplicate return, or one from a minion missing from the
            # announced list, must not stop the listener with ValueError.
            if minion in record[jid]:
                record[jid].remove(minion)
            if not data['success']:
                failedrecord[jid].append(minion)
# Reporting and re-running of failed / not-returned minions is done in the
# SIGCLD handler once the salt child process exits.

執行完一遍後:
   job_id: 20151208025319005896
  Command: state.sls ['os']
    RunAs: root
exec_time: 2015-12-08T02:53:19.006284
host_list: ['172.18.1.212', '172.18.1.214', '172.18.1.213', '172.18.1.211']
172.18.1.213:
----------
          ID: configfilecopy
    Function: file.managed
        Name: /root/node3
      Result: True
     Comment: File /root/node3 is in the correct state
     Started: 02:53:19.314015
    Duration: 13.033 ms
     Changes:   
----------
          ID: commonfile
    Function: file.managed
        Name: /root/commonfile
      Result: True
     Comment: File /root/commonfile is in the correct state
     Started: 02:53:19.327173
    Duration: 1.993 ms
     Changes:   

Summary
------------
Succeeded: 2
Failed:    0
------------
Total states run:     2
172.18.1.212:
----------
          ID: configfilecopy
    Function: file.managed
        Name: /root/node2
      Result: True
     Comment: File /root/node2 is in the correct state
     Started: 02:53:19.337325
    Duration: 8.327 ms
     Changes:   
----------
          ID: commonfile
    Function: file.managed
        Name: /root/commonfile
      Result: True
     Comment: File /root/commonfile is in the correct state
     Started: 02:53:19.345787
    Duration: 1.996 ms
     Changes:   

Summary
------------
Succeeded: 2
Failed:    0
------------
Total states run:     2
172.18.1.211:
----------
          ID: configfilecopy
    Function: file.managed
        Name: /root/node1
      Result: True
     Comment: File /root/node1 is in the correct state
     Started: 02:53:19.345017
    Duration: 12.741 ms
     Changes:   
----------
          ID: commonfile
    Function: file.managed
        Name: /root/commonfile
      Result: True
     Comment: File /root/commonfile is in the correct state
     Started: 02:53:19.357873
    Duration: 1.948 ms
     Changes:   

Summary
------------
Succeeded: 2
Failed:    0
------------
Total states run:     2
172.18.1.214:
    Minion did not return. [Not connected]
We are in signal handler
Job Not Ret: ['172.18.1.214']
 Job Failed: []
all done...
fork ok 172.18.1.214
172.18.1.214:
    Minion did not return. [Not connected]
一: 最先打印出本次任務的job id、command name以及其它相關資訊,然後是本次任務的執行流程和結果,這和我們單獨執行這個命令是一致的。最後程式會打印出所有未成功的任務和未返回的任務,並且重新執行一遍。 這裡要說明的是,因為沒有檢視對應的情景,對於失敗任務的判斷做得不好;另外minion未連線我也歸為任務未返回,並且會再執行一遍,實際上如果是minion未連線,則不應該再執行。

二: 程式先派生子程序去執行salt命令,在salt命令執行完畢後,我們的程式會對其中失敗的和未返回的minion任務二次執行。