1. 程式人生 > >開源監控解決方案:ICINGA(Nagios)監控Linux

開源監控解決方案:ICINGA(Nagios)監控Linux

ios mat lin plugins linu rmi roc usm lib

[root@king02 ~]# useradd nagios
[root@king02 ~]# tar zxvf nagios-plugins-2.2.1.tar.gz
[root@king02 ~]# cd nagios-plugins-2.2.1
[root@king02 nagios-plugins-2.2.1]# ./configure --prefix=/usr/local/nagios --with-cgiurl=/nagios/cgi-bin --with-nagios-user=nagios --with-nagios-group=nagios
[root@king02 nagios-plugins-2.2.1]# make
[root@king02 nagios-plugins-2.2.1]# make install

[root@king02 ~]# yum install -y xinetd
[root@king02 ~]# tar zxvf nrpe-3.2.1.tar.gz
[root@king02 ~]# cd nrpe-3.2.1
[root@king02 nrpe-3.2.1]# ./configure --prefix=/usr/local/nagios --enable-ssl
[root@king02 nrpe-3.2.1]# make all
[root@king02 nrpe-3.2.1]# make install
[root@king02 nrpe-3.2.1]# make install-plugin
[root@king02 nrpe-3.2.1]# make install-daemon
[root@king02 nrpe-3.2.1]# make install-config
[root@king02 nrpe-3.2.1]# make install-inetd

[root@king02 ~]# vi /etc/xinetd.d/nrpe
# default: off
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
        disable         = no
        socket_type     = stream
        port            = 5666
        wait            = no
        user            = nagios
        group           = nagios
        server          = /usr/local/nagios/bin/nrpe
        server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
        only_from       = 192.168.1.201
        log_on_success  =
}

[root@king02 ~]# vi /etc/services
nrpe            5666/tcp                # nagios

[root@king02 ~]# yum -y install perl-Time-HiRes
[root@king02 ~]# rpm -ivh perl-UNIVERSAL-require-0.13-1.el6.rf.noarch.rpm
[root@king02 ~]# tar zxvf Sys-Statistics-Linux-0.66.tar.gz
[root@king02 ~]# cd Sys-Statistics-Linux-0.66
[root@king02 Sys-Statistics-Linux-0.66]# perl Makefile.PL
[root@king02 Sys-Statistics-Linux-0.66]# make
[root@king02 Sys-Statistics-Linux-0.66]# make install

[root@king02 ~]# vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.1.201
command[check_cpu]=/usr/local/nagios/libexec/check_linux_stats.pl -C -w 50 -c 80 -s 5
command[check_memory]=/usr/local/nagios/libexec/check_memory.py -w 20 -c 10
command[check_disk]=/usr/local/nagios/libexec/check_disk.pl -w 20 -c 10
command[check_network]=/usr/local/nagios/libexec/check_linux_stats.pl -N -w 1048576 -c 2097152 -p eth0
command[check_uptime]=/usr/local/nagios/libexec/check_linux_stats.pl -U -w 5

[root@king02 ~]# cd /usr/local/nagios/libexec
[root@king02 libexec]# chmod a+x check_linux_stats.pl
[root@king02 libexec]# chmod a+x check_memory.py
[root@king02 libexec]# chmod a+x check_disk.pl
[root@king02 libexec]# ./check_linux_stats.pl -C -w 50 -c 80 -s 5
CPU OK : idle 97.79% |idle=97.79%;50;80 user=0.00% system=0.00% iowait=2.21% steal=0.00%

[root@king02 ~]# /etc/init.d/xinetd start
Starting xinetd: [ OK ]
[root@king02 ~]# netstat -tunlp | grep 5666
tcp 0 0 :::5666 :::* LISTEN 2409/xinetd


[root@king01 ~]# cd /usr/local/icinga/etc/objects/
[root@king01 objects]# vi linux.cfg  --主機

# Define a host.
# Host object for the monitored Linux machine "sales_zx" (192.168.1.202).
# It inherits its check and notification settings from the "linux-server"
# host template, which this article defines in templates.cfg.
define host{
        use                     linux-server            
        host_name               sales_zx
        alias                   sales_zx
        icon_image              redhat.gif
        statusmap_image         redhat.gd2
        address                 192.168.1.202
        }
        
# Define a hostgroup.
# Groups the hosts that the service definitions in this file are attached to
# (each service uses "hostgroup_name sales-servers"). It currently has a
# single member, sales_zx.
define hostgroup{
        hostgroup_name  sales-servers
        alias           sales-servers
        members         sales_zx
        }
        
# Define the servicegroups.
# One group per kind of check (ping, cpu, memory, disk, network, uptime).
# No members are listed here; each service definition in this file joins its
# group through its own "servicegroups" directive.
define servicegroup {
        servicegroup_name  ping
        alias   ping
        }
define servicegroup {
        servicegroup_name  cpu
        alias   cpu
        }
define servicegroup {
        servicegroup_name  memory
        alias   memory
        }
define servicegroup {
        servicegroup_name  disk
        alias   disk
        }
define servicegroup {
        servicegroup_name  network
        alias   network
        }
define servicegroup {
        servicegroup_name  uptime
        alias   uptime
        }
        
# Define the services.
# Every service below is applied to all hosts in the "sales-servers" hostgroup
# and inherits from the "generic-service" template (templates.cfg).
#
# For the check_nrpe services, the text after "!" is passed as $ARG1$ to the
# check_nrpe command (commands.cfg), i.e. "check_nrpe -c <name>". <name> must
# match a command[<name>] entry in nrpe.cfg on the monitored host.

# Reachability check run from the monitoring server itself (no NRPE).
# NOTE(review): the two arguments are presumably the warning and critical
# thresholds (round-trip time in ms, packet loss %) of the stock check_ping
# command; that command definition is not shown in this article - confirm.
define service{
        hostgroup_name                  sales-servers                       
        use                             generic-service         
        service_description             alive
        servicegroups                   ping
        check_command                   check_ping!100.0,20%!500.0,60%
        }
# CPU usage: runs command[check_cpu] from the agent's nrpe.cfg.
define service{
        hostgroup_name                  sales-servers                       
        use                             generic-service         
        service_description             os cpu usage
        servicegroups                   cpu
        check_command                   check_nrpe!check_cpu
        }
# Memory usage: runs command[check_memory] from the agent's nrpe.cfg.
define service{
        hostgroup_name                  sales-servers                       
        use                             generic-service        
        service_description             os memory usage
        servicegroups                   memory
        check_command                   check_nrpe!check_memory
        }
# Disk usage: runs command[check_disk] from the agent's nrpe.cfg.
define service{
        hostgroup_name                  sales-servers                        
        use                             generic-service        
        service_description             os disk usage
        servicegroups                   disk
        check_command                   check_nrpe!check_disk
        }
# Network traffic: runs command[check_network] from the agent's nrpe.cfg.
define service{
        hostgroup_name                  sales-servers                        
        use                             generic-service         
        service_description             os network usage
        servicegroups                   network
        check_command                   check_nrpe!check_network
        }
# Uptime: runs command[check_uptime] from the agent's nrpe.cfg.
define service{
        hostgroup_name                  sales-servers                       
        use                             generic-service        
        service_description             os uptime
        servicegroups                   uptime
        check_command                   check_nrpe!check_uptime
        }
        
[root@king01 objects]# vi commands.cfg --命令

# 'check_nrpe' command definition
# Runs the check_nrpe plugin against the host's address ($HOSTADDRESS$) and
# asks the remote NRPE agent to execute the command named by $ARG1$, which is
# the text after "!" in a service's check_command.
# "-t 30" is check_nrpe's timeout option (30 seconds, per the NRPE docs).
# $USER1$ is a user macro whose value is set outside the files shown here
# (presumably the plugin directory - confirm in resource.cfg).
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 30
        }     
           
[root@king01 objects]# vi templates.cfg --模板

# Host template "linux-server", used by the sales_zx host in linux.cfg.
# Intervals are expressed in time units; one unit is one minute only if
# interval_length keeps its default of 60 seconds (not shown here - confirm).
define host{
        name                            linux-server    ; The name of this host template
        use                             generic-host    ; This template inherits other values from the generic-host template
        check_period                    24x7            ; By default, Linux hosts are checked round the clock
        check_interval                  1               ; Actively check the host every 1 time unit
        retry_interval                  1               ; Schedule host check retries at intervals of 1 time unit
        max_check_attempts              2               ; Check each Linux host 2 times (max)
        check_command                   check-host-alive ; Default command to check Linux hosts
        notification_period             workhours       ; Linux admins hate to be woken up, so we only notify during the day
                                                        ; Note that the notification_period variable is being overridden from
                                                        ; the value that is inherited from the generic-host template!
        notification_interval           120             ; Resend notifications every 120 time units
        notification_options            d,u,r           ; Only send notifications for specific host states
        contact_groups                  admins          ; Notifications get sent to the admins by default
        register                        0               ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
        }


# Service template "generic-service", inherited by every service in linux.cfg.
# Intervals are expressed in time units; one unit is one minute only if
# interval_length keeps its default of 60 seconds (not shown here - confirm).
define service{
        name                            generic-service         ; The 'name' of this service template
        active_checks_enabled           1                       ; Active service checks are enabled
        passive_checks_enabled          1                       ; Passive service checks are enabled/accepted
        parallelize_check               1                       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
        obsess_over_service             1                       ; We should obsess over this service (if necessary)
        check_freshness                 0                       ; Default is to NOT check service 'freshness'
        notifications_enabled           1                       ; Service notifications are enabled
        event_handler_enabled           1                       ; Service event handler is enabled
        flap_detection_enabled          1                       ; Flap detection is enabled
        failure_prediction_enabled      1                       ; Failure prediction is enabled
        process_perf_data               1                       ; Process performance data
        retain_status_information       1                       ; Retain status information across program restarts
        retain_nonstatus_information    1                       ; Retain non-status information across program restarts
        is_volatile                     0                       ; The service is not volatile
        check_period                    24x7                    ; The service can be checked at any time of the day
        max_check_attempts              2                       ; Check the service up to 2 times in order to determine its final (hard) state
        check_interval                  1                       ; Check the service every 1 time unit under normal conditions
        retry_interval                  1                       ; Re-check the service every 1 time unit until a hard state can be determined
        contact_groups                  admins                  ; Notifications get sent out to everyone in the 'admins' group
        notification_options            w,u,c,r                 ; Send notifications about warning, unknown, critical, and recovery events
        notification_interval           60                      ; Re-notify about service problems every 60 time units
        notification_period             24x7                    ; Notifications can be sent out at any time
         register                        0                      ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
        }




開源監控解決方案:ICINGA(Nagios)監控Linux