|
dave spink toolset |
|
NETSAINT: |
|||
CONFIG | EXAMPLE |
THE PROJECT The objective is to have pro-active system monitoring combined with accurate "operation processes" to ensure the correct person is called to resolve the issue. The open source software Nagios (formerly NetSaint) provides a framework for monitoring your environment. Our company started with 85 SUN servers and 661 services actively monitored. If you consider cpu load, memory performance, swap utilisation, disk performance and disk utilisation per server, that accounts for 425 services. The remaining services are scripts (applications, database instances, other hardware, system configuration) we have created in order to ensure the environment is properly monitored. CONFIG Server configuration files are located in "/usr/local/netsaint/etc". ./netsaint.cfg ;passed in startup - contains logs, contacts, hosts.cfg, command.cfg ./hosts.cfg ;check_nrpe $HOSTADDRESS$ -c check_disk.sh -to 60 ./command.cfg ;check_disk -w 85% -c 95% -p $ARG1$ Client configuration files are "/etc/services" and "nrpe.cfg". /etc/services ;contains entry "nrpe 5666/tcp" /usr/local/netsaint/nrpe.cfg ;defines what can be run by this client /opt/netsaint-plugins-1.2.9.4 ;netsaint plugins /opt/nrpe-1.2.5 ;netsaint daemon Server daemon starts by reading the configuration file. /usr/local/netsaint/bin/netsaint -d /usr/local/netsaint/etc/netsaint.cfg See an extract of /usr/local/netsaint/etc/hosts.cfg generic entries. 
command[check_nrpe_cpu]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_load -to 60 command[check_nrpe_iostat.se]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_iostat.se -to 60 command[check_nrpe_swap]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_swap -to 60 command[check_nrpe_vmstat.se]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_vmstat.se -to 60 command[check_nrpe_disk.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_disk.sh -to 60 # HOSTADDRESS is the DNS name passed from service entry See an extract of /usr/local/netsaint/etc/hosts.cfg custom entries. command[check_galleon_uat]=/app/galleon/check_galleon_uat command[check_galleon_prod]=/app/galleon/check_galleon_prod command[check_mqm_log.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_mqm_log.sh -to 60 command[check_ftp_d1pr0150.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_d1pr0150.sh -to 60 command[check_ftp_d1pr0152.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_d1pr0152.sh -to 60 command[check_ftp_bloomberg.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_bloomberg.sh -to 60 command[check_ftp_wachovia.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_wachovia.sh -to 60 command[check_ftp_deutsche.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_deutsche.sh -to 60 command[check_nrpe_raid]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_raid -to 60 command[check_nrpe_raid3310]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_3310 -to 60 command[check_nrpe_ehub.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ehub.sh -to 60 command[check_ehub_outbound.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ehub_outbound.sh -to 60 command[check_raid_d1de0001.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_raid_d1de0001.sh -to 60 
command[check_system.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_system.sh -to 60 # /usr/local/netsaint/libexec/check_nrpe indicates this service will be run remotely on the host itself # /app/galleon/check_galleon_prod indicates this service is executed from the NetSaint Server itself See an extract of /usr/local/netsaint/etc/hosts.cfg service entries. service[d1de0001]=CPU Load;0;24x7;3;10;5;Unix-Admins;0;none;0;0;0;;check_nrpe_cpu service[d1de0001]=Memory Performance;0;24x7;3;10;5;Unix-Admins;0;none;0;0;0;;check_nrpe_vmstat.se service[d1de0001]=Swap Utilization;0;24x7;3;10;5;Unix-Admins;0;none;0;0;0;;check_nrpe_swap service[d1de0001]=Disk Performance;0;24x7;3;10;5;Unix-Admins;0;none;0;0;0;;check_nrpe_iostat.se service[d1de0001]=Disk Utilization;0;24x7;3;10;5;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_disk.sh service[d1de0001]=SHAPE Client;0;24x7;3;360;1;Unix-Admins;1440;24x7;1;1;1;;check_nrpe_orca.sh service[d1de0001]=Oracle CFM;0;24x7;3;5;1;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_oracle_appd01.sh service[d1de0001]=Oracle Oceans;0;24x7;3;5;1;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_oracle_utld01.sh service[d1de0001]=Oracle Pricing;0;24x7;3;5;1;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_oracle_prcd01.sh service[d1de0001]=Oracle Remedy;0;24x7;3;5;1;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_oracle_remd01.sh service[d1de0001]=Oracle Enterprise Manager;0;24x7;3;5;1;Unix-Admins,DB-Admins;240;24x7;1;1;1;;check_nrpe_oracle_eval01.sh service[d1de0001]=A1000 RAID Controller;0;24x7;3;10;5;Unix-Admins;240;24x7;1;1;1;;check_nrpe_raid_d1de0001.sh service[d1de0001]=System Config;0;24x7;1;1440;1;Unix-Admins;1440;24x7;1;1;1;;check_system.sh service[d1de0001]=Check VX;0;24x7;3;10;5;Unix-Admins;240;24x7;1;1;1;;check_vx See an extract of /usr/local/netsaint/etc/hosts.cfg host entries. 
host[d1de0001]=d1de0001;d1de0001.cpships.com;;check_ping;3;240;24x7;1;1;1; # check_ping command is defined in command.cfg, hosts.cfg and command.cfg are really the same thing Client Server Communication EXAMPLE Scenario - Adding an FTP Monitoring Script. Create a script on the NetSaint server for checking that FTP login is working. # telnet d1pr0102 # cd /usr/local/netsaint/libexec # cat check_ftp_d1ua0007.sh #!/bin/sh URL1="a href=\"/netsaint/resolution/ftp.htm\" URL2=" (Click for Resolution)" # The .netrc file is used for username and password /usr/bin/ftp 199.172.169.3 | grep bin > /dev/null if [ $? = 0 ] then echo "FTP login working" exit 0 else echo "$URL1 FTP Not Working $URL2" exit 2 fi Modify the server hosts.cfg file and add the required entries. # telnet d1pr0102 # cd /usr/local/netsaint/etc # cp hosts.cfg hosts.cfg-Sep20 # vi hosts.cfg #add to custom commands section command[check_ftp_wachovia.sh]=/usr/local/netsaint/libexec/check_nrpe $HOSTADDRESS$ -c check_ftp_wachovia.sh -to 60 #add to host entries host[d1ua0007]=d1ua0007;d1ua0007.cpships.com;;check_ping;3;240;24x7;1;1;1; #add to hostgroup entries hostgroup[tpa-uat-servers]=Tampa UAT Servers;Unix Admins;d1ua0001,d1ua0002,d1ua0003,d1ua0004,d1ua0005,d1ua0006,d1ua0007 #add to service entries service[d1ua0007]=FTP Wachovia Service;0;24x7;3;10;5;Unix-Admins;240;24x7;0;0;0;;check_ftp_wachovia.sh Copy the server script to client where the script will be routinely executed. # ftp d1ua0007.cpships.com ftp> cd /tmp ftp> put check_ftp_d1ua0007.sh # telnet d1ua0007 # cd /usr/local/netsaint/libexec # mv /tmp/check_ftp_d1ua0007.sh . # chown root:other check_ftp_d1ua0007.sh # chmod 755 check_ftp_d1ua0007.sh Create a client /export/home/netsaint/.netrc file to better protect the username and password. Set the file to be owned by "netsaint" user. 
# telnet d1ua0007 # vi .netrc machine 199.172.169.3 login dl348214 password CPfXXX-l macdef init ls quit # chown netsaint:other # chmod 600 .netrc Modify client nrpe.cfg configuration file. # telnet d1ua0007 # cd /usr/local/netsaint # cp nrpe.cfg nrpe.cfg-Sep15 # vi nrpe.cfg # add following line command[check_ftp_d1ua0007.sh]=/usr/local/netsaint/libexec/check_ftp_d1ua0007.sh Restart the Netsaint Server to apply the configuration change. # telnet d1pr0102 # /etc/init.d/netsaint restart Test the configuration from the NetSaint Server command line. # /usr/local/netsaint/libexec/check_nrpe d1ua0007 -c check_ftp_d1ua0007.sh -to 60 FTP login working |