Introduction
Articles
Snapshots
Links
Books
Commerce
Outdoors
Me :)

Your comments & suggestions may be mailed to:
archeryring@nm.ru

Copyright © 1998 Taras Plakhotnichenko
Most recent revision 11 July 2011


стрельба из лука в России

The first archery dedicated WEB site in Russia




Мониторим  логи удаленного сервиса с помощью Nagios'а  на предмет ключевых фраз
Cisco ASA active VPN sessions monitoring with Nagios
Мониторим сервер через iLO
Мониторим удаленный хост по snmp через сервер-посредник (так бывает, если он закрыт из нашей сети, например)
Мониторим свободное место. Скриптик не мой, правда, но работает замечательно
Извлекаем системную информацию об удаленных win серверах в вики (на серверах установлен клиент freeSSHd)
Парсим ини-файл типа [название секции] параметр=значение
Пример графика RRDTool
Пример графика GNUPlot
Выполняем удаленный батник через нагиосовский check_nrpe (www cgi)
Проверим, есть ли в последних 1000 строках удаленного лога ключевая фраза, и возвратим код ошибки нагиосу:
Посылаем почту в случае ключевой фразы в логе (шаблон):
Мониторим логи удаленного сервиса с помощью Nagios'а на предмет ключевых фраз. Секции в Nagios:
define service{
use generic-service
host_name remote host IP
service_description SERVICE - SOME KEY STRING
check_command check_alert4!'SOME KEY STRING'
check_period 1000-2359
}
define command{
command_name check_alert4
command_line $USER1$/check_alert4.pl -H $HOSTADDRESS$ -P 5108 -S $ARG1$
}

Скрипт check_alert4:
#!/usr/bin/perl -w
use strict;
use warnings;
use IO::Select;
use IO::Socket;
use Getopt::Long;
use vars qw($opt_V $opt_h $opt_w $opt_c $opt_H $opt_C $PROGNAME $opt_P $opt_S);
use lib "/usr/local/nagios/libexec" ;
use utils qw(%ERRORS &print_revision &support &usage);
$PROGNAME = "check_alert4";
#define service{
# use generic-service
# host_name host IP
# service_description STRING_SERVICE
# check_command check_alert4!'String to be monitored'
# check_period 1000-2359
# }
#define command{
# command_name check_alert4
# command_line $USER1$/check_alert4.pl -H $HOSTADDRESS$ -P 5108 -S $ARG1$
# }
sub print_help ();
sub print_usage ();

# Scrub the environment so nothing inherited from Nagios influences
# any child process.
$ENV{'PATH'}='';
$ENV{'BASH_ENV'}='';
$ENV{'ENV'}='';

Getopt::Long::Configure('bundling');
GetOptions
("V" => \$opt_V, "version" => \$opt_V,
"h" => \$opt_h, "help" => \$opt_h,
"w=s" => \$opt_w, "warning=s" => \$opt_w,
"c=s" => \$opt_c, "critical=s" => \$opt_c,
"H=s" => \$opt_H, "hostname=s" => \$opt_H,
"P=s" => \$opt_P, "port=s" => \$opt_P,
"S=s" => \$opt_S, "string=s" => \$opt_S);

if ($opt_h) {print_help(); exit $ERRORS{'OK'};}

# -H, -P and -S are all mandatory.
($opt_H) || usage("Host name/address not specified\n");
($opt_P) || usage("Port not specified\n");
($opt_S) || usage("String not specified\n");

# List-context match instead of "my $host = $1 if ...", whose behaviour
# is undefined when the condition is false.
my ($host) = ($opt_H =~ /([-.A-Za-z0-9]+)/);
($host) || usage("Invalid host: $opt_H\n");

# Send the key string to the remote listener.
my $sock = IO::Socket::INET->new(PeerPort=>$opt_P, Proto=>'tcp', PeerAddr=>$opt_H, Timeout => "4");
if (!($sock))
{
    print "Can't get data from alert4 - SOCKET TIMEOUT";
    exit $ERRORS{'CRITICAL'};
}
$sock->send("$opt_S");

# Wait at most 2 seconds for the answer. The eval only guards the read;
# the verdict is taken afterwards so that a timeout can no longer fall
# off the end of the script with exit status 0 (OK) and no output.
my $buffer = '';
my $read_finished = eval
{
    local $SIG{'ALRM'} = sub { die "Timeout\n" };
    alarm(2);
    sysread($sock,$buffer,1024);
    alarm(0);
    1;
};
alarm(0);

if (!$read_finished)
{
    print "couldn't get a reply - timed out waiting for alert4";
    exit $ERRORS{'UNKNOWN'};
}

if (!$buffer)
{
    print "couldn't get a reply";
    exit $ERRORS{'UNKNOWN'};
}

if ($buffer eq 'OK')
{
    # Remote side has not seen any key string.
    print "It' OK. We got $buffer.";
    exit $ERRORS{'OK'};
}
elsif ($buffer eq $opt_S)
{
    # Remote side reports exactly the string this service watches.
    print "We got $buffer.";
    exit $ERRORS{'CRITICAL'};
}

# Some other key string was reported; say so instead of exiting silently.
print "Unexpected reply from alert4: $buffer";
exit $ERRORS{'UNKNOWN'};


# Print the one-line command synopsis for this plugin to STDOUT.
sub print_usage () {
    printf "Usage: %s -H <host> -P <port> -S <string>\n", $PROGNAME;
}
# Print the full help text: a short description, the usage line, the
# option list and the standard support footer from utils.pm.
# The option list now describes -S (which the plugin requires) instead of
# the "Percentage strength" text for -w/-c, which this plugin never reads.
sub print_help () {
    print "This plugin reports remote service log\n";
    print_usage();
    print "
-H, --hostname=HOST
Name or IP address of host to check
-P, --port=PORT
Port number
-S, --string=STRING
Key string to check for; CRITICAL when the remote side reports it
-w, --warning=STRING / -c, --critical=STRING
Accepted for compatibility but not used by this plugin
";
    support();
}

Собственно сама мониторилка на удаленном хосте:
#!/usr/bin/perl
use strict;
use warnings;
use IO::Socket;
use threads;
use threads::shared;
#my $TIME=`date +%M`;
# State shared between the log-tailing main loop and the listener thread.
my $TIME=time();                  # start of the current mail aggregation window
my $SUBJECT :shared ='';          # last key string seen; read by the listener thread
my $file = "alert_buffer.txt";    # not referenced anywhere in this script
my @bigstring = ();               # matching log lines collected for the next mail
my $COUNT=0;                      # number of matching lines collected
my $date_current=`date +%Y%m%d`;  # evaluated once at start-up only
chomp($date_current);
#------------------------------------------
my $srv = threads->new(\&server);

# Listener thread: answers every TCP connection on port 5108 with the
# current key string, or "OK" when none has been seen. Never returns.
sub server {
    # NOTE(review): LocalHost => 'localhost' binds to the loopback
    # interface only; confirm that the Nagios host can actually reach
    # this port (tunnel/forwarder), otherwise bind to an external address.
    my $sock = IO::Socket::INET->new(LocalHost => 'localhost', LocalPort => '5108',Proto => 'tcp', Listen => 1, Reuse => 1) or die "Can't create socket: $!";

    while(1) {
        # accept() returns undef on failure; skip instead of calling
        # methods on undef, which would kill the thread.
        my $client_sock = $sock->accept() or next;
        my $peer_address = $client_sock->peerhost();
        my $peer_port = $client_sock->peerport();
        print "Accepted New Client Connection From : $peer_address, $peer_port\n";

        # The request payload is read but not interpreted.
        my $buffer;
        $client_sock->recv($buffer,1024);

        # Copy the shared value once so the test and the reply agree even
        # if the main loop resets it in between.
        my $subject = $SUBJECT;
        if ($subject)
        {
            $client_sock->send("$subject");
        }
        else
        {
            $client_sock->send("OK");
        }
        close $client_sock;
    }
}

#------------------------------------------


# Follow today's log file; every line that tail emits arrives on $pipe.
# NOTE(review): $date_current is computed once at start-up, so the script
# keeps following the same file after midnight.
open my $pipe, "-|", "/usr/bin/tail","-n1","-f", "/log/log_file_" . "$date_current" . ".log" or die "could not start tail on file.log: $!";

#reset the buffer for a less log activity otherwise you would wait too long
$| = 1;

while (my $line = <$pipe>)
{
    next unless $line =~ m/SOME KEY STRING|ANOTHER KEY STRING/i;

    $COUNT++;
    push(@bigstring,$line);

    # Remember which key string matched; the listener thread reports it.
    if ($line =~ m/SOME KEY STRING/i)
    {
        $SUBJECT = "SOME KEY STRING";
    }
    elsif ($line =~ m/ANOTHER KEY STRING/i)
    {
        $SUBJECT = "ANOTHER KEY STRING";
    }

    # Mail at most once per 120 seconds; the check only runs when a
    # matching line arrives.
    next unless (time() - $TIME) > 120;

    open my $sendmail, '|-', '/usr/sbin/sendmail', '-t'
        or die "$0: fatal: could not open sendmail: $!\n";
    print $sendmail "To: ts_osl\@open.ru\n";
    print $sendmail "Subject: $COUNT warning messages $SUBJECT - SERVER NAME\n";
    # Blank line terminates the header block; without it the log lines
    # were appended to the headers instead of forming the body.
    print $sendmail "\n";
    print $sendmail "@bigstring\n";
    close $sendmail
        or warn "$0: sendmail did not exit cleanly: $! (status $?)\n";

    print "@bigstring" . " $COUNT warning messages $SUBJECT - SERVER NAME\n";

    # Start a new aggregation window.
    $COUNT=0;
    $#bigstring = -1;
    $TIME=time();
    $SUBJECT='';
}
close $pipe;


Cisco ASA active VPN sessions monitoring with Nagios:

#!/usr/bin/perl -w
# This check is intended to show up/down status for
# a site-to-site VPN on a Cisco ASA firewall.
#
# The next 4 lines should go in your checkcommands.cfg file:
# define command{
# command_name check_asa_l2lvpn
# command_line $USER1$/check_asa_l2lvpn $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$
# }
#
# You might put something like this in your Nagios service definition:
# check_command check_asa_l2lvpn!mySecret!10.15.14.1!salesoffice
#
# Where mySecret is your community string, 10.15.14.1 is the VPN peer IP,
# and salesoffice is the friendly name. The friendly name is used for the
# service_description, which would be VPN-salesoffice in this example.
#
# http://www.barkingseal.com/2009/08/monitoring-site-to-site-vpns-on-a-cisco-asa/
#
# Sun Aug 2 11:53:17 MDT 2009
# Version 1.0
#
#
use strict;
use warnings;

# Arguments: <ASA IP address> <SNMP community> <peer IP> [friendly name].
# A wrong invocation is a plugin problem, so it is reported as UNKNOWN (3)
# rather than the former exit code 1, which Nagios reads as WARNING.
unless (@ARGV == 3 or @ARGV == 4) {
    print("usage:\tcheck_asa_l2lvpn <IP address> <community> <peer IP> [friendlyname]\n");
    exit(3);
}
my ($IP, $community, $peerip, $friendlyname) = @ARGV;
# The friendly name is optional; default to an empty label instead of
# printing an undefined value.
$friendlyname = '' unless defined $friendlyname;

# Walk the OID this check has always queried. The list form of open runs
# snmpwalk without a shell, so the community string and addresses cannot
# be interpreted as shell syntax.
my @uptunnels;
if (open(my $snmpwalk, '-|', '/usr/bin/snmpwalk', '-v1', '-c', $community, $IP,
         '1.3.6.1.4.1.9.9.171.1.2.3.1.7')) {
    @uptunnels = <$snmpwalk>;
    close($snmpwalk);
}

# Assume the tunnel is down until the peer shows up in the walk output.
my $state = "CRIT";
my $msg = "Site-to-site VPN tunnel to peer ".$peerip." is down!";
my $output = "";
foreach my $row (@uptunnels) {
    # \Q...\E keeps the dots of the peer address literal.
    if ($row =~ /SNMPv2-SMI::enterprises\.9\.9\.171\.1\.2\.3\.1\.7\.\d+ = STRING: "\Q$peerip\E"/) {
        $state = "OK";
        $msg = "Site-to-site VPN tunnel to peer ".$peerip." is up.";
    }
}
print "VPN-".$friendlyname." " . $state . " " . $msg . "|" . $output . "\n";

# Map the state to the Nagios exit code; anything else is UNKNOWN.
my %exit_code_for = ( OK => 0, WARN => 1, CRIT => 2 );
exit( exists $exit_code_for{$state} ? $exit_code_for{$state} : 3 );

define service{
use vpn-service
host_name HOST_NAME
service_description VPN CLIENT_PEER - CISCO_ASA_IP
check_command check_asa_l2lvpn!snmp_community!IP1!IP2
check_period 1030-2345
}


Мониторим сервер через iLO:

#!/usr/bin/perl
# check_ilo2_health.pl
# based on check_stuff.pl and locfg.pl
#
# Nagios plugin using the Nagios::Plugin module and the
# HP Lights-Out XML PERL Scripting Sample
# see http://h18013.www1.hp.com/support/files/lights-out/us/download/25057.html
# checks if all sensors are ok, returns warning on high temperatures and
# fan failures and critical on overall health failure
#
# Alexander Greiner-Baer <alexander.greiner-baer@web.de> 2007
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# Changelog:
# 1.44 Mon, 14 Dec 2009 20:11:37 +0100
# new option "--checkdrives"
# --
# 1.43 Mon, 17 Aug 2009 20:50:13 +0200
# new option "--fanredundancy"
#
# new option "--powerredundancy"
# --
# 1.42 Mon, 17 Aug 2009 12:52:23 +0100
# check power supply and fans redundancy
# gcivitella@enter.it
# --
# 1.41 Thu, 26 Jul 2007 17:42:36 +0200
# perfdata label ist now quoted
# --
# 1.4 Mon, 25 Jun 2007 09:45:52 +0200
# check vrm and power supply
#
# new option "--notemperatures"
#
# new option "--perfdata"
#
# some minor changes
# --
# 1.3beta Wed, 20 Jun 2007 09:57:46 +0200
# do some error checking
#
# new option "--inputfile"
# read bmc output from file
# --
# 1.2 Mon, 18 Jun 2007 09:33:17 +0200
# new option "--skipsyntaxerrors"
# ignores syntax errors in the xml output, maybe required by older firmwares
#
# introduce a date to the changelog ;)
# --
# 1.1 do not return warning if temperature status is n/a
#
# add "<LOCFG VERSION="2.21" />" to get rid of the
# "<INFORM>Scripting utility should be updated to the latest version.</INFORM>"
# message
# --
# 1 initial release

# "use strict;" had been fused onto the end of the changelog comment above,
# which silently disabled strictures for the whole script.
use strict;
use warnings;
use Nagios::Plugin;
use IO::Socket::SSL;
use XML::Simple;
use vars qw($VERSION $PROGNAME $verbose $warn $critical $timeout $result);
# NOTE(review): the changelog's newest entry is 1.44 while $VERSION says
# 1.43 - confirm which one is correct.
$VERSION = 1.43;
$PROGNAME = "check_ilo2_health";
# Build the plugin object; usage and blurb are assembled line by line.
my $usage_text = join("\n",
    'Usage: %s [ -v|--verbose ] [-H <host>] [-t <timeout>]',
    '[ -u|--user=<USERNAME> ] [ -p|--password=<PASSWORD> ]',
    '[ -e|--skipsyntaxerrors=1 ] [ -f|--inputfile=<filename> ]',
    '[ -a|--fanredundancy=1 ] [ -o|--powerredundancy=1 ]',
    '[ -n|--notemperatures=1 ] [ -c|--checkdrives=1 ]',
    '[ -d|--perfdata=1 ]',
);
my $blurb_text = join("\n",
    'This plugin checks the health status on a remote iLO2 device',
    'and will return OK, WARNING or CRITICAL. iLO2 (integrated Lights-Out 2)',
    'can be found on HP ProLiant servers.',
);
my $p = Nagios::Plugin->new(
    usage   => $usage_text,
    version => $VERSION,
    blurb   => $blurb_text,
);

# Command line options, registered in this order.
# Each entry: [ Getopt spec, synopsis line, description line ].
my @option_table = (
    [ 'user|u=s',             '-u, --user=STRING',
      'Specify the username on the command line.' ],
    [ 'password|p=s',         '-p, --password=STRING',
      'Specify the password on the command line.' ],
    [ 'host|H=s',             '-H, --host=STRING',
      'Specify the host on the command line.' ],
    [ 'skipsyntaxerrors|e=i', '-e, --skipsyntaxerrors=INTEGER',
      'Setting to 1 skips syntax errrors on older firmwares. Default off.' ],
    [ 'fanredundancy|a=i',    '-a, --fanredundancy=INTEGER',
      'Setting to 1 checks fan redundancy. Default off.' ],
    [ 'powerredundancy|o=i',  '-o, --powerredundancy=INTEGER',
      'Setting to 1 checks power redundancy. Default off.' ],
    [ 'notemperatures|n=i',   '-n, --notemperatures=INTEGER',
      'Setting to 1 gives output without temperature listing. Default off.' ],
    [ 'checkdrives|c=i',      '-c, --checkdrives=INTEGER',
      'Setting to 1 tries to check drive bays. Default off.' ],
    [ 'perfdata|d=i',         '-d, --perfdata=INTEGER',
      'Setting to 1 adds perfdata to the output. Default off.' ],
    [ 'inputfile|f=s',        '-f, --inputfile=STRING',
      'Do not query the BMC. Read input from file.' ],
);
for my $option (@option_table) {
    my ($spec, $synopsis, $description) = @$option;
    $p->add_arg(
        spec => $spec,
        help => $synopsis . "\n" . $description,
    );
}
# Parse the command line, then set up the script-wide state.
$p->getopts;

my $return = "OK";      # Nagios status name reported at exit
my $message;            # human readable status text, built up step by step
my $xmlinput;           # XML fragment collected from the BMC answer
my $isinput = 0;        # true while response lines belong to $xmlinput
my $client;             # SSL socket or input file handle
my %drives;             # drive bay label => status text
my $drive;
my $drivestatus;

my $host      = $p->opts->host;
my $username  = $p->opts->user;
my $password  = $p->opts->password;
my $inputfile = $p->opts->inputfile;

# The 0/1 switches all default to off.
my $skipsyntaxerrors   = 0;
my $optfanredundancy   = 0;
my $optpowerredundancy = 0;
my $notemperatures     = 0;
my $optcheckdrives     = 0;
my $perfdata           = 0;

# Validate every switch the same way: only 0 or 1 is accepted, and 1 turns
# the matching variable on. Entries: [ option name, short letter, target ].
for my $switch (
    [ 'skipsyntaxerrors', 'e', \$skipsyntaxerrors ],
    [ 'fanredundancy',    'a', \$optfanredundancy ],
    [ 'powerredundancy',  'o', \$optpowerredundancy ],
    [ 'notemperatures',   'n', \$notemperatures ],
    [ 'checkdrives',      'c', \$optcheckdrives ],
    [ 'perfdata',         'd', \$perfdata ],
) {
    my ($option, $letter, $target) = @$switch;
    my $given = $p->opts->$option;
    next unless defined($given);
    if ( ( $given != 1 ) && ( $given != 0 ) ) {
        $p->nagios_die( "ERROR: Invalid option supplied for the -" . $letter . " option. Use 0 or 1." );
    }
    ${$target} = 1 if ( $given == 1 );
}

# Either an input file or the full set of host credentials is required.
my $have_credentials = defined($host) && defined($username) && defined($password);
if ( !defined($inputfile) && !$have_credentials ) {
    $p->nagios_die("ERROR: Missing host, password and user.");
}
# Obtain the data source in $client: either an SSL connection to the BMC
# with the health query already sent, or a read handle on --inputfile.
unless ( defined($inputfile) ) {
    # query code from locfg.pl
    # Set the default SSL port number if no port is specified
    $host .= ":443" unless ($host =~ m/:/);
    #
    # Open the SSL connection (direct constructor call instead of the
    # indirect-object form "new IO::Socket::SSL->new").
    $client = IO::Socket::SSL->new(PeerAddr => $host);
    if (!$client) {
        $p->nagios_exit(
            return_code => "UNKNOWN",
            message => "ERROR: Failed to establish SSL connection with $host."
        );
    }
    # send xml to BMC
    # NOTE(review): user name and password are inserted into the XML
    # attributes unescaped; a double quote in either breaks the request.
    print $client '<?xml version="1.0"?>' . "\r\n";
    print $client '<LOCFG VERSION="2.21" />' . "\r\n";
    print $client '<RIBCL VERSION="2.21">' . "\r\n";
    print $client '<LOGIN USER_LOGIN="'.$username.'" PASSWORD="'.$password.'">' . "\r\n";
    print $client '<SERVER_INFO MODE="read">' . "\r\n";
    print $client '<GET_EMBEDDED_HEALTH />' . "\r\n";
    print $client '</SERVER_INFO>' . "\r\n";
    print $client '</LOGIN>' . "\r\n";
    print $client '</RIBCL>' . "\r\n";
}
else {
    # Three-argument open: the file name can no longer be parsed as an
    # open mode or a pipe command.
    open($client, '<', $inputfile) or $p->nagios_die("ERROR: $inputfile not found");
}
# retrieve data
# Read the answer line by line and collect in $xmlinput only the part
# between <GET_EMBEDDED_HEALTH_DATA> and its closing tag, leaving out the
# contents of <DRIVES>...</DRIVES>. Drive bay lines are parsed separately
# into %drives, and every MESSAGE='...' line is checked for errors.
while (my $line = <$client>) {
# echo the raw response when --verbose is given
print $line if ( $p->opts->verbose );
# trash all unnecessary lines
# opening tag: start collecting (the line itself is appended further down)
if ( $line =~ m/<GET_EMBEDDED_HEALTH_DATA>/ ) {
$isinput=1;
}
# closing tag: stop collecting, but still keep this line
if ( $line =~ m/<\/GET_EMBEDDED_HEALTH_DATA>/ ) {
$isinput=0;
$xmlinput .= $line;
}
# broken xml
# keep the <DRIVES> tag itself, then skip everything up to </DRIVES>
if ( $line =~ m/<DRIVES>/ ) {
$isinput=0;
$xmlinput .= $line;
}
# resume collecting; </DRIVES> itself is appended by the $isinput test below
if ( $line =~ m/<\/DRIVES>/ ) {
$isinput=1;
}
# drives
# NOTE(review): the pattern only matches bays whose uid led is "Off";
# bays reporting another led state are not recorded - confirm intended.
if ( $line =~ m/<Drive Bay: / ) {
($drive,$drivestatus) = ($line =~ m/Drive Bay: "(.*)"; status: "(.*)"; uid led: "Off"/);
if (defined($drive) && defined($drivestatus)) {
$drives{$drive} = $drivestatus;
}
}
if ( $isinput ) {
$xmlinput .= $line;
}
# Status messages: anything other than "No error" ends the check with
# UNKNOWN, except syntax errors when --skipsyntaxerrors is enabled.
if ( $line =~ m/MESSAGE='/) {
my ($msg) = ( $line =~ m/MESSAGE='(.*)'/);
if ( $msg !~ m/No error/ ) {
if ( $msg =~ m/Syntax error/ ) {
unless ( $skipsyntaxerrors ) {
close $client;
$p->nagios_exit(
return_code => "UNKNOWN",
message => "ERROR: $msg."
);
}
}
else {
# message could be "User login name was not found"
close $client;
$p->nagios_exit(
return_code => "UNKNOWN",
message => "ERROR: $msg."
);
}
}
}
}
close $client;# parse with XML::Simple
# Without any collected XML there is nothing to evaluate.
unless ( $xmlinput ) {
    $p->nagios_exit(
        return_code => "UNKNOWN",
        message => "ERROR: No parseable output."
    );
}
my $xml = XMLin($xmlinput, ForceArray => 1);

# Pick the sections that are evaluated here and further down.
my $temperatures = $xml->{'TEMPERATURE'}[0]->{'TEMP'};
my @checks = (
    $xml->{'FANS'}[0]->{'FAN'},
    $xml->{'VRM'}[0]->{'MODULE'},
    $xml->{'POWER_SUPPLIES'}[0]->{'SUPPLY'},
);
my $health = $xml->{'HEALTH_AT_A_GLANCE'}[0];
my ($location, $status, $temperature, $cautiontemp, $criticaltemp);

## check overall health status
# One row per HEALTH_AT_A_GLANCE value, evaluated in this order:
# [ section, element index, attribute, message prefix, healthy pattern, enabled ]
my @glance_rows = (
    [ 'VRM',            0, 'STATUS',     'VRM',          qr/^Ok$/i,              1 ],
    [ 'TEMPERATURE',    0, 'STATUS',     'Temperature',  qr/^Ok$/i,              1 ],
    [ 'POWER_SUPPLIES', 0, 'STATUS',     'Power supply', qr/^Ok$/i,              1 ],
    [ 'POWER_SUPPLIES', 1, 'REDUNDANCY', 'Power supply', qr/^Fully Redundant$/i, $optpowerredundancy ],
    [ 'FANS',           0, 'STATUS',     'Fans',         qr/^Ok$/i,              1 ],
    [ 'FANS',           1, 'REDUNDANCY', 'Fans',         qr/^Fully Redundant$/i, $optfanredundancy ],
    [ 'DRIVE',          0, 'STATUS',     'Drives',       qr/^Ok$/i,              $optcheckdrives ],
);
for my $row (@glance_rows) {
    my ($section, $index, $attribute, $prefix, $healthy, $enabled) = @$row;
    next unless $enabled;
    my $value = $health->{$section}[$index]->{$attribute};
    # a value that is absent from the answer is not treated as a failure
    next unless defined($value);
    next if $value =~ $healthy;
    $return = "CRITICAL";
    $message .= "$prefix $value, ";
}
# nothing flagged so far: report a healthy overall state
$message .= "Overall Health Ok, " unless $message;
# Statuses that never raise a warning for a single component.
my $quiet_status = qr/^(Ok)$|^(n\/a)$|^(Not Installed)$/i;

# check fans, vrm and power supplies
# Each defined entry of @checks is a list of components; any component
# with another status raises WARNING unless CRITICAL is already set.
for my $component_list ( grep { ref($_) } @checks ) {
    for my $component (@$component_list) {
        my $label = $component->{'LABEL'}[0]->{'VALUE'};
        my $state = $component->{'STATUS'}[0]->{'VALUE'};
        next unless defined($label) && defined($state);
        next if $state =~ $quiet_status;
        # do not override previous return value from overall health
        $return = "WARNING" unless $return eq "CRITICAL";
        $message .= "$label: $state, ";
    }
}

# check drive bays
if ($optcheckdrives) {
    for my $bay ( sort keys(%drives) ) {
        my $state = $drives{$bay};
        next if $state =~ $quiet_status;
        $return = "WARNING" unless $return eq "CRITICAL";
        $message .= "Drive Bay $bay: ".$state.", ";
    }
}
# check temperatures
# Walk all temperature sensors. A status other than Ok / n/a /
# Not Installed raises WARNING (never lowering an existing CRITICAL).
# Unless --notemperatures is set, every sensor that is present is listed
# in the message and, with --perfdata, added as performance data.
if (ref($temperatures) ) {
unless ( $notemperatures ) {
$message .= "Temperatures: ";
}
foreach my $temp (@$temperatures) {
$location=$temp->{'LOCATION'}[0]->{'VALUE'};
$status=$temp->{'STATUS'}[0]->{'VALUE'};
$temperature=$temp->{'CURRENTREADING'}[0]->{'VALUE'};
if ( defined($location) && defined($status) && defined($temperature) ) {
if ( ( $status !~ m/^(Ok)$|^(n\/a)$|^(Not Installed)$/i ) ) {
# do not override previous return value from overall health
unless ( $return eq "CRITICAL" ) {
$return = "WARNING";
}
# with the listing suppressed, a failing sensor is still named here
if ( $notemperatures ) {
$message .= "$location ($status): $temperature, ";
}
}
# regular listing: skip absent sensors and honour --notemperatures
unless ( ( $status =~ m/^(n\/a)$|^(Not Installed)$/i ) || ( $notemperatures ) ) {
$message .= "$location ($status): $temperature, ";
if ( $perfdata ) {
# the sensor's own caution/critical values become the perfdata thresholds
$cautiontemp=$temp->{'CAUTION'}[0]->{'VALUE'};
$criticaltemp=$temp->{'CRITICAL'}[0]->{'VALUE'};
if ( defined($cautiontemp) && defined($criticaltemp) ) {
$p->set_thresholds(
warning => $cautiontemp,
critical => $criticaltemp,
);
my $threshold = $p->threshold;
# add perfdata
$p->add_perfdata(
label => "'".$location."'",
value => $temperature,
uom => "",
threshold => $threshold,
);
}
}
}
}
else {
# location, status or reading is missing for this sensor
$message .= "no reading, ";
}
}
}# strip trailing ","
$message =~ s/, $//;
# hand the collected status and message to Nagios and exit
$p->nagios_exit(
return_code => $return,
message => $message
);


define service{
use local-service ; Name of service template to use
host_name localhost
service_description iLO
check_command check_ilo2_health!login_name!password!server_ip
notifications_enabled 0
}


Мониторим удаленный хост по snmp через сервер-посредник (так бывает, если он закрыт из нашей сети, например):

Батник на посреднике:

@echo off
rem NRPE helper run on the intermediary host: queries the target server
rem over SNMP and turns the value into a Nagios status.
rem Usage:      <script> proc ^| disk ^| mem
rem Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
rem
rem Echo stays off for the whole script: the former "@echo on" made cmd
rem print the un-prefixed commands into the plugin output.
rem
rem Reference snmpget calls:
rem memory free
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.1.2.0

rem memory overall
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.1.1.0

rem Processor 1
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.1

rem Processor 2
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.2

rem Processor 3
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.3

rem Processor 4
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.4


rem Processor overall
rem "C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.9
rem echo %PROC%

rem Do not inherit stale values from the environment.
set count=
set count2=

rem Quoted comparison: a missing argument no longer yields a cmd syntax
rem error, so the "missing parameter" message below is reachable.
if "%~1"=="proc" goto PROC
if "%~1"=="disk" goto DISK
if "%~1"=="mem" goto MEM
echo FAILED - missing parameter for batch
exit 2

:PROC
for /f %%i in ('"C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.3.2.1.3.9') do set count=%%i
if not defined count goto NODATA
echo %count%
rem up to 15 is fine, anything above is a failure
if %count% LEQ 15 goto OK
goto FAIL


:MEM
for /f %%i in ('"C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.1.2.0') do set count=%%i
for /f %%i in ('"C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.1.1.0') do set count2=%%i
if not defined count goto NODATA
echo free: %count% bytes overall: %count2% bytes

rem more than 15000000 bytes free is fine, anything else is a failure
if %count% GTR 15000000 goto OK
goto FAIL


:DISK
for /f %%i in ('"C:\Program Files\NET-SNMP\bin\snmpget.exe" -t 2 -Ovq -v 1 -c PASS SERVER_IP .1.3.6.1.4.1.20053.1.4.2.2.1.5.2 ') do set count=%%i
if not defined count goto NODATA
echo free: %count% bytes

rem more than 50000 free is fine, anything else is a failure
if %count% GTR 50000 goto OK
goto FAIL

:NODATA
rem snmpget returned nothing (timeout, wrong community, missing tool)
echo "UNKNOWN - no value returned by snmpget"
exit 3

:WARN
echo "WARNING - we got value %count%"
exit 1

:FAIL
echo "FAILED - we got value %count%"
exit 2

:OK
echo "It's OK - we got value %count%"
exit 0

Удаленный запуск батника на посреднике через NRPE:

define service{
use generic-service
host_name SERVER_IP
service_description Prom_Server_RTS - SERVER_IP Processor Load
check_command check_nrpe_prom!check_prom.bat!proc
check_period 910-2345
}


Мониторим свободное место. Скриптик не мой, правда, но работает замечательно:

#!/bin/sh
# Shell script to monitor or watch the disk space
# It will send an email to $ADMIN if the used percentage
# of disk space is >= $ALERT (90% by default)
# -------------------------------------------------------------------------
# Copyright (c) 2005 nixCraft project <http://cyberciti.biz/fb/>
# This script is licensed under GNU GPL version 2.0 or above
# -------------------------------------------------------------------------
# This script is part of nixCraft shell script collection (NSSC)
# Visit http://bash.cyberciti.biz/ for more information.
# ----------------------------------------------------------------------
# Linux shell script to watch disk space (should work on other UNIX oses )
# SEE URL: http://www.cyberciti.biz/tips/shell-script-to-watch-the-disk-space.html
# set admin email so that you can get email
ADMIN="admin@aaa.ru"
# set alert level 90% is default
ALERT=90
# For every file system (header, tmpfs and cdrom rows excluded) read the
# "capacity" column and the device name; mail $ADMIN when usage reaches
# the $ALERT percentage.
df -P -H | grep -vE '^Filesystem|tmpfs|cdrom' | awk '{ print $5 " " $1 }' | while read -r usep partition;
do
  # strip the trailing percent sign: "85%" -> "85"
  usep=${usep%\%}
  # skip rows whose usage is not a plain number (e.g. "-"), which would
  # otherwise make the numeric test below fail with an error
  case "$usep" in
    ''|*[!0-9]*) continue ;;
  esac
  if [ "$usep" -ge "$ALERT" ]; then
    echo "Running out of space \"$partition ($usep%)\" on $(hostname) as on $(date)" |
    mail -s "Alert: Almost out of disk space $usep" "$ADMIN"
  fi
done


Извлекаем системную информацию об удаленных win серверах в вики (на серверах установлен клиент freeSSHd):

#!/usr/bin/perl
use strict;
use warnings;
my $errocode='';
#http://nagios.open.ru/twiki/data/web_name/servers/winservers_sysinfo_xx.xx.xx.xx.txt
my $UPDATED=`date +%Y%m%d-%H:%M`;
#---end header----------------------------------------------------
my @hosts = ('192.168.xxx.111','192.168.xxx.222');
#---collecting----------------------------------------------------
# For every host: fetch "systeminfo" and "schtasks" over ssh and write them
# to one report file per host, converting to UTF-8 when enca recognises
# the encoding. Each host gets at most 120 seconds.
foreach my $host (@hosts) {
    print "retrieving system info from remote host $host \n";

    my $report_path = "/root/DEV/SYSINFO/winservers_sysinfo_$host.txt";
    my $report;
    # A report that cannot be created is reported and skipped instead of
    # silently printing to a closed handle.
    unless (open($report, '>', $report_path)) {
        warn "can't write $report_path: $!\n";
        next;
    }
    print $report "This file is last updated: $UPDATED\n";

    my $collected = eval
    {
        local $SIG{'ALRM'} = sub { die "Timeout\n" };
        alarm(120);

        # Probe run: enca prints "Unrecognized encoding" when it cannot
        # identify the text; in that case the output is stored unconverted.
        $errocode=`ssh support\@$host systeminfo | enca -L ru`;

        if ($errocode=~m/Unrecognized encoding/)
        {
            print "$errocode\n";
            print $report `ssh support\@$host systeminfo`;
            print $report `ssh support\@$host schtasks`;
        }
        else
        {
            print $report `ssh support\@$host systeminfo | enca -L ru -x UTF-8`;
            print $report `ssh support\@$host schtasks | enca -L ru -x UTF-8`;
        }
        alarm(0);
        1;
    };
    alarm(0);
    # Previously a timeout vanished without a trace and left the file open.
    unless ($collected) {
        my $reason = $@ || "unknown error\n";
        warn "collecting from $host failed: $reason";
    }
    close($report) or warn "can't close $report_path: $!\n";
}
# Publish the reports; the glob needs a shell, hence the single-string form.
system('mv -f /root/DEV/SYSINFO/*.txt /var/www/html/twiki/data/web_name/servers') == 0
    or warn "moving the reports failed (status $?)\n";


Парсим ини-файл типа [название секции] параметр=значение:

# get_ini($path, $filename)
#
# Extracts selected parts of the ini file "$path/$filename" and prints them
# to the already opened MYFILE handle, followed by a "</verbatim>" line:
#   * [Service], [fix_connect_1], [P2Router], [QConnect]: the section header
#     plus only the parameters whose line starts with a listed name;
#   * [System], [Files]: the header and every line up to the next line that
#     contains a bracketed section name.
# Each printed section is followed by an empty line. Dies when the file
# cannot be opened.
#
# Both inner loops stop at end of file. The whole-section loop used to spin
# forever (printing empty lines) when [System] or [Files] was the last
# section, because readline kept returning undef.
sub get_ini
{
    my($path, $filename) = @_;

    my $selective_section = qr/^\[Service\]|^\[fix_connect_1\]|^\[P2Router\]|^\[QConnect\]/;
    my $whole_section     = qr/^\[System\]|^\[Files\]/;
    # Prefix match, case-sensitive, same names as the former elsif chain.
    my $wanted_parameter  = qr/^(?:name|displayname|AcceptPort|SenderCompID|TargetCompID|P2RouterIp|P2RouterPort|User|ConnTimeout|timeout|timeshift)/;

    #-----Go through ini----------------------------------------------
    open(my $ini, '<', "$path/$filename") or die "can't open $filename: $!\n";
    while (my $line = <$ini>)
    {
        if ($line =~ $selective_section) #print particular params from the section
        {
            $line =~ s/\s+$//;
            print MYFILE "$line\n";

            # The look-ahead below also consumes the next section header,
            # so remember where to resume the outer scan.
            my $resume_at = tell($ini);
            while (defined(my $entry = <$ini>))
            {
                last if $entry =~ m/^\[.*\]/;
                next unless $entry =~ $wanted_parameter;
                $entry =~ s/\s+$//;
                print MYFILE "$entry\n";
            }
            seek($ini, $resume_at, 0);
            print MYFILE "\n";
        }
        elsif ($line =~ $whole_section) #Print the whole section
        {
            $line =~ s/\s+$//;
            print MYFILE "$line\n";

            my $resume_at = tell($ini);
            while (defined(my $entry = <$ini>))
            {
                last if $entry =~ m/\[.*\]/;
                $entry =~ s/\s+$//;
                print MYFILE "$entry\n";
            }
            seek($ini, $resume_at, 0);
            print MYFILE "\n";
        }
    }

    close $ini;

    #---END--Go through ini----------------------------------------------
    print MYFILE "</verbatim>\n";
}


Пример графика RRDTool:

#rrdtool create net.rrd --step 60 DS:input1:GAUGE:120:100:U DS:input2:GAUGE:120:100:U DS:input3:GAUGE:120:100:U DS:input4:GAUGE:120:200:U RRA:MAX:0.5:1:600

# Store the latest sample, if there is one. The statement was missing its
# terminating ";" and the closing "}" of the if block.
# NOTE(review): the graph is drawn whether or not a new sample arrived -
# confirm this is the intended scope of the if block.
if ($pipe44) {
    $update=`/usr/bin/rrdtool updatev /www_path/rrd/net.rrd N:$pipe44`;
}

# Render the graph of the input4 series from $START until now and print
# whatever rrdtool reports.
$graph =`/usr/bin/rrdtool graph /www_path/rrd/fix_delays.gif -w 600 -h 200 -l 200 -u 500 --alt-autoscale-max --vertical-label "Time (ms)" -t "FIX LATENCY (ms) - MAX FROM 2 MINUTE INTERVAL .44" -s "$START" -e now DEF:input4=/www_path/rrd/net.rrd:input4:MAX LINE3:input4#ff00ff:"SETS"`;
print "$graph";


Пример графика GNUPlot:

#!/bin/bash
# Plots the delays data file with gnuplot, mails the data file and the
# picture, and copies the picture to the wiki attachment directory.
# $1 and $2 are used in the mail subject; $1 also names the wiki copy.

# Yesterday's date, used in the plot title.
YESTERDAY=`date +%Y-%m-%d --date="-1 day"`;
# Feed the plot commands to gnuplot through a here-document; the shell
# expands $YESTERDAY in it, and "<<-" strips leading tabs from its lines.
# Column 1 is read as a time in HHMMSS format, column 5 is plotted against
# it; the columns are separated by "|".
/usr/bin/gnuplot <<-finis
set terminal png nocrop enhanced 12 size 1200,768
set output "/root/DEV/FIX_DELAYS/delays.png"
set encoding koi8r
set xlabel "Time"
set ylabel "Delay"
set xdata time
set timefmt "%H%M%S"
set ytics auto
set style line 1 lt 1 lw 1 pt 5 ps 0.65
set datafile separator "|"
plot "/root/DEV/FIX_DELAYS/delays.txt" using 1:5 title "FIX DELAYS $YESTERDAY" with linespoints linestyle 1
finis
# Mail the data file and the plot; the body is read from message.txt.
# NOTE(review): gnuplot writes to /root/DEV/FIX_DELAYS while the commands
# below read from /PATH - presumably a placeholder for the same directory.
mutt -a /PATH/delays.txt -a /PATH/delays.png -s "FIX DELAYS. Report - $1 $2" admin@domain.ru < /PATH/message.txt
# Publish the plot as a wiki attachment named after the first argument.
cp /PATH/delays.png /twiki/pub/web_name/FixDelaysReport/delays_"$1".png




Выполняем удаленный батник через нагиосовский check_nrpe (www cgi):

#!/usr/bin/perl
# CGI front end: shows a form asking for a client code and a server, then
# runs remote.bat on the chosen server through check_nrpe.
#
# The form values are untrusted. They are therefore
#   * passed to check_nrpe as separate arguments without a shell (the old
#     backtick command allowed command injection through either field),
#   * HTML-escaped before being echoed back, and
#   * the server name is restricted to host name characters.
use strict;
use warnings;

print "Content-type: text/html\n\n";

# Escape the characters that are special in HTML text and attributes.
sub escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Read in text
my $request_method = defined $ENV{'REQUEST_METHOD'} ? uc($ENV{'REQUEST_METHOD'}) : '';
my $buffer = '';
if ($request_method eq "POST")
{
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'} || 0);
}else {
    $buffer = $ENV{'QUERY_STRING'};
}
$buffer = '' unless defined $buffer;

# Split information into name/value pairs
my %FORM;
foreach my $pair (split(/&/, $buffer))
{
    # limit 2: keep "=" characters that are part of the value
    my ($name, $value) = split(/=/, $pair, 2);
    next unless defined $name;
    $value = '' unless defined $value;
    $value =~ tr/+/ /;
    $value =~ s/%([0-9A-Fa-f]{2})/pack("C", hex($1))/eg;
    $FORM{$name} = $value;
}
my $client_name = $FORM{client};
my $server_name = $FORM{server};

unless ($client_name or $server_name)
{
    # Nothing submitted yet: show the request form.
    print "REQUEST QUIK LOG: <font color=\"red\">-<\/font><br><br>";
    print "<FORM action=\"/www_path/app_name.cgi\" method=\"POST\">";
    print "Client code: <input type=\"text\" name=\"client\"> <br>";
    print "Server name: <select name=\"server\">";
    print "<option value=\"xxx.xxx.xxx.01\" selected>xxx.xxx.xxx.01</option>";
    print "<option value=\"xxx.xxx.xxx.02\">xxx.xxx.xxx.02</option>";
    print "<option value=\"xxx.xxx.xxx.03\">xxx.xxx.xxx.03</option>";
    print "</select>";
    print "<br><input type=\"submit\" value=\"Submit\"></FORM>";
}
elsif (!defined $server_name or $server_name !~ /\A[A-Za-z0-9.\-]+\z/)
{
    print "Invalid server name";
}
else
{
    $client_name = '' unless defined $client_name;
    print "Client's code: " . escape_html($client_name) . ", Server IP: " . escape_html($server_name);

    # The original passed an unset $param1 as the second batch argument,
    # i.e. an empty string; that is kept here.
    my $param1 = '';
    my @command = ('/usr/local/nagios/libexec/check_nrpe',
                   '-H', $server_name, '-p', '5108',
                   '-c', 'remote.bat', '-a', $client_name, $param1);
    # List-form pipe open: no shell is involved. As before, the command
    # output is read and discarded.
    if (open(my $nrpe, '-|', @command)) {
        my @ignored_output = <$nrpe>;
        close($nrpe);
    }
}


Проверим, есть ли в последних 1000 строках удаленного лога ключевая фраза, и возвратим код ошибки нагиосу:

@echo off
rem Checks whether the phrase "failed" occurs in the last 1000 lines of
rem today's application log and reports the result to Nagios.
rem %1 is used as a prefix of the temporary file names.
rem Exit codes: 0 = OK, 2 = CRITICAL (phrase found), 3 = UNKNOWN (no log).
set current_date=%date:~6,4%%date:~3,2%%date:~0,2%
rem echo %current_date%
set "logfile=C:\app_path\app_%current_date%.log"

rem A missing log used to leave an empty temp file, so the check said OK.
if not exist "%logfile%" (
echo "UNKNOWN - log file %logfile% not found"
exit 3
)

rem Remove leftovers of the previous run without an error if none exist.
if exist C:\path\monitor\temp_%11.txt del C:\path\monitor\temp_%11.txt
if exist C:\path\monitor\temp_%12.txt del C:\path\monitor\temp_%12.txt

rem Keep the last 1000 lines: the delete range must start at line 1001
rem (it started at 1000, which kept only 999 lines).
C:\path\monitor\sed.exe -e :a -e "$q;N;1001,$D;ba" "%logfile%" > C:\path\monitor\temp_%11.txt
C:\path\monitor\sed.exe -n -e "/failed/p" C:\path\monitor\temp_%11.txt | find "failed" > C:\path\monitor\temp_%12.txt

rem find sets ERRORLEVEL 1 or higher when nothing matched. The former
rem "IF ERRORLEVEL 0" was always true and "GOTO BEGIN" pointed to a label
rem that does not exist, so both are replaced by a plain goto.
if errorlevel 1 goto OK
goto END
:END
echo "failed to connect"
exit 2
:OK
echo "It's OK"
exit 0


Посылаем почту в случае ключевой фразы в логе (шаблон):

#!/usr/bin/perl
use strict;
use warnings;
use threads;
# Required for ":shared" to take effect; without it each thread works on
# its own private copy and the mailer never sees any collected line.
use threads::shared;

# Recipient for "sendmail -t", which takes its recipients from the headers.
# The original template wrote no To: header at all.
# Placeholder address - set it to the real recipient.
my $MAIL_TO = 'admin@domain.ru';

my @bigstring :shared = ();   # matching log lines waiting to be mailed
my $COUNT :shared =0;         # number of lines in @bigstring
my $SUBJECT :shared ='';      # keyword used in the mail subject
my $date_current=`date +%Y%m%d`;
chomp($date_current);

# The mailer runs for the lifetime of the process and is never joined.
my $srv = threads->new(\&server);
$srv->detach();

# Mailer thread: once a minute, mail everything collected since the last
# run and start over. Does nothing when no line was collected.
sub server {
    while (1)
    {
        sleep 60;

        # Take a snapshot and reset the shared state under a lock so lines
        # pushed by the main loop in the meantime are neither lost nor
        # mailed twice.
        my ($count, $subject, @lines);
        {
            lock(@bigstring);
            $count    = $COUNT;
            $subject  = $SUBJECT;
            @lines    = @bigstring;
            $COUNT    = 0;
            @bigstring = ();
            $SUBJECT  = '';
        }
        next unless $count;

        my $sendmail;
        unless (open($sendmail, '|-', '/usr/sbin/sendmail', '-t')) {
            # Keep the mailer alive; a die here would end it silently.
            warn "$0: could not open sendmail: $! ($count messages dropped)\n";
            next;
        }
        print $sendmail "From: admin\@domain.ru\n";
        print $sendmail "To: $MAIL_TO\n";
        print $sendmail "Subject: $count warning messages $subject\n\n";
        print $sendmail "@lines\n";
        close($sendmail)
            or warn "$0: sendmail did not exit cleanly: $! (status $?)\n";
        print "@lines" . " $count warning messages\n";
    }
}

# Follow today's log file; every new line arrives on $pipe.
open my $pipe, "-|", "/usr/bin/tail","-n1" ,"-f", "/var/app/logs/" . "$date_current" . ".txt"
    or die "could not start tail on file.log: $!";

while (my $line = <$pipe>)
{
    if ($line =~ m/failed|error/i)
    {
        #Assembling the subject string---------------------------------------
        # Only "failed" lines are collected in this template.
        if ($line =~ m/failed/i)
        {
            lock(@bigstring);
            $COUNT++;
            push(@bigstring, "$line");
            $SUBJECT = "failed";
        }
        #END of Assembling the subject string---------------------------------
    }
}
close $pipe;