#!/usr/bin/env perl
#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
# Copyright: 2017 Tom Ryder
# License: MIT (see LICENSE)
#
package main;
# Force me to write this properly
use strict;
use warnings;
use utf8;
# Require at least this Perl version
# Nothing in here should need a modern Perl
use 5.006;
# Import required modules
use English qw(-no_match_vars);
use IPC::Run3;
use Monitoring::Plugin qw(%ERRORS);
# Decree package version; main() hands this to Monitoring::Plugin->new as its
# "version" argument
our $VERSION = '2.02';
# Extra command-line options for the plugin: the warning and critical
# thresholds. Each hashref holds the named arguments for one add_arg() call
# (see run(), which flattens every entry into that call).
our @OPTS = (

    # --warning / -w
    {
        label => 'THRESHOLD',
        help  => 'Warning threshold for the number of OK checks',
        spec  => 'warning|w=s',
    },

    # --critical / -c
    {
        label => 'THRESHOLD',
        help  => 'Critical threshold for the number of OK checks',
        spec  => 'critical|c=s',
    },
);
# Precompiled regular expressions, keyed by purpose. Both are compiled with
# the /msx flags, so whitespace and "#" comments inside the patterns are
# layout only and are not matched.
our %RES = (
# Single HOSTNAME:CHECK pair from the command line; captures the hostname and
# the check command, neither of which may contain a colon or a comma
host_check_pair => qr{
\A # Start of string
([^:,]+) # Hostname
: # Colon
([^:,]+) # Check command
\z # End of string
}msx,
# Junk to remove from stdout: everything from the first pipe character or
# vertical whitespace character through to the end of the string
# NOTE(review): \v is listed as new in perl5100delta, while this file only
# declares "use 5.006" -- confirm the real minimum Perl version
stdout_junk => qr{
(?: # Start of non-matching alternating group
[|] # Pipe character (denoting start of perfdata)
| # or
\v # Vertical whitespace
) # End of group
.* # All the rest
\z # End of string
}msx,
);
# Turn a comma-separated list of HOST:CHECK pairs into check definitions
#
# Takes the definition string as its only argument. Returns a reference to an
# array of hashrefs, one per pair and in the order given, each with a "host"
# and a "check" key. Dies if there are fewer than two pairs, or if any pair
# does not match $RES{host_check_pair}.
sub build {
    my ($spec) = @_;

    # A cluster of one makes no sense, so insist on at least two pairs
    my @pairs = split m/,/msx, $spec;
    if ( @pairs < 2 ) {
        die "Need at least two HOST:CHECK pairs\n";
    }

    # Pull the hostname and check command out of each pair
    my @checks;
    for my $pair (@pairs) {
        my @fields = $pair =~ $RES{host_check_pair};
        if ( !@fields ) {
            die "Malformed HOST:CHECK string: $pair\n";
        }
        push @checks, { host => $fields[0], check => $fields[1] };
    }

    return \@checks;
}
# Run every check through check_nrpe and record the results in place
#
# Arguments:
#   $mp     - plugin object; accepted so that all the steps called by run()
#             share one calling convention, but not used here
#   $checks - arrayref of check hashrefs, each with "host" and "check" keys
#
# Each check hashref gains three keys:
#   command - arrayref holding the check_nrpe command line that was run
#   stdout  - whatever the command wrote to standard output
#   exit    - the command's exit value, or 3 (UNKNOWN) if a signal killed it
#
# Dies if check_nrpe does not exist or is not executable. Returns nothing.
sub fetch {
    my ( $mp, $checks ) = @_;

    # Exit value recorded for a check that did not exit on its own; 3 is the
    # plugin convention for UNKNOWN
    my $unknown_exit = 3;

    # Figure out where check_nrpe should be, and ensure it's there; the
    # NAGIOS_PLUGINS_DIR environment variable overrides the default directory
    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} || '/usr/local/nagios/libexec';
    my $nrpe = "$pdir/check_nrpe";
    -e $nrpe or die "$nrpe does not exist\n";
    -x $nrpe or die "$nrpe is not executable\n";

    # Iterate through the checks and collect exit value and output
    for my $check ( @{$checks} ) {

        # Build command as a list, so no shell is involved in running it
        $check->{command} =
          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];

        # Run command and capture its stdout; stderr is not redirected, so
        # any errors are emitted on our own stderr
        run3( $check->{command}, \undef, \$check->{stdout} );

        # The low seven bits of the wait status hold the number of the signal
        # that killed the child, if any. Shifting them away would leave 0 and
        # make a crashed check look like a pass, so report UNKNOWN instead.
        my $status = $CHILD_ERROR;
        if ( my $signal = $status & 127 ) {
            $check->{exit} = $unknown_exit;
            $check->{stdout} ||= "check_nrpe killed by signal $signal";
        }
        else {
            $check->{exit} = $status >> 8;
        }
    }

    # Done, we added the check results in-place with the commands
    return;
}
# Decide the plugin's final status and message from the collected results,
# then exit with them
#
# Arguments:
#   $mp     - plugin object, used for thresholds, messages and the exit
#   $checks - arrayref of check hashrefs as filled in by fetch()
#
# The number of checks that exited 0 is compared against the warning and
# critical thresholds to pick the overall status. The output is a pass/fail
# summary followed by one entry per check.
sub check {
    my ( $mp, $checks ) = @_;

    # Tally the checks that exited 0; everything else is a failure
    my $pass = 0;
    for my $result ( @{$checks} ) {
        next if not exists $result->{exit};
        next if $result->{exit} != 0;
        $pass++;
    }
    my $fail = @{$checks} - $pass;

    # The summary message carries the status chosen by the thresholds
    my $code = $mp->check_threshold(
        check    => $pass,
        critical => $mp->opts->critical,
        warning  => $mp->opts->warning,
    );
    $mp->add_message( $code, "$pass passes, $fail failures" );

    # One OK-level message per check: host, check name, exit value, and the
    # output cut off at the first pipe or vertical whitespace character
    for my $result ( @{$checks} ) {
        my $out = $result->{stdout};
        if ( not $out ) {
            $out = '[no output]';
        }
        $out =~ s{ $RES{stdout_junk} }{...}msx;

        my $detail = "$result->{host}:$result->{check} <$result->{exit}> $out";
        $mp->add_message( $ERRORS{OK}, $detail );
    }

    # Combine all the messages and exit with the resulting status
    my @verdict = $mp->check_messages(
        join     => q{, },
        join_all => q{, },
    );
    $mp->plugin_exit(@verdict);

    # Only reached if plugin_exit() returns
    return;
}
# Add performance data about the results to the plugin object
#
# Arguments:
#   $mp     - plugin object; must provide add_perfdata() and opts()
#   $checks - arrayref of check hashrefs; a check counts as a pass when it
#             has an "exit" key whose value is 0
#
# Two metrics are added: "pass", carrying the warning and critical thresholds
# from the command line, and "fail". Returns nothing; the plugin object is
# edited in place.
sub perfdata {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # The label is the metric's fixed name and the value is the count; the
    # thresholds apply to the number of passes, so they go on that metric
    $mp->add_perfdata(
        label    => 'pass',
        value    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_perfdata(
        label => 'fail',
        value => $fail,
    );

    # All done, we edited the object in place
    return;
}
# Drive one complete check using the given plugin object: parse the command
# line, run the checks, then report
#
# Dies if the arguments left after option parsing are not exactly one
# HOST:CHECK list; build() and fetch() die on their own errors as well.
# check() is expected to exit the process, so a normal return is not
# expected.
sub run {
    my ($mp) = @_;

    # Register our own options, then let the plugin parse the command line
    foreach my $definition (@OPTS) {
        $mp->add_arg( %{$definition} );
    }
    $mp->getopts();

    # What remains must be the single comma-separated list of checks
    if ( @ARGV != 1 ) {
        die "Need a comma-separated HOST:CHECK list\n";
    }
    my $checks = build( $ARGV[0] );

    # Start the timeout clock before anything slow happens
    alarm $mp->opts->timeout;

    # Run the checks, storing each outcome in its hashref
    fetch( $mp, $checks );

    # Record performance data, then pick the status and message and exit
    perfdata( $mp, $checks );
    check( $mp, $checks );

    # Only reached if check() returns
    return;
}
# Entry point: create the plugin object and run the check with it
#
# Any exception thrown while running is passed to plugin_die(), so that it is
# reported through the plugin object rather than as a bare Perl error.
sub main {

    # Usage line shown by the plugin; %s is left for the plugin to fill in
    my $usage =
        'Usage: %s'
      . ' [--warning|-w THRESHOLD]'
      . ' [--critical|-c THRESHOLD] '
      . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]';

    my $mp = Monitoring::Plugin->new(
        version => $VERSION,
        usage   => $usage,
    );

    # run() is expected to end the process itself; a false result here means
    # it died, or returned without exiting
    my $finished = eval { run($mp) };
    if ( not $finished ) {
        $mp->plugin_die($EVAL_ERROR);
    }

    # Only reached if plugin_die() returns
    return;
}
# Run the plugin when this file is executed
main();