about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- LICENSE 2
-rwxr-xr-x check_nrpe_cluster 266
2 files changed, 197 insertions, 71 deletions
diff --git a/LICENSE b/LICENSE
index bf6312f..b3215fc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2015 Tom Ryder
+Copyright (c) 2017 Tom Ryder
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/check_nrpe_cluster b/check_nrpe_cluster
index dadbc6c..dca66be 100755
--- a/check_nrpe_cluster
+++ b/check_nrpe_cluster
@@ -2,106 +2,232 @@
#
# Run two or more NRPE checks and return a status based on their aggregated
-# results, similar to check_cluster. fork(2)s ahoy!
+# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
-# Copyright: 2015
+# Copyright: 2017
#
-# $Id$
-#
-package Nagios::Plugin::NRPE::Cluster;
+package Monitoring::Plugin::NRPE::Cluster;
# Force me to write this properly
use strict;
use warnings;
use utf8;
-use autodie qw(:all);
-# Require at least Perl 5.10
+# Require at least this Perl version
use 5.010;
# Decree package version
-our $VERSION = 1.0;
+our $VERSION = 2.01;
# Import required modules
+use Carp; # core
+use Const::Fast; # dpkg: libconst-fast-perl
use English qw(-no_match_vars); # dpkg: perl-core
use IPC::Run3; # dpkg: libipc-run3-perl
-use Nagios::Plugin; # dpkg: libnagios-plugin-perl
-
-# Find path to Nagios' plugins
-my $plugins_dir =
- exists $ENV{NAGIOS_PLUGINS_DIR}
- ? $ENV{NAGIOS_PLUGINS_DIR}
- : '/usr/local/nagios/libexec';
-
-# Build Nagios::Plugin object
-my $np = Nagios::Plugin->new(
- usage => 'Usage: %s [-w THRESHOLD] [-c THRESHOLD] '
- . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
- version => $VERSION,
-);
+use Monitoring::Plugin; # cpanm Monitoring::Plugin
# Add warning and critical options
-$np->add_arg(
- spec => 'warning|w=s',
- help => "-w, --warning=THRESHOLD\n"
- . ' Warning threshold for the number of OK checks',
+# Specifications for the plugin's custom command-line options. Each hashref
+# holds the `spec` and `help` arguments for one option; run() passes every
+# entry to the plugin object's add_arg() method. Both thresholds are compared
+# against the number of checks that passed (see check()).
+const our @OPTS => (
+ {
+ spec => 'warning|w=s',
+ help => "-w, --warning=THRESHOLD\n"
+ . ' Warning threshold for the number of OK checks',
+ },
+ {
+ spec => 'critical|c=s',
+ help => "-c, --critical=THRESHOLD\n"
+ . ' Critical threshold for the number of OK checks',
+ },
);
-$np->add_arg(
- spec => 'critical|c=s',
- help => "-c, --critical=THRESHOLD\n"
- . ' Critical threshold for the number of OK checks',
+
+# Precompiled regular expressions used by this plugin, keyed by purpose
+const our %RES => (
+
+ # Single HOSTNAME:CHECK pair from the command line, anchored at both ends
+ # of the string. Captures the hostname and the check command name, in that
+ # order; neither part may be empty or contain a colon or a comma.
+ host_check_pair => qr{
+ \A # Start of string
+ ([^:,]++) # Hostname
+ : # Colon
+ ([^:,]++) # Check command
+ \z # End of string
+ }msx,
+
+ # Junk to remove from stdout: everything from the first pipe character or
+ # vertical whitespace character through to the end of the string. check()
+ # substitutes this span with "..." when reporting each check's output. The
+ # (?: ) group below is non-capturing; it only groups the alternation.
+ stdout_junk => qr{
+ (?: # Start of non-matching alternating group
+ [|] # Pipe character (denoting start of perfdata)
+ | # or
+ \v # Vertical whitespace
+ ) # End of group
+ .* # All the rest
+ \z # End of string
+ }msx,
);
-# Read options
-$np->getopts();
+# Parse a comma-separated list of HOST:CHECK pairs into check definitions.
+#
+# Takes the raw definition string from the command line. Returns a reference
+# to an array of hashrefs, one per pair and in the order given, each with the
+# keys `host` and `check`. Croaks if there are fewer than two pairs, or if any
+# pair does not match $RES{host_check_pair}.
+sub build {
+    my ($spec) = @_;
+
+    # Break the string into its pairs; a cluster needs at least two of them
+    my @pairs = split /,/msx, $spec;
+    croak 'Need at least two HOST:CHECK pairs' if @pairs < 2;
+
+    # Turn each pair into a hashref of hostname and check command name; the
+    # list assignment counts zero captures when the pattern fails to match
+    my @checks;
+    for my $pair (@pairs) {
+        my ( $host, $name ) = $pair =~ $RES{host_check_pair}
+          or croak "Malformed HOST:CHECK string: $pair";
+        push @checks, { host => $host, check => $name };
+    }
-# Need one of --warning or --critical
-if ( !$np->opts->warning && !$np->opts->critical ) {
-    $np->nagios_die(q{Need one/both of --warning or --critical});
+
+    # Hand back a reference to the list of check hashrefs
+    return \@checks;
 }
-# Verify we have at least two host:check pairs
-my @pairs;
-if (@ARGV) {
- @pairs = split m{,}msx, $ARGV[0];
+# Run check_nrpe once per check and record the results in each check hashref.
+#
+# Takes the plugin object (currently unused, kept for a uniform interface with
+# the other steps) and the arrayref of check hashrefs produced by build().
+# Adds three keys to every hashref in place:
+#   command - arrayref of the command line that was run
+#   stdout  - whatever the command wrote to standard output
+#   exit    - the command's exit value, or UNKNOWN if a signal killed it
+# Returns nothing. Croaks if check_nrpe cannot be executed from the plugins
+# directory ($ENV{NAGIOS_PLUGINS_DIR}, or /usr/local/nagios/libexec if unset).
+sub fetch {
+    my ( $mp, $checks ) = @_;
+
+    # Figure out where check_nrpe should be, and ensure we can execute it
+    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} // '/usr/local/nagios/libexec';
+    my $nrpe = "$pdir/check_nrpe";
+    -x $nrpe or croak "$nrpe does not exist or is not executable";
+
+    # Iterate through the checks and collect exit value and output
+    for my $check ( @{$checks} ) {
+
+        # Build command as a list, so that no shell is involved
+        $check->{command} =
+          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];
+
+        # Run command with no stdin and save its stdout; stderr is not
+        # redirected, so the child's errors are emitted on our own stderr
+        run3 $check->{command}, \undef, \$check->{stdout};
+
+        # The high byte of the wait status is the exit value, but it is zero
+        # when the child was killed by a signal (low seven bits set); report
+        # that as UNKNOWN rather than letting a crashed check count as a pass
+        $check->{exit} = $CHILD_ERROR >> 8;
+        if ( $CHILD_ERROR & 127 ) {
+            $check->{exit} = UNKNOWN;
+        }
+    }
+
+    # Done, we added the check results in-place with the commands
+    return;
 }
-if ( @pairs < 2 ) {
- $np->nagios_die(q{Need at least two HOSTNAME:CHECK definitions});
+
+# Turn the collected check results into a plugin status and message, then
+# exit the program with them.
+#
+# Takes the plugin object and the arrayref of check hashrefs that fetch() has
+# filled in. The status comes from comparing the number of checks that exited
+# zero against the --warning and --critical thresholds. The message is a
+# summary count followed by one entry per check.
+sub check {
+    my ( $mp, $checks ) = @_;
+
+    # Tally the checks that exited zero; everything else is a failure
+    my @results = @{$checks};
+    my $passed  = grep { exists $_->{exit} && $_->{exit} == 0 } @results;
+    my $failed  = @results - $passed;
+
+    # Let the plugin object pick the status from the number of passes
+    my $status = $mp->check_threshold(
+        check    => $passed,
+        warning  => $mp->opts->warning,
+        critical => $mp->opts->critical,
+    );
+
+    # Lead with the summary line
+    $mp->add_message( $status, "$passed passes, $failed failures" );
+
+    # Then describe each individual check and how it turned out
+    for my $result (@results) {
+
+        # Keep only the first line of output, ahead of any performance data
+        my $text = $result->{stdout} || '[no output]';
+        $text =~ s{ $RES{stdout_junk} }{...}msx;
+
+        # Report host, check name, exit value and the shortened output
+        my $detail = "$result->{host}:$result->{check} <$result->{exit}> $text";
+        $mp->add_message( $status, $detail );
+    }
+
+    # Collapse the messages into one status and one string, and exit
+    my @outcome = $mp->check_messages(
+        join     => q{, },
+        join_all => q{, },
+    );
+    $mp->plugin_exit(@outcome);
+
+    # Should never get here
+    return;
 }
-# Verify all of the arguments are in the expected format
-my @invalids = grep { !m{[^:]+:[^:]+}msx } @ARGV;
-if (@invalids) {
- $np->nagios_die( q{Argument(s) %s are not in HOSTNAME:CHECK format},
- join q{,}, @invalids );
+# Add performance data about the results to the plugin object.
+#
+# Takes the plugin object and the arrayref of check hashrefs that fetch() has
+# filled in. Records two metrics: `pass`, the number of checks that exited
+# zero (annotated with the --warning and --critical thresholds), and `fail`,
+# the number of remaining checks. Returns nothing; the plugin object is
+# modified in place.
+sub perfdata {
+    my ( $mp, $checks ) = @_;
+
+    # Count the number of commands that exited 0, and the ones that didn't
+    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
+    my $fail = @{$checks} - $pass;
+
+    # Add that as performance data; the label is the fixed metric name and
+    # the value is the count, not the other way around
+    $mp->add_perfdata(
+        label    => 'pass',
+        value    => $pass,
+        warning  => $mp->opts->warning,
+        critical => $mp->opts->critical,
+    );
+    $mp->add_perfdata(
+        label => 'fail',
+        value => $fail,
+    );
+
+    # All done, we edited the object in place
+    return;
 }
-# Start counting down to timeout
-alarm $np->opts->timeout;
-
-# Run the checks and collect a list of successes and failures
-my ( @pass, @fail );
-foreach my $pair (@pairs) {
- my ( $hostname, $command ) = split m{:}msx, $pair;
- my $cmd = [ $plugins_dir . '/check_nrpe', '-H', $hostname, '-c', $command ];
- run3 $cmd, \undef, \undef, \undef;
- if ( $CHILD_ERROR == 0 ) {
- push @pass, $pair;
- }
- else {
- push @fail, $pair;
+# Drive one complete plugin run for the given plugin object: register and
+# parse options, build the check list from the single remaining argument, run
+# the checks under a timeout, then report. Croaks unless exactly one
+# non-option argument remains. Not expected to return, because check() exits.
+sub run {
+    my ($mp) = @_;
+
+    # Register our own options with the plugin, then parse the command line
+    for my $spec (@OPTS) {
+        $mp->add_arg( %{$spec} );
     }
+    $mp->getopts();
+
+    # Exactly one argument should be left: the comma-separated pair list
+    croak 'Need a comma-separated HOST:CHECK list'
+      if @ARGV != 1;
+    my $checks = build( $ARGV[0] );
+
+    # Arm the timeout before anything that may block
+    alarm $mp->opts->timeout;
+
+    # Run every check, storing the outcome in each check's hashref
+    fetch( $mp, $checks );
+
+    # Record pass and fail counts as performance data
+    perfdata( $mp, $checks );
+
+    # Choose the status and message from the outcomes, and exit with them
+    check( $mp, $checks );
+
+    # Shouldn't ever get here
+    return;
 }
-# Compare the check results to the failures
-my $code = $np->check_threshold(
- check => scalar @pass,
- warning => $np->opts->warning,
- critical => $np->opts->critical,
-);
-$np->nagios_exit( $code, sprintf 'All checks run, %u passes, %u failures',
- scalar @pass, scalar @fail );
+# Main function, entry point.
+#
+# Builds the Monitoring::Plugin object and hands it to run(). Any exception
+# thrown along the way is turned into a plugin_die() with the exception text.
+# A normal run never comes back, because check() exits the program.
+sub main {
-# This should never happen, but if it does we may as well be explicit about it
-$np->nagios_die(q{Couldn't work out how to aggregate checks});
+
+    # Build Monitoring::Plugin object
+    my $mp = Monitoring::Plugin->new(
+        usage => 'Usage: %s'
+          . ' [--warning|-w THRESHOLD]'
+          . ' [--critical|-c THRESHOLD] '
+          . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
+        version => $VERSION,
+    );
+
+    # Run the check command, catching exceptions for plugin exit. run() ends
+    # with a bare return, so the block yields an explicit true value to make
+    # the eval false only when an exception was actually thrown
+    eval { run($mp); 1 } or $mp->plugin_die($EVAL_ERROR);
+
+    # Shouldn't ever get here, but if run() does come back without exiting,
+    # be explicit about it rather than ending silently with no status
+    $mp->plugin_die('Check finished without reporting a status');
+
+    return;
+}
+main();