From 6b7602d1fe64537e8e2888c80b15592b292ce520 Mon Sep 17 00:00:00 2001
From: Tom Ryder
Date: Mon, 2 Oct 2017 17:52:26 +1300
Subject: Completely overhaul and refactor

---
Review notes (commentary only; ignored when the patch is applied):
- Line structure restored from a whitespace-collapsed copy; blank context
  lines and indentation are reconstructed, hunk line counts are unchanged.
- perfdata(): label/value were swapped in both add_perfdata() calls.
- fetch(): a check_nrpe killed by a signal produced exit value 0 and was
  counted as a pass; it is now reported as 3 (UNKNOWN).
- fetch(): the -x failure message now says what is actually tested.
- %RES: "(?:" is a non-capturing group, comment corrected.

 LICENSE            |   2 +-
 check_nrpe_cluster | 266 +++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 197 insertions(+), 71 deletions(-)

diff --git a/LICENSE b/LICENSE
index bf6312f..b3215fc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2015 Tom Ryder
+Copyright (c) 2017 Tom Ryder
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/check_nrpe_cluster b/check_nrpe_cluster
index dadbc6c..dca66be 100755
--- a/check_nrpe_cluster
+++ b/check_nrpe_cluster
@@ -2,106 +2,232 @@
 
 #
 # Run two or more NRPE checks and return a status based on their aggregated
-# results, similar to check_cluster. fork(2)s ahoy!
+# results, similar to check_cluster. fork(3)s ahoy!
 #
 # Author: Tom Ryder
-# Copyright: 2015
+# Copyright: 2017
 #
-# $Id$
-#
-package Nagios::Plugin::NRPE::Cluster;
+package Monitoring::Plugin::NRPE::Cluster;
 
 # Force me to write this properly
 use strict;
 use warnings;
 use utf8;
-use autodie qw(:all);
 
-# Require at least Perl 5.10
+# Require at least this Perl version
 use 5.010;
 
 # Decree package version
-our $VERSION = 1.0;
+our $VERSION = 2.01;
 
 # Import required modules
+use Carp;                          # core
+use Const::Fast;                   # dpkg: libconst-fast-perl
 use English qw(-no_match_vars);    # dpkg: perl-core
 use IPC::Run3;                     # dpkg: libipc-run3-perl
-use Nagios::Plugin;                # dpkg: libnagios-plugin-perl
-
-# Find path to Nagios' plugins
-my $plugins_dir =
-  exists $ENV{NAGIOS_PLUGINS_DIR}
-  ? $ENV{NAGIOS_PLUGINS_DIR}
-  : '/usr/local/nagios/libexec';
-
-# Build Nagios::Plugin object
-my $np = Nagios::Plugin->new(
-    usage => 'Usage: %s [-w THRESHOLD] [-c THRESHOLD] '
-      . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
-    version => $VERSION,
-);
+use Monitoring::Plugin;            # cpanm Monitoring::Plugin
 
 # Add warning and critical options
-$np->add_arg(
-    spec => 'warning|w=s',
-    help => "-w, --warning=THRESHOLD\n"
-      . ' Warning threshold for the number of OK checks',
+const our @OPTS => (
+    {
+        spec => 'warning|w=s',
+        help => "-w, --warning=THRESHOLD\n"
+          . ' Warning threshold for the number of OK checks',
+    },
+    {
+        spec => 'critical|c=s',
+        help => "-c, --critical=THRESHOLD\n"
+          . ' Critical threshold for the number of OK checks',
+    },
 );
-$np->add_arg(
-    spec => 'critical|c=s',
-    help => "-c, --critical=THRESHOLD\n"
-      . ' Critical threshold for the number of OK checks',
+
+# Regular expressions
+const our %RES => (
+
+    # Single HOSTNAME:CHECK pair from the command line
+    host_check_pair => qr{
+        \A         # Start of string
+        ([^:,]++)  # Hostname
+        :          # Colon
+        ([^:,]++)  # Check command
+        \z         # End of string
+    }msx,
+
+    # Junk to remove from stdout
+    stdout_junk => qr{
+        (?:      # Start of non-capturing alternating group
+            [|]  # Pipe character (denoting start of perfdata)
+            |    # or
+            \v   # Vertical whitespace
+        )        # End of group
+        .*       # All the rest
+        \z       # End of string
+    }msx,
 );
 
-# Read options
-$np->getopts();
+# Build a list of check definitions from a HOST:CHECK string
+sub build {
+    my $def = shift;
+
+    # Split HOST:CHECK definition string up, check it meets the minimum
+    ( my @defs = split /,/msx, $def ) >= 2
+      or croak 'Need at least two HOST:CHECK pairs';
+
+    # Build a list of check hashrefs with hostname and check command name
+    my @checks;
+    for my $def (@defs) {
+        my %check;
+        @check{qw(host check)} = $def =~ $RES{host_check_pair}
+          or croak "Malformed HOST:CHECK string: $def";
+        push @checks, \%check;
+    }
 
-# Need one of --warning or --critical
-if ( !$np->opts->warning && !$np->opts->critical ) {
-    $np->nagios_die(q{Need one/both of --warning or --critical});
+    # Done, return a reference to the list of check hashrefs
+    return \@checks;
 }
 
-# Verify we have at least two host:check pairs
-my @pairs;
-if (@ARGV) {
-    @pairs = split m{,}msx, $ARGV[0];
+# Run the checks and collect exit values and output
+sub fetch {
+    my ( $mp, $checks ) = @_;
+
+    # Figure out where check_nrpe should be, and ensure it's there
+    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} // '/usr/local/nagios/libexec';
+    my $nrpe = "$pdir/check_nrpe";
+    -x $nrpe or croak "$nrpe does not exist or is not executable";
+
+    # Iterate through the checks and collect exit value and output
+    for my $check ( @{$checks} ) {
+
+        # Build command
+        $check->{command} =
+          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];
+
+        # Run command, save output and exit value; signal death => UNKNOWN
+        run3 $check->{command}, \undef, \$check->{stdout};
+        $check->{exit} = ( $CHILD_ERROR & 127 ) ? 3 : $CHILD_ERROR >> 8;
+    }
+
+    # Done, we added the check results in-place with the commands
+    return;
 }
-if ( @pairs < 2 ) {
-    $np->nagios_die(q{Need at least two HOSTNAME:CHECK definitions});
+
+# Select exit codes and messages based on the checks' outcomes, and exit
+# appropriately
+sub check {
+    my ( $mp, $checks ) = @_;
+
+    # Count the number of commands that exited 0, and the ones that didn't
+    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
+    my $fail = @{$checks} - $pass;
+
+    # Figure out appropriate exit code and primary message
+    my $code = $mp->check_threshold(
+        check    => $pass,
+        warning  => $mp->opts->warning,
+        critical => $mp->opts->critical,
+    );
+    my $message = "$pass passes, $fail failures";
+    $mp->add_message( $code, $message );
+
+    # Iterate through the performed checks and add messages to the output with
+    # their details and results
+    for my $check ( @{$checks} ) {
+
+        # Truncate to first line before any performance data
+        my $out = $check->{stdout} || '[no output]';
+        $out =~ s{ $RES{stdout_junk} }{...}msx;
+
+        # Add the check's details including the truncated output
+        $mp->add_message( $code,
+            "$check->{host}:$check->{check} <$check->{exit}> $out",
+        );
+    }
+
+    # Form messages and exit
+    $mp->plugin_exit(
+        $mp->check_messages(
+            join     => q{, },
+            join_all => q{, },
+        ),
+    );
+
+    # Should never get here
+    return;
 }
 
-# Verify all of the arguments are in the expected format
-my @invalids = grep { !m{[^:]+:[^:]+}msx } @ARGV;
-if (@invalids) {
-    $np->nagios_die( q{Argument(s) %s are not in HOSTNAME:CHECK format},
-        join q{,}, @invalids );
+# Add performance data about the results to the plugin object
+sub perfdata {
+    my ( $mp, $checks ) = @_;
+
+    # Count the number of commands that exited 0, and the ones that didn't
+    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
+    my $fail = @{$checks} - $pass;
+
+    # Add that as performance data
+    $mp->add_perfdata(
+        label    => 'pass',
+        value    => $pass,
+        warning  => $mp->opts->warning,
+        critical => $mp->opts->critical,
+    );
+    $mp->add_perfdata(
+        label => 'fail',
+        value => $fail,
+    );
+
+    # All done, we edited the object in place
+    return;
 }
 
-# Start counting down to timeout
-alarm $np->opts->timeout;
-
-# Run the checks and collect a list of successes and failures
-my ( @pass, @fail );
-foreach my $pair (@pairs) {
-    my ( $hostname, $command ) = split m{:}msx, $pair;
-    my $cmd = [ $plugins_dir . '/check_nrpe', '-H', $hostname, '-c', $command ];
-    run3 $cmd, \undef, \undef, \undef;
-    if ( $CHILD_ERROR == 0 ) {
-        push @pass, $pair;
-    }
-    else {
-        push @fail, $pair;
+# Given a plugin object, try to run the check
+sub run {
+    my $mp = shift;
+
+    # Add custom options and read all options
+    for my $opt (@OPTS) {
+        $mp->add_arg( %{$opt} );
     }
+    $mp->getopts();
+
+    # Validate and build the list of checks from the remaining arguments
+    @ARGV == 1
+      or croak 'Need a comma-separated HOST:CHECK list';
+    my $checks = build $ARGV[0];
+
+    # Start counting down to timeout
+    alarm $mp->opts->timeout;
+
+    # Fetch the results by running the checks and adding data to each one's
+    # hashref
+    fetch $mp, $checks;
+
+    # Add performance data based on the checks' outcome
+    perfdata $mp, $checks;
+
+    # Select exit codes and messages based on the checks' outcome, and exit
+    # appropriately
+    check $mp, $checks;
+
+    # Shouldn't ever get here
+    return;
 }
 
-# Compare the check results to the failures
-my $code = $np->check_threshold(
-    check    => scalar @pass,
-    warning  => $np->opts->warning,
-    critical => $np->opts->critical,
-);
-$np->nagios_exit( $code, sprintf 'All checks run, %u passes, %u failures',
-    scalar @pass, scalar @fail );
+# Main function, entry point
+sub main {
 
-# This should never happen, but if it does we may as well be explicit about it
-$np->nagios_die(q{Couldn't work out how to aggregate checks});
+    # Build Monitoring::Plugin object
+    my $mp = Monitoring::Plugin->new(
+        usage => 'Usage: %s'
+          . ' [--warning|-w THRESHOLD]'
+          . ' [--critical|-c THRESHOLD] '
+          . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
+        version => $VERSION,
+    );
+
+    # Run the check command, catching exceptions for plugin exit
+    eval { run $mp } or $mp->plugin_die($EVAL_ERROR);
+
+    # Shouldn't ever get here
+    return;
+}
+main();
-- 
cgit v1.2.3