#!/usr/bin/env perl
#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Usage:
#   check_nrpe_cluster [-w THRESHOLD] [-c THRESHOLD] HOST1:CHECK1,HOST2:CHECK2[,...]
#
# Each HOSTNAME:CHECK pair is run through check_nrpe (looked up in
# $NAGIOS_PLUGINS_DIR, defaulting to /usr/local/nagios/libexec). The number
# of checks that exit 0 is then compared against the --warning and --critical
# thresholds via Nagios::Plugin's check_threshold.
#
# Author: Tom Ryder
# Copyright: 2015
#
# $Id$
#
package Nagios::Plugin::NRPE::Cluster;

# Force me to write this properly
use strict;
use warnings;
use utf8;
use autodie qw(:all);

# Require at least Perl 5.10
use 5.010;

# Decree package version
our $VERSION = 1.0;

# Import required modules
use English qw(-no_match_vars);    # dpkg: perl-core
use IPC::Run3;                     # dpkg: libipc-run3-perl
use Nagios::Plugin;                # dpkg: libnagios-plugin-perl

# Find path to Nagios' plugins
my $plugins_dir = $ENV{NAGIOS_PLUGINS_DIR} // '/usr/local/nagios/libexec';

# Build Nagios::Plugin object
my $np = Nagios::Plugin->new(
    usage => 'Usage: %s [-w THRESHOLD] [-c THRESHOLD] '
      . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
    version => $VERSION,
);

# Add warning and critical options
$np->add_arg(
    spec => 'warning|w=s',
    help => "-w, --warning=THRESHOLD\n"
      . ' Warning threshold for the number of OK checks',
);
$np->add_arg(
    spec => 'critical|c=s',
    help => "-c, --critical=THRESHOLD\n"
      . ' Critical threshold for the number of OK checks',
);

# Read options
$np->getopts();

# Need one of --warning or --critical. Test for definedness rather than
# truth so that a threshold of "0" is not mistaken for a missing option.
if ( !defined $np->opts->warning && !defined $np->opts->critical ) {
    $np->nagios_die(q{Need one/both of --warning or --critical});
}

# Verify we have at least two host:check pairs
my @pairs;
if (@ARGV) {
    @pairs = split m{,}msx, $ARGV[0];
}
if ( @pairs < 2 ) {
    $np->nagios_die(q{Need at least two HOSTNAME:CHECK definitions});
}

# Verify every individual pair is exactly HOSTNAME:CHECK. The pattern is
# anchored so that empty fields and extra colons are rejected, and it is
# applied to the split pairs rather than to the raw comma-joined argument.
my @invalids = grep { !m{\A[^:]+:[^:]+\z}msx } @pairs;
if (@invalids) {

    # nagios_die does no formatting of its own, so build the message first
    $np->nagios_die(
        sprintf q{Argument(s) %s are not in HOSTNAME:CHECK format},
        join q{,}, @invalids
    );
}

# Start counting down to timeout
alarm $np->opts->timeout;

# Run the checks and collect a list of successes and failures
my ( @pass, @fail );
foreach my $pair (@pairs) {
    my ( $hostname, $command ) = split m{:}msx, $pair;
    my $cmd =
      [ $plugins_dir . '/check_nrpe', '-H', $hostname, '-c', $command ];

    # run3 throws if the command cannot be started at all; report that as a
    # plugin-level UNKNOWN rather than letting a raw Perl die escape. All of
    # the child's standard streams are discarded; only its exit status counts.
    my $status;
    my $ran = eval {
        run3 $cmd, \undef, \undef, \undef;
        $status = $CHILD_ERROR;
        1;
    };
    if ( !$ran ) {
        my $error = $EVAL_ERROR || 'unknown error';
        chomp $error;
        $np->nagios_die( sprintf q{Could not run %s: %s}, $cmd->[0], $error );
    }

    if ( $status == 0 ) {
        push @pass, $pair;
    }
    else {
        push @fail, $pair;
    }
}

# Compare the number of passing checks to the warning/critical thresholds
my $code = $np->check_threshold(
    check    => scalar @pass,
    warning  => $np->opts->warning,
    critical => $np->opts->critical,
);
$np->nagios_exit(
    $code,
    sprintf 'All checks run, %u passes, %u failures',
    scalar @pass,
    scalar @fail
);

# This should never happen, but if it does we may as well be explicit about it
$np->nagios_die(q{Couldn't work out how to aggregate checks});