#!/usr/bin/env perl
#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder
# Copyright: 2017
#
package Monitoring::Plugin::NRPE::Cluster;

# Force me to write this properly
use strict;
use warnings;
use utf8;

# Require at least this Perl version
use 5.010;

# Decree package version
our $VERSION = 2.01;

# Import required modules
use Carp;                              # core
use Const::Fast;                       # dpkg: libconst-fast-perl
use English qw(-no_match_vars);        # dpkg: perl-core
use IPC::Run3;                         # dpkg: libipc-run3-perl
use Monitoring::Plugin;                # cpanm Monitoring::Plugin

# Add warning and critical options
const our @OPTS => (
    {
        spec => 'warning|w=s',
        help => "-w, --warning=THRESHOLD\n"
          . ' Warning threshold for the number of OK checks',
    },
    {
        spec => 'critical|c=s',
        help => "-c, --critical=THRESHOLD\n"
          . ' Critical threshold for the number of OK checks',
    },
);

# Regular expressions
const our %RES => (

    # Single HOSTNAME:CHECK pair from the command line
    host_check_pair => qr{
        \A          # Start of string
        ([^:,]++)   # Hostname
        :           # Colon
        ([^:,]++)   # Check command
        \z          # End of string
    }msx,

    # Junk to remove from stdout
    stdout_junk => qr{
        (?:         # Start of non-matching alternating group
            [|]     # Pipe character (denoting start of perfdata)
            |       # or
            \v      # Vertical whitespace
        )           # End of group
        .*          # All the rest
        \z          # End of string
    }msx,
);

# Build a list of check definitions from a HOST:CHECK string
#
# Takes one comma-separated string of HOST:CHECK pairs.  Returns a reference
# to an array of hashrefs, each with "host" and "check" keys.  Croaks if there
# are fewer than two pairs, or if any pair is malformed.
sub build {
    my $def = shift;

    # Split HOST:CHECK definition string up, check it meets the minimum
    ( my @pairs = split /,/msx, $def ) >= 2
      or croak 'Need at least two HOST:CHECK pairs';

    # Build a list of check hashrefs with hostname and check command name
    my @checks;
    for my $pair (@pairs) {
        my %check;

        # A list assignment in scalar context yields the number of captures,
        # which is zero (false) when the pair does not match
        @check{qw(host check)} = $pair =~ $RES{host_check_pair}
          or croak "Malformed HOST:CHECK string: $pair";
        push @checks, \%check;
    }

    # Done, return a reference to the list of check hashrefs
    return \@checks;
}

# Run the checks and collect exit values and output
#
# Takes the plugin object (currently unused) and the arrayref of check
# hashrefs from build().  Adds "command", "stdout", and "exit" keys to each
# check hashref in place.  Croaks if check_nrpe is not executable.
sub fetch {
    my ( $mp, $checks ) = @_;

    # Figure out where check_nrpe should be, and ensure it's there
    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} // '/usr/local/nagios/libexec';
    my $nrpe = "$pdir/check_nrpe";
    -x $nrpe
      or croak "$nrpe does not exist";

    # Iterate through the checks and collect exit value and output
    for my $check ( @{$checks} ) {

        # Build command
        $check->{command} =
          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];

        # Run command and save output and exit value; stderr is not
        # redirected, so the child's errors are emitted as-is
        run3 $check->{command}, \undef, \$check->{stdout};

        # The low seven bits of the wait status hold the terminating signal,
        # if any.  A child killed by a signal has zero in the exit-code byte,
        # so shifting alone would count a crashed check as a pass; report it
        # as UNKNOWN instead.
        $check->{exit} =
          ( $CHILD_ERROR & 127 ) ? UNKNOWN() : $CHILD_ERROR >> 8;
    }

    # Done, we added the check results in-place with the commands
    return;
}

# Select exit codes and messages based on the checks' outcomes, and exit
# appropriately
#
# Takes the plugin object and the arrayref of completed check hashrefs.  The
# number of passing checks is tested against the --warning and --critical
# thresholds, and the plugin exits with the resulting status.
sub check {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # Figure out appropriate exit code and primary message
    my $code = $mp->check_threshold(
        check    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    my $message = "$pass passes, $fail failures";
    $mp->add_message( $code, $message );

    # Iterate through the performed checks and add messages to the output with
    # their details and results
    for my $check ( @{$checks} ) {

        # Use a placeholder only for genuinely absent or empty output; a
        # plain truth test would also discard a legitimate output of "0"
        my $out = $check->{stdout};
        if ( !defined $out or !length $out ) {
            $out = '[no output]';
        }

        # Truncate to first line before any performance data
        $out =~ s{ $RES{stdout_junk} }{...}msx;

        # Add the check's details including the truncated output
        $mp->add_message( $code,
            "$check->{host}:$check->{check} <$check->{exit}> $out",
        );
    }

    # Form messages and exit
    $mp->plugin_exit(
        $mp->check_messages(
            join     => q{, },
            join_all => q{, },
        ),
    );

    # Should never get here
    return;
}

# Add performance data about the results to the plugin object
#
# Takes the plugin object and the arrayref of completed check hashrefs, and
# records "pass" and "fail" counts as performance data on the plugin object.
sub perfdata {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # Add that as performance data; the label is the metric's name and the
    # value is the count
    $mp->add_perfdata(
        label    => 'pass',
        value    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_perfdata(
        label => 'fail',
        value => $fail,
    );

    # All done, we edited the object in place
    return;
}

# Given a plugin object, try to run the check
#
# Registers the custom options, parses the command line, and then builds,
# fetches, and evaluates the checks.  Croaks if the remaining arguments are
# not exactly one HOST:CHECK list.
sub run {
    my $mp = shift;

    # Add custom options and read all options
    for my $opt (@OPTS) {
        $mp->add_arg( %{$opt} );
    }
    $mp->getopts();

    # Validate and build the list of checks from the remaining arguments
    @ARGV == 1
      or croak 'Need a comma-separated HOST:CHECK list';
    my $checks = build( $ARGV[0] );

    # Start counting down to timeout
    alarm $mp->opts->timeout;

    # Fetch the results by running the checks and adding data to each one's
    # hashref
    fetch( $mp, $checks );

    # Add performance data based on the checks' outcome
    perfdata( $mp, $checks );

    # Select exit codes and messages based on the checks' outcome, and exit
    # appropriately
    check( $mp, $checks );

    # Shouldn't ever get here
    return;
}

# Main function, entry point
sub main {

    # Build Monitoring::Plugin object
    my $mp = Monitoring::Plugin->new(
        usage => 'Usage: %s'
          . ' [--warning|-w THRESHOLD]'
          . ' [--critical|-c THRESHOLD] '
          . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
        version => $VERSION,
    );

    # Run the check command, catching exceptions for plugin exit.  The block
    # ends with a true value so that a false result from eval can only mean
    # an exception was thrown.
    eval { run($mp); 1 }
      or $mp->plugin_die( $EVAL_ERROR || 'Unknown error' );

    # run() is expected to exit via plugin_exit; if it ever returns, fail
    # safe rather than falling off the end with a zero (OK) exit status
    $mp->plugin_die('Check returned without reporting a status');

    # Shouldn't ever get here
    return;
}

main();