aboutsummaryrefslogblamecommitdiff
path: root/check_nrpe_cluster
blob: 171d172a1e618a61a96d19e2f39d3021fea68bbb (plain) (tree)
1
2
3
4
5
6
7
8
9



                                                                           
                                                   

                                         

                            
 
                                          




                                 
 
                                    

                                           

                         





                                   

                                  
             
     


                                                                 

      


                                                                  
      
  

                     
            


                                                      




                                   











                                                              

  




                                                                       

                                                    





                                                                         
                                                       

                              
 

                                                            

 




                                                                  
                                                                       
                                  

                                                














                                                                  
 



























                                                                              

                                                                     











                              

 





















                                                                          

 






                                             
     



                                                                        
                                                        

















                                                                           

 

                            
 















                                                                      
#!/usr/bin/env perl

#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
# Copyright: 2017 Tom Ryder
# License: MIT (see LICENSE)
#
package Monitoring::Plugin::NRPE::Cluster;

# Force me to write this properly
use strict;
use warnings;
use utf8;

# Require at least this Perl version
# Nothing in here should need a modern Perl
use 5.006;

# Import required modules
use English qw(-no_match_vars);
use IPC::Run3;
use Monitoring::Plugin qw(%ERRORS);

# Package version; main() hands this to the Monitoring::Plugin constructor
our $VERSION = '2.02';

# Definitions of the extra --warning and --critical command-line options.
# run() passes each hashref's key/value pairs to the plugin's add_arg method;
# check() and perfdata() later read the parsed values back through
# $mp->opts->warning and $mp->opts->critical.  Both thresholds are applied to
# the number of checks that passed (exited 0), not the number that failed.
our @OPTS = (
    {
        spec  => 'warning|w=s',
        help  => 'Warning threshold for the number of OK checks',
        label => 'THRESHOLD',
    },
    {
        spec  => 'critical|c=s',
        help  => 'Critical threshold for the number of OK checks',
        label => 'THRESHOLD',
    },
);

# Precompiled regular expressions, keyed by purpose:
#   host_check_pair - used by build() to validate one HOSTNAME:CHECK pair and
#                     capture its two halves; neither half may contain a
#                     colon or a comma
#   stdout_junk     - used by check() to cut a command's output at the first
#                     pipe character or vertical whitespace character,
#                     whichever comes first, through to the end of the string
# NOTE(review): \v as "vertical whitespace" appears to need Perl 5.10 or
# newer, while this file declares "use 5.006" -- confirm the minimum version.
our %RES = (

    # Single HOSTNAME:CHECK pair from the command line
    host_check_pair => qr{
        \A        # Start of string
        ([^:,]+)  # Hostname
        :         # Colon
        ([^:,]+)  # Check command
        \z        # End of string
    }msx,

    # Junk to remove from stdout
    stdout_junk => qr{
        (?:  # Start of non-matching alternating group
            [|]  # Pipe character (denoting start of perfdata)
            |    # or
            \v   # Vertical whitespace
        )    # End of group
        .*   # All the rest
        \z   # End of string
    }msx,
);

# Parse a comma-separated HOST:CHECK list into check definitions.
#
# Takes the definition string as its only argument and returns a reference to
# an array of hashrefs, each holding a "host" and a "check" key.  Dies if the
# string yields fewer than two pairs, or if any pair fails to match
# $RES{host_check_pair}.
sub build {
    my $def = shift;

    # Break the string apart on commas; a lone check is not a cluster
    my @pairs = split m/,/msx, $def;
    if ( @pairs < 2 ) {
        die "Need at least two HOST:CHECK pairs\n";
    }

    # Validate each pair and turn it into a hashref
    my @checks;
    for my $pair (@pairs) {
        my @fields = $pair =~ $RES{host_check_pair};
        if ( !@fields ) {
            die "Malformed HOST:CHECK string: $pair\n";
        }
        push @checks, { host => $fields[0], check => $fields[1] };
    }

    # Hand back a reference to the list of check hashrefs
    return \@checks;
}

# Run check_nrpe once per check and record the outcome on each check hashref
#
# Arguments:
#   $mp     - plugin object; accepted so all the stage subs share a call
#             signature, but not used here
#   $checks - arrayref of hashrefs, each with "host" and "check" keys
#
# Adds these keys to every hashref in place:
#   command - arrayref holding the argument list that was run
#   stdout  - whatever the command wrote to its standard output
#   exit    - the command's exit value, or $ERRORS{UNKNOWN} if the command was
#             terminated by a signal
#
# Dies if check_nrpe does not exist or is not executable.  Nothing here traps
# exceptions, so anything thrown by run3 propagates to the caller.  Returns
# nothing.
sub fetch {
    my ( $mp, $checks ) = @_;

    # Figure out where check_nrpe should be, and ensure it's there
    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} || '/usr/local/nagios/libexec';
    my $nrpe = "$pdir/check_nrpe";
    -e $nrpe or die "$nrpe does not exist\n";
    -x $nrpe or die "$nrpe is not executable\n";

    # Iterate through the checks and collect exit value and output
    for my $check ( @{$checks} ) {

        # Build command as a list, so no shell is involved
        $check->{command} =
          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];

        # Run command with empty stdin and capture its stdout; no stderr
        # argument is passed, so the command's stderr is not captured here
        run3 $check->{command}, \undef, \$check->{stdout};

        # Take a copy of the wait status before anything else can change it
        my $status = $CHILD_ERROR;

        # The low seven bits of the wait status hold the number of the signal
        # that terminated the command, if any.  In that case the high byte is
        # zero, so shifting alone would report a killed check as a pass;
        # record it as UNKNOWN instead.
        if ( $status & 127 ) {
            $check->{exit} = $ERRORS{UNKNOWN};
        }
        else {
            $check->{exit} = $status >> 8;
        }
    }

    # Done, we added the check results in-place with the commands
    return;
}

# Turn the collected results into a plugin status and message, then exit
#
# Arguments:
#   $mp     - plugin object providing check_threshold, opts, add_message,
#             check_messages and plugin_exit
#   $checks - arrayref of check hashrefs; reads the "host", "check", "exit"
#             and "stdout" keys of each
#
# The number of checks that exited 0 is compared against the --warning and
# --critical thresholds to choose the status.  One summary message is added
# with that status, followed by one OK-level message per check giving its
# details and truncated output.
sub check {
    my ( $mp, $checks ) = @_;

    # Tally the checks that exited 0; everything else counts as a failure
    my $passed = 0;
    for my $result ( @{$checks} ) {
        next if !exists $result->{exit};
        next if $result->{exit} != 0;
        $passed++;
    }
    my $failed = scalar @{$checks} - $passed;

    # Let the thresholds decide the status for the summary message
    my $state = $mp->check_threshold(
        check    => $passed,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_message( $state, "$passed passes, $failed failures" );

    # Append one message per check with its host, command, exit value and
    # shortened output
    for my $result ( @{$checks} ) {

        # Fall back to a placeholder if there was no output, then replace
        # everything from the first pipe or line break onward
        my $text = $result->{stdout} ? $result->{stdout} : '[no output]';
        $text =~ s{ $RES{stdout_junk} }{...}msx;

        my ( $host, $name, $exit ) = @{$result}{qw(host check exit)};
        $mp->add_message( $ERRORS{OK}, "${host}:${name} <${exit}> ${text}" );
    }

    # Combine all the messages and exit with the resulting status
    my @outcome = $mp->check_messages(
        join     => q{, },
        join_all => q{, },
    );
    $mp->plugin_exit(@outcome);

    # Only reached if plugin_exit returns
    return;
}

# Add performance data about the results to the plugin object
#
# Arguments:
#   $mp     - plugin object providing add_perfdata and opts
#   $checks - arrayref of check hashrefs; only the "exit" key is read
#
# Records two metrics: "pass", the number of checks that exited 0, carrying
# the --warning and --critical thresholds, and "fail", the number of all
# other checks.  Returns nothing; the plugin object is modified in place.
sub perfdata {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # Add that as performance data.  The label names the metric and the value
    # carries the count, so the labels stay constant from run to run.
    $mp->add_perfdata(
        label    => 'pass',
        value    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_perfdata(
        label => 'fail',
        value => $fail,
    );

    # All done, we edited the object in place
    return;
}

# Drive one complete plugin run for the given plugin object
#
# Registers the options from @OPTS, parses the command line, requires exactly
# one remaining argument (the HOST:CHECK list), and then runs the build,
# fetch, perfdata and check stages in that order.  Dies with a short message
# if the argument count is wrong; build() and fetch() may die as well.
sub run {
    my $mp = shift;

    # Register our extra options, then parse the command line
    foreach my $arg_def (@OPTS) {
        $mp->add_arg( %{$arg_def} );
    }
    $mp->getopts();

    # Exactly one argument should remain: the comma-separated list
    if ( @ARGV != 1 ) {
        die "Need a comma-separated HOST:CHECK list\n";
    }
    my ($spec) = @ARGV;
    my $checks = build($spec);

    # Arm the timeout before any remote check is run
    alarm $mp->opts->timeout;

    # Run the checks; results are stored on each check's hashref
    fetch( $mp, $checks );

    # Record pass and fail counts as performance data
    perfdata( $mp, $checks );

    # Pick the status and messages, and exit
    check( $mp, $checks );

    # Only reached if check() returns
    return;
}

# Entry point: construct the plugin object and run the check with it
#
# If run() dies, or returns a false value, the plugin's plugin_die method is
# called with whatever the eval left in $EVAL_ERROR.
sub main {

    # Pieces of the usage line, concatenated as they stand
    my @usage = (
        'Usage: %s',
        ' [--warning|-w THRESHOLD]',
        ' [--critical|-c THRESHOLD] ',
        'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
    );

    # Build Monitoring::Plugin object
    my $plugin = Monitoring::Plugin->new(
        usage   => join( q{}, @usage ),
        version => $VERSION,
    );

    # Run the check, converting any exception into a plugin exit
    my $finished = eval { run($plugin) };
    if ( !$finished ) {
        $plugin->plugin_die($EVAL_ERROR);
    }

    # Only reached if plugin_die returns
    return;
}
main();