aboutsummaryrefslogblamecommitdiff
path: root/check_nrpe_cluster
blob: dca66be1e7a7a23385a065873ffe8d6f0cba5601 (plain) (tree)
1
2
3
4
5
6
7
8
9



                                                                           
                                                   

                                         
                 
 
                                          




                                 
 
                                    


                        
                    

                         

                                                             

                                                           
                                                             

                                  










                                                                
  






















                                                              

  















                                                                         
 

                                                            

 






















                                                                       
 










































                                                                              

 





















                                                                          

 






                                             
     






















                                                                           

 

                            
 















                                                                      
#!/usr/bin/env perl

#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
# Copyright: 2017
#
package Monitoring::Plugin::NRPE::Cluster;

# Force me to write this properly
use strict;
use warnings;
use utf8;

# Require at least this Perl version
use 5.010;

# Decree package version
our $VERSION = 2.01;

# Import required modules
use Carp;                          # core
use Const::Fast;                   # dpkg: libconst-fast-perl
use English qw(-no_match_vars);    # dpkg: perl-core
use IPC::Run3;                     # dpkg: libipc-run3-perl
use Monitoring::Plugin;            # cpanm Monitoring::Plugin

# Extra command-line options registered with the plugin object in run(): the
# warning and critical thresholds. Each entry is a hashref of arguments that
# is expanded into a single add_arg() call.
const our @OPTS => (
    {
        spec => 'warning|w=s',
        help => join(
            "\n",
            '-w, --warning=THRESHOLD',
            '   Warning threshold for the number of OK checks',
        ),
    },
    {
        spec => 'critical|c=s',
        help => join(
            "\n",
            '-c, --critical=THRESHOLD',
            '   Critical threshold for the number of OK checks',
        ),
    },
);

# Precompiled regular expressions, keyed by purpose. Both are compiled with
# /m, /s and /x; because of /s the dot in stdout_junk also matches newlines.
const our %RES => (

    # Single HOSTNAME:CHECK pair from the command line
    #
    # Anchored at both ends of the string. Yields two captures, the hostname
    # and the check command name; each is one or more characters that are
    # neither a colon nor a comma. build() matches every comma-separated
    # field of its argument against this pattern.
    host_check_pair => qr{
        \A         # Start of string
        ([^:,]++)  # Hostname
        :          # Colon
        ([^:,]++)  # Check command
        \z         # End of string
    }msx,

    # Junk to remove from stdout
    #
    # Matches from the first pipe or vertical-whitespace character through
    # to the end of the string, so it covers performance data and every line
    # after the first. check() substitutes the match with a literal "...".
    stdout_junk => qr{
        (?:  # Start of non-matching alternating group
            [|]  # Pipe character (denoting start of perfdata)
            |    # or
            \v   # Vertical whitespace
        )    # End of group
        .*   # All the rest
        \z   # End of string
    }msx,
);

# Turn a comma-separated "HOST:CHECK,HOST:CHECK,..." string into a list of
# check definitions.
#
# Takes the definition string as its only argument. Returns a reference to an
# array of hashrefs, each with the keys "host" and "check". Croaks if fewer
# than two pairs are given, or if any pair does not match
# $RES{host_check_pair}.
sub build {
    my $def = shift;

    # A cluster check only makes sense with two or more members
    my @pairs = split /,/msx, $def;
    @pairs >= 2
      or croak 'Need at least two HOST:CHECK pairs';

    # Parse each pair into a hashref; a failed match yields no captures, which
    # makes the list assignment count zero and triggers the croak
    my @checks;
    for my $pair (@pairs) {
        my ( $host, $command ) = $pair =~ $RES{host_check_pair}
          or croak "Malformed HOST:CHECK string: $pair";
        push @checks, { host => $host, check => $command };
    }

    # Hand back a reference to the list of check hashrefs
    return \@checks;
}

# Run check_nrpe once per check definition and record the outcome in place.
#
# Arguments:
#   $mp     - the plugin object (accepted for symmetry with the other steps;
#             not used here)
#   $checks - arrayref of hashrefs with "host" and "check" keys, as returned
#             by build()
#
# For every check this adds three keys to its hashref:
#   command - arrayref of the command line that was run
#   stdout  - whatever the command wrote to standard output
#   exit    - the command's exit value, or 3 (the Nagios UNKNOWN value) if
#             the command was terminated by a signal
#
# Croaks if check_nrpe cannot be found as an executable file in the directory
# named by $ENV{NAGIOS_PLUGINS_DIR} (default /usr/local/nagios/libexec).
# Returns nothing.
sub fetch {
    my ( $mp, $checks ) = @_;

    # Figure out where check_nrpe should be, and ensure it's usable; -x fails
    # both for a missing file and for one without execute permission
    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} // '/usr/local/nagios/libexec';
    my $nrpe = "$pdir/check_nrpe";
    -x $nrpe or croak "$nrpe does not exist or is not executable";

    # Iterate through the checks and collect exit value and output
    for my $check ( @{$checks} ) {

        # Build command
        $check->{command} =
          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];

        # Run command with no stdin, saving stdout; stderr is not captured
        # here, so any errors from the command are emitted as-is
        run3 $check->{command}, \undef, \$check->{stdout};

        # The wait status keeps the terminating signal number in its low
        # seven bits and the exit value in the next byte up. A command killed
        # by a signal has an exit byte of zero, so shifting alone would count
        # a crashed check as a pass; report it as UNKNOWN (3) instead.
        $check->{exit} =
          ( $CHILD_ERROR & 127 ) ? 3 : $CHILD_ERROR >> 8;
    }

    # Done, we added the check results in-place with the commands
    return;
}

# Decide the plugin's final status from the collected results and exit.
#
# Arguments:
#   $mp     - the plugin object
#   $checks - arrayref of check hashrefs already populated by fetch()
#
# The number of checks that exited zero is tested against the --warning and
# --critical thresholds to pick the status code. A summary message and one
# message per check are registered under that code, then the plugin exits via
# plugin_exit(); the trailing return is only a safety net.
sub check {
    my ( $mp, $checks ) = @_;

    # Tally the checks whose command exited zero; all others are failures
    my $pass = 0;
    for my $result ( @{$checks} ) {
        next if not exists $result->{exit};
        next if $result->{exit} != 0;
        $pass++;
    }
    my $fail = scalar( @{$checks} ) - $pass;

    # The pass count against the thresholds decides the overall status, which
    # heads the output as a summary line
    my $code = $mp->check_threshold(
        check    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_message( $code, "$pass passes, $fail failures" );

    # Append one message per check with its target, exit value, and the start
    # of its output
    for my $result ( @{$checks} ) {

        # Keep only the text before any performance data or second line
        my $summary = $result->{stdout} || '[no output]';
        $summary =~ s{ $RES{stdout_junk} }{...}msx;

        # Register the detail under the overall status code
        $mp->add_message( $code,
            "$result->{host}:$result->{check} <$result->{exit}> $summary",
        );
    }

    # Collapse everything registered so far into a final code and message,
    # and exit with them
    my @verdict = $mp->check_messages(
        join     => q{, },
        join_all => q{, },
    );
    $mp->plugin_exit(@verdict);

    # Should never get here
    return;
}

# Add performance data about the results to the plugin object.
#
# Arguments:
#   $mp     - the plugin object; receives two add_perfdata() calls
#   $checks - arrayref of check hashrefs already populated by fetch()
#
# Two metrics are recorded: "pass", the number of checks that exited zero
# (annotated with the --warning and --critical thresholds), and "fail", the
# number of all other checks. Returns nothing.
sub perfdata {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # Add that as performance data; the label is the metric's name and the
    # value is the count, so the output reads "pass=N" and "fail=M"
    $mp->add_perfdata(
        label    => 'pass',
        value    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_perfdata(
        label => 'fail',
        value => $fail,
    );

    # All done, we edited the object in place
    return;
}

# Drive one complete plugin run for the given plugin object: register and
# parse options, build the check list from the single remaining command-line
# argument, then fetch results, record performance data, and exit with the
# aggregated status.
#
# Croaks if there is not exactly one argument left after option parsing;
# build() may croak as well. Not expected to return, since check() exits.
sub run {
    my $mp = shift;

    # Register our extra options, then let the plugin object parse them all
    for my $spec (@OPTS) {
        $mp->add_arg( %{$spec} );
    }
    $mp->getopts();

    # Exactly one argument should remain: the HOST:CHECK list itself
    croak 'Need a comma-separated HOST:CHECK list'
      if @ARGV != 1;
    my $checks = build( $ARGV[0] );

    # Start counting down to timeout
    alarm( $mp->opts->timeout );

    # Run every check; results are stored in each check's hashref
    fetch( $mp, $checks );

    # Record pass and fail counts as performance data
    perfdata( $mp, $checks );

    # Choose the exit code and messages from the results, and exit
    check( $mp, $checks );

    # Shouldn't ever get here
    return;
}

# Main function, entry point.
#
# Builds the Monitoring::Plugin object with the usage string and version, then
# hands it to run(). Any exception thrown during the run is passed to
# plugin_die() so it is reported through the plugin object rather than as a
# bare Perl error.
sub main {

    # Build Monitoring::Plugin object
    my $mp = Monitoring::Plugin->new(
        usage => 'Usage: %s'
          . ' [--warning|-w THRESHOLD]'
          . ' [--critical|-c THRESHOLD] '
          . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
        version => $VERSION,
    );

    # Run the check command, catching exceptions for plugin exit. run() ends
    # with a bare return, so the block finishes with an explicit true value;
    # otherwise a normal return from run() would look like a failure and be
    # reported with an empty error message.
    eval { run($mp); 1 } or $mp->plugin_die($EVAL_ERROR);

    # Shouldn't ever get here
    return;
}
main();