aboutsummaryrefslogtreecommitdiff
path: root/check_nrpe_cluster
blob: 94a7fa2c9509af274cf6374ce8edd9dc9ec763c9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#!/usr/bin/env perl

#
# Run two or more NRPE checks and return a status based on their aggregated
# results, similar to check_cluster. fork(3)s ahoy!
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
# Copyright: 2017 Tom Ryder
# License: MIT (see LICENSE)
#
package main;

# Force me to write this properly
use strict;
use warnings;
use utf8;

# Require at least this Perl version
# Nothing in here should need a modern Perl
use 5.006;

# Import required modules
use English qw(-no_match_vars);
use IPC::Run3;
use Monitoring::Plugin qw(%ERRORS);

# Decree package version
our $VERSION = '2.02';

# Custom command-line options for the plugin: the warning and critical
# thresholds. Each hashref here is flattened into the argument list of
# $mp->add_arg() in run(); the parsed values are later read back through
# $mp->opts->warning and $mp->opts->critical. Both thresholds are applied to
# the number of checks that passed (exited 0), not the number that failed.
our @OPTS = (
    {
        spec  => 'warning|w=s',
        help  => 'Warning threshold for the number of OK checks',
        label => 'THRESHOLD',
    },
    {
        spec  => 'critical|c=s',
        help  => 'Critical threshold for the number of OK checks',
        label => 'THRESHOLD',
    },
);

# Precompiled regular expressions used elsewhere in this file, keyed by
# purpose. All are compiled with /msx, so whitespace and comments inside the
# patterns are ignored and "." also matches newlines.
our %RES = (

    # Single HOSTNAME:CHECK pair from the command line. Anchored at both
    # ends, with two capture groups (hostname, then check command); each is
    # one or more characters that are neither a colon nor a comma. Used by
    # build() to split each pair.
    host_check_pair => qr{
        \A        # Start of string
        ([^:,]+)  # Hostname
        :         # Colon
        ([^:,]+)  # Check command
        \z        # End of string
    }msx,

    # Junk to remove from stdout: everything from the first pipe character or
    # vertical whitespace character through to the end of the string. Used by
    # check() to cut a check's output down to its first line, minus perfdata.
    # NOTE(review): \v as a vertical-whitespace class appears to date from
    # Perl 5.10, which is newer than the "use 5.006" minimum declared above --
    # confirm the intended minimum version.
    stdout_junk => qr{
        (?:  # Start of non-matching alternating group
            [|]  # Pipe character (denoting start of perfdata)
            |    # or
            \v   # Vertical whitespace
        )    # End of group
        .*   # All the rest
        \z   # End of string
    }msx,
);

# Turn a comma-separated HOST:CHECK string into a list of check definitions.
#
# Arguments:
#   the definition string, e.g. "host1:check_a,host2:check_b"
#
# Returns a reference to an array of hashrefs, one per pair and in the order
# given, each with a "host" key and a "check" key.
#
# Dies if fewer than two pairs are supplied, or if any pair does not have the
# HOST:CHECK shape.
sub build {
    my ($spec) = @_;

    # A cluster check needs at least two members to aggregate
    my @pairs = split m/,/msx, $spec;
    if ( @pairs < 2 ) {
        die "Need at least two HOST:CHECK pairs\n";
    }

    # Convert each pair to a hashref; a failed match yields an empty capture
    # list, which makes the list assignment count zero and triggers the die
    my @checks = map {
        my ( $host, $command ) = $_ =~ $RES{host_check_pair}
          or die "Malformed HOST:CHECK string: $_\n";
        +{ host => $host, check => $command };
    } @pairs;

    return \@checks;
}

# Run each check through check_nrpe, recording the command line, the captured
# standard output, and the exit value in the check's own hashref.
#
# Arguments:
#   $mp     - plugin object (accepted for symmetry with the other steps; not
#             used here)
#   $checks - arrayref of check hashrefs with "host" and "check" keys; each
#             one gains "command", "stdout", and "exit" keys
#
# Returns nothing; the check hashrefs are edited in place.
#
# Dies if check_nrpe does not exist or is not executable in the plugin
# directory, which is taken from the NAGIOS_PLUGINS_DIR environment variable
# if set, and otherwise defaults to /usr/local/nagios/libexec.
sub fetch {
    my ( $mp, $checks ) = @_;

    # Figure out where check_nrpe should be, and ensure it's there
    my $pdir = $ENV{NAGIOS_PLUGINS_DIR} || '/usr/local/nagios/libexec';
    my $nrpe = "$pdir/check_nrpe";
    -e $nrpe or die "$nrpe does not exist\n";
    -x $nrpe or die "$nrpe is not executable\n";

    # Iterate through the checks and collect exit value and output
    for my $check ( @{$checks} ) {

        # Build command as a list, so no shell is involved
        $check->{command} =
          [ $nrpe, '-H', $check->{host}, '-c', $check->{check} ];

        # Run the command, capturing its standard output. No standard error
        # argument is passed to run3, so the child's error output is not
        # captured here.
        run3 $check->{command}, \undef, \$check->{stdout};

        # Copy the wait status straight away, before anything can change it
        my $status = $CHILD_ERROR;

        # The low seven bits of the wait status hold the number of the signal
        # that terminated the child, if any. In that case the exit value in
        # the high byte is zero, which must not be mistaken for a passing
        # check; record UNKNOWN instead.
        $check->{exit} =
          ( $status & 127 ) ? $ERRORS{UNKNOWN} : $status >> 8;
    }

    # Done, we added the check results in-place with the commands
    return;
}

# Work out the overall status from the number of passing checks, add a
# summary and one detail message per check, and exit through the plugin
# object.
#
# Arguments:
#   $mp     - plugin object
#   $checks - arrayref of check hashrefs carrying "host", "check", "stdout",
#             and "exit" keys
#
# plugin_exit() is expected to end the program, so this is not expected to
# return to its caller.
sub check {
    my ( $mp, $checks ) = @_;

    # Tally the passes: a check passes only if it has an exit value and that
    # value is numerically zero. Everything else is a failure.
    my $pass = 0;
    for my $result ( @{$checks} ) {
        next if not exists $result->{exit};
        next if $result->{exit} != 0;
        $pass++;
    }
    my $fail = scalar( @{$checks} ) - $pass;

    # The thresholds are applied to the pass count, and the resulting code
    # carries the headline message
    my $code = $mp->check_threshold(
        check    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_message( $code, "$pass passes, $fail failures" );

    # Describe each check. These details are always filed under OK so they
    # cannot change the overall code chosen above.
    for my $result ( @{$checks} ) {

        # Keep only the first line, and nothing from the perfdata pipe on
        my $text = $result->{stdout};
        $text = '[no output]' if not $text;
        $text =~ s{ $RES{stdout_junk} }{...}msx;

        my $detail = sprintf '%s:%s <%s> %s',
          $result->{host}, $result->{check}, $result->{exit}, $text;
        $mp->add_message( $ERRORS{OK}, $detail );
    }

    # Collapse the messages into a final code and text, and exit with them
    my ( $status, $summary ) = $mp->check_messages(
        join     => q{, },
        join_all => q{, },
    );
    $mp->plugin_exit( $status, $summary );

    # Should never get here
    return;
}

# Add performance data about the results to the plugin object: one "pass"
# metric and one "fail" metric.
#
# Arguments:
#   $mp     - plugin object; must provide add_perfdata() and opts(), the
#             latter returning an object with warning() and critical()
#   $checks - arrayref of check hashrefs; a check counts as a pass only when
#             its "exit" key exists and is numerically zero
#
# Returns nothing; the plugin object is edited in place.
sub perfdata {
    my ( $mp, $checks ) = @_;

    # Count the number of commands that exited 0, and the ones that didn't
    my $pass = grep { exists $_->{exit} and $_->{exit} == 0 } @{$checks};
    my $fail = @{$checks} - $pass;

    # The label is the fixed metric name and the value is the count. The
    # thresholds belong with "pass" because they are defined in terms of the
    # number of OK checks.
    $mp->add_perfdata(
        label    => 'pass',
        value    => $pass,
        warning  => $mp->opts->warning,
        critical => $mp->opts->critical,
    );
    $mp->add_perfdata(
        label => 'fail',
        value => $fail,
    );

    # All done, we edited the object in place
    return;
}

# Drive one complete run of the plugin: parse options, build the check list,
# run the checks, record performance data, and report the result.
#
# Arguments:
#   the plugin object
#
# Dies if the options are not followed by exactly one argument, and passes on
# any exception thrown by the steps it calls.
sub run {
    my ($mp) = @_;

    # Register the custom options, then parse the command line
    foreach my $option (@OPTS) {
        $mp->add_arg( %{$option} );
    }
    $mp->getopts();

    # Exactly one argument should remain: the comma-separated list
    if ( @ARGV != 1 ) {
        die "Need a comma-separated HOST:CHECK list\n";
    }
    my $checks = build( $ARGV[0] );

    # Start the clock on the plugin's timeout before doing any slow work
    my $timeout = $mp->opts->timeout;
    alarm $timeout;

    # Each step below works on the same list of check hashrefs: fetch() fills
    # in the results, perfdata() summarises them on the plugin object, and
    # check() reports and exits
    fetch( $mp, $checks );
    perfdata( $mp, $checks );
    check( $mp, $checks );

    # Shouldn't ever get here
    return;
}

# Main function, entry point. Builds the plugin object and runs the check,
# turning any exception into a plugin failure via plugin_die().
sub main {

    # Build Monitoring::Plugin object
    my $mp = Monitoring::Plugin->new(
        usage => 'Usage: %s'
          . ' [--warning|-w THRESHOLD]'
          . ' [--critical|-c THRESHOLD] '
          . 'HOSTNAME1:CHECK1,HOSTNAME2:CHECK2[,HOSTNAME3:CHECK3...]',
        version => $VERSION,
    );

    # Run the check command, catching exceptions for plugin exit. The block
    # ends with an explicit true value so that the eval is false only when
    # an exception was actually thrown, and $EVAL_ERROR is then meaningful.
    eval { run($mp); 1 } or $mp->plugin_die($EVAL_ERROR);

    # run() is expected to exit through the plugin object. If it came back
    # instead, no status was ever reported, so fail with an explanation
    # rather than finishing silently.
    $mp->plugin_die('check finished without reporting a status');

    # Shouldn't ever get here
    return;
}
main();