1 |
stephdl |
1.1 |
--- smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/megaraid-parser.pl.megaraid_options 2010-02-17 13:44:48.000000000 +0100 |
2 |
|
|
+++ smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/megaraid-parser.pl 2010-02-17 13:45:29.000000000 +0100 |
3 |
|
|
@@ -34,17 +34,21 @@ |
4 |
|
|
use strict; |
5 |
|
|
use Getopt::Std; |
6 |
|
|
|
7 |
|
|
-our($opt_h, $opt_s); |
8 |
|
|
+our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p); |
9 |
|
|
|
10 |
|
|
|
11 |
|
|
-getopts('hs:'); |
12 |
|
|
+getopts('hs:o:p:m:'); |
13 |
|
|
|
14 |
|
|
if ( $opt_h ) { |
15 |
|
|
- print "usage w/o hotspare: $0\n"; |
16 |
|
|
- print " w/ hotspare: $0 -s <number of hotspares>\n"; |
17 |
|
|
+ print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n"; |
18 |
|
|
+ print " -s is how many hotspares are attached to the controller\n"; |
19 |
|
|
+ print " -m is the number of media errors to ignore\n"; |
20 |
|
|
+ print " -p is the predictive error count to ignore\n"; |
21 |
|
|
+ print " -o is the number of other disk errors to ignore\n"; |
22 |
|
|
exit; |
23 |
|
|
} |
24 |
|
|
|
25 |
|
|
+ |
26 |
|
|
my $megacli = '/opt/MegaRAID/MegaCli/MegaCli'; |
27 |
|
|
|
28 |
|
|
## Return codes for Nagios |
29 |
|
|
@@ -55,7 +59,12 @@ |
30 |
|
|
my $hotsparecount = 0; |
31 |
|
|
my $pdbad = 0; |
32 |
|
|
my $pdcount = 0; |
33 |
|
|
-my $pderrors = 0; |
34 |
|
|
+my $mediaerrors = 0; |
35 |
|
|
+my $mediaallow = 0; |
36 |
|
|
+my $prederrors = 0; |
37 |
|
|
+my $predallow = 0; |
38 |
|
|
+my $othererrors = 0; |
39 |
|
|
+my $otherallow = 0; |
40 |
|
|
my $result = ''; |
41 |
|
|
my $status = 'OK'; |
42 |
|
|
|
43 |
|
|
@@ -79,6 +88,15 @@ |
44 |
|
|
if ( $opt_s ) { |
45 |
|
|
$hotspares = $opt_s; |
46 |
|
|
} |
47 |
|
|
+if ( $opt_m ) { |
48 |
|
|
+ $mediaallow = $opt_m; |
49 |
|
|
+} |
50 |
|
|
+if ( $opt_p ) { |
51 |
|
|
+ $predallow = $opt_p; |
52 |
|
|
+} |
53 |
|
|
+if ( $opt_o ) { |
54 |
|
|
+ $otherallow = $opt_o; |
55 |
|
|
+} |
56 |
|
|
|
57 |
|
|
# Get the number of RAID controllers we have |
58 |
|
|
open (ADPCOUNT, "$megacli -adpCount |") |
59 |
|
|
@@ -150,11 +168,17 @@ |
60 |
|
|
PDISKS: while (<PDLIST>) { |
61 |
|
|
if ( m/Slot Number:\s*(\d+)/ ) { |
62 |
|
|
$slotnumber = $1; |
63 |
|
|
- $pdcount++ unless ( $slotnumber == 255 ); |
64 |
|
|
- } elsif ( m/Error Count:\s*(\d+)/ ) { |
65 |
|
|
- $pderrors += $1; |
66 |
|
|
+ # Slot 255 is the backplane, not a disk: keep it out of $pdcount (its error counts are still tallied below) |
67 |
|
|
+ next if ( $slotnumber == 255 ); |
68 |
|
|
+ $pdcount++; |
69 |
|
|
+ } elsif ( m/(\w+) Error Count:\s*(\d+)/ ) { |
70 |
|
|
+ if ( $1 eq 'Media') { |
71 |
|
|
+ $mediaerrors += $2; |
72 |
|
|
+ } else { |
73 |
|
|
+ $othererrors += $2; |
74 |
|
|
+ } |
75 |
|
|
} elsif ( m/Predictive Failure Count:\s*(\d+)/ ) { |
76 |
|
|
- $pderrors += $1; |
77 |
|
|
+ $prederrors += $1; |
78 |
|
|
} elsif ( m/Firmware state:\s*(\w+)/ ) { |
79 |
|
|
$fwstate = $1; |
80 |
|
|
if ( $fwstate eq 'Hotspare' ) { |
81 |
|
|
@@ -177,10 +201,15 @@ |
82 |
|
|
$result .= "$pdbad Bad Drives "; |
83 |
|
|
} |
84 |
|
|
|
85 |
|
|
+my $errorcount = $mediaerrors + $prederrors + $othererrors; |
86 |
|
|
# Were there any errors? |
87 |
|
|
-if ( $pderrors ) { |
88 |
|
|
- $result .= "($pderrors Errors) "; |
89 |
|
|
- $status = max_state($status, 'WARNING'); |
90 |
|
|
+if ( $errorcount ) { |
91 |
|
|
+ $result .= "($errorcount Errors) "; |
92 |
|
|
+ if ( ( $mediaerrors > $mediaallow ) || |
93 |
|
|
+ ( $prederrors > $predallow ) || |
94 |
|
|
+ ( $othererrors > $otherallow ) ) { |
95 |
|
|
+ $status = max_state($status, 'WARNING'); |
96 |
|
|
+ } |
97 |
|
|
} |
98 |
|
|
|
99 |
|
|
# Do we have as many hotspares as expected (if any) |