1 |
stephdl |
1.1 |
--- smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/sudoers/00zabbixAgentAlias.megaraid_parser 2009-04-24 22:28:07.000000000 +0200 |
2 |
|
|
+++ smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/sudoers/00zabbixAgentAlias 2009-04-26 18:43:19.000000000 +0200 |
3 |
|
|
@@ -1,7 +1,7 @@ |
4 |
|
|
{ |
5 |
|
|
my $runasroot = '/usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *'; |
6 |
|
|
if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){ |
7 |
|
|
- $runasroot .= ', /opt/MegaRAID/MegaCli/MegaCli -ldinfo -Lall -Aall'; |
8 |
|
|
+ $runasroot .= ', /opt/MegaRAID/MegaCli/MegaCli *'; |
9 |
|
|
} |
10 |
|
|
$runasroot .= "\n"; |
11 |
|
|
$OUT .= 'Cmnd_Alias ZABBIX_AGENT_ROOT = '.$runasroot |
12 |
|
|
--- smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_megaRaid.megaraid_parser 2009-04-24 22:28:07.000000000 +0200 |
13 |
|
|
+++ smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_megaRaid 2009-04-26 19:39:57.000000000 +0200 |
14 |
|
|
@@ -15,13 +15,12 @@ |
15 |
|
|
# Show Value: As is |
16 |
|
|
|
17 |
|
|
# The value reported is like: |
18 |
|
|
-# State: Optimal |
19 |
|
|
-# State: Degraded |
20 |
|
|
+# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7 |
21 |
|
|
# |
22 |
|
|
|
23 |
|
|
# Tips: You can add a simple trigger on this check like: |
24 |
|
|
-# \{ hostname:raid.mega.status.str( Degraded ) \}=1 |
25 |
|
|
-UserParameter=raid.mega.status,/usr/bin/sudo /opt/MegaRAID/MegaCli/MegaCli -ldinfo -Lall -Aall | grep "^State:" |
26 |
|
|
+# \{ hostname:raid.mega.status.str( OK ) \}=0 |
27 |
|
|
+UserParameter=raid.mega.status,/var/lib/zabbix/bin/megaraid-parser.pl |
28 |
|
|
|
29 |
|
|
HERE |
30 |
|
|
} |
31 |
|
|
--- smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/megaraid-parser.pl.megaraid_parser 2009-04-26 18:39:10.000000000 +0200 |
32 |
|
|
+++ smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/megaraid-parser.pl 2009-04-26 18:38:39.000000000 +0200 |
33 |
|
|
@@ -0,0 +1,194 @@ |
34 |
|
|
+#!/usr/bin/perl -w |
35 |
|
|
+ |
36 |
|
|
+# check_megaraid_sas Nagios plugin |
37 |
|
|
+# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu |
38 |
|
|
+# |
39 |
|
|
+# This program is free software; you can redistribute it and/or |
40 |
|
|
+# modify it under the terms of the GNU General Public License |
41 |
|
|
+# as published by the Free Software Foundation; either version 2 |
42 |
|
|
+# of the License, or (at your option) any later version. |
43 |
|
|
+# |
44 |
|
|
+# This program is distributed in the hope that it will be useful, |
45 |
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
46 |
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
47 |
|
|
+# GNU General Public License for more details. |
48 |
|
|
+# |
49 |
|
|
+# You should have received a copy of the GNU General Public License |
50 |
|
|
+# along with this program; if not, write to the Free Software |
51 |
|
|
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
52 |
|
|
+# |
53 |
|
|
+# |
54 |
|
|
+# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS |
55 |
|
|
+# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares |
56 |
|
|
+# attached to the controller, you can specify the number you should expect to |
57 |
|
|
+# find with the '-s' flag. |
58 |
|
|
+# |
59 |
|
|
+# The paths for the Nagios plugins lib and MegaCli may need to be changed. |
60 |
|
|
+# |
61 |
|
|
+# $Author: delgado $ |
62 |
|
|
+# $Revision: #3 $ $Date: 2007/06/07 $ |
63 |
|
|
+ |
64 |
|
|
+# Slightly modified by Daniel B. for SME Server integration with zabbix |
65 |
|
|
+# 23 Apr 2009 |
66 |
|
|
+ |
67 |
|
|
+use strict; |
68 |
|
|
+use Getopt::Std; |
69 |
|
|
+ |
70 |
|
|
+our($opt_h, $opt_s); |
71 |
|
|
+ |
72 |
|
|
+ |
73 |
|
|
+getopts('hs:'); |
74 |
|
|
+ |
75 |
|
|
+if ( $opt_h ) { |
76 |
|
|
+ print "usage w/o hotspare: $0\n"; |
77 |
|
|
+ print " w/ hotspare: $0 -s <number of hotspares>\n"; |
78 |
|
|
+ exit; |
79 |
|
|
+} |
80 |
|
|
+ |
81 |
|
|
+my $megacli = '/usr/bin/sudo /opt/MegaRAID/MegaCli/MegaCli'; |
82 |
|
|
+ |
83 |
|
|
+## Return codes for Nagios |
84 |
|
|
+my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); |
85 |
|
|
+ |
86 |
|
|
+my ($adapters); |
87 |
|
|
+my $hotspares = 0; |
88 |
|
|
+my $hotsparecount = 0; |
89 |
|
|
+my $pdbad = 0; |
90 |
|
|
+my $pdcount = 0; |
91 |
|
|
+my $pderrors = 0; |
92 |
|
|
+my $result = ''; |
93 |
|
|
+my $status = 'OK'; |
94 |
|
|
+ |
95 |
|
|
+sub max_state ($$) { |
96 |
|
|
+ my ($current, $compare) = @_; |
97 |
|
|
+ # CRITICAL on either side wins, so an existing CRITICAL is never downgraded. |
98 |
|
|
+ if (($current eq 'CRITICAL') || ($compare eq 'CRITICAL')) { |
99 |
|
|
+ return 'CRITICAL'; |
100 |
|
|
+ } elsif ($compare eq 'OK') { |
101 |
|
|
+ return $current; |
102 |
|
|
+ } elsif ($compare eq 'WARNING') { |
103 |
|
|
+ return 'WARNING'; |
104 |
|
|
+ } elsif (($compare eq 'UNKNOWN') && ($current eq 'OK')) { |
105 |
|
|
+ return 'UNKNOWN'; |
106 |
|
|
+ } else { |
107 |
|
|
+ return $current; |
108 |
|
|
+ } |
109 |
|
|
+} |
110 |
|
|
+ |
111 |
|
|
+ |
112 |
|
|
+if ( $opt_s ) { |
113 |
|
|
+ $hotspares = $opt_s; |
114 |
|
|
+} |
115 |
|
|
+ |
116 |
|
|
+# Get the number of RAID controllers we have |
117 |
|
|
+open (ADPCOUNT, "$megacli -adpCount |") |
118 |
|
|
+ || die "error: Could not execute MegaCli -adpCount"; |
119 |
|
|
+ |
120 |
|
|
+while (<ADPCOUNT>) { |
121 |
|
|
+ if ( m/Controller Count:\s*(\d+)/ ) { |
122 |
|
|
+ $adapters = $1; |
123 |
|
|
+ last; |
124 |
|
|
+ } |
125 |
|
|
+} |
126 |
|
|
+close ADPCOUNT; |
127 |
|
|
+ |
128 |
|
|
+ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) { |
129 |
|
|
+ # Get the number of logical drives on this adapter |
130 |
|
|
+ open (LDGETNUM, "$megacli -LdGetNum -a$adp |") |
131 |
|
|
+ || die "error: Could not execute $megacli -LdGetNum -a$adp"; |
132 |
|
|
+ |
133 |
|
|
+ my ($ldnum); |
134 |
|
|
+ while (<LDGETNUM>) { |
135 |
|
|
+ if ( m/Number of Virtual drives configured on adapter \d:\s*(\d+)/ ) { |
136 |
|
|
+ $ldnum = $1; |
137 |
|
|
+ last; |
138 |
|
|
+ } |
139 |
|
|
+ } |
140 |
|
|
+ close LDGETNUM; |
141 |
|
|
+ |
142 |
|
|
+ LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) { |
143 |
|
|
+ # Get info on this particular logical drive |
144 |
|
|
+ open (LDINFO, "$megacli -LdInfo -L$ld -a$adp |") |
145 |
|
|
+ || die "error: Could not execute $megacli -LdInfo -L$ld -a$adp"; |
146 |
|
|
+ |
147 |
|
|
+ my ($size, $unit, $raidlevel, $ldpdcount, $state); |
148 |
|
|
+ while (<LDINFO>) { |
149 |
|
|
+ if ( m/Size:\s*((\d+)(MB|GB|TB))/ ) { |
150 |
|
|
+ $size = $2; |
151 |
|
|
+ $unit = $3; |
152 |
|
|
+ # Adjust MB to GB if that's what we got |
153 |
|
|
+ if ( $unit eq 'MB' ) { |
154 |
|
|
+ $size = sprintf( "%.0f", ($size / 1024) ); |
155 |
|
|
+ $unit= 'GB'; |
156 |
|
|
+ } |
157 |
|
|
+ } elsif ( m/State:\s*(\w+)/ ) { |
158 |
|
|
+ $state = $1; |
159 |
|
|
+ if ( $state ne 'Optimal' ) { |
160 |
|
|
+ $status = 'CRITICAL'; |
161 |
|
|
+ } |
162 |
|
|
+ } elsif ( m/Number Of Drives:\s*(\d+)/ ) { |
163 |
|
|
+ $ldpdcount = $1; |
164 |
|
|
+ } elsif ( m/RAID Level: Primary-(\d)/ ) { |
165 |
|
|
+ $raidlevel = $1; |
166 |
|
|
+ } |
167 |
|
|
+ } |
168 |
|
|
+ close LDINFO; |
169 |
|
|
+ |
170 |
|
|
+ $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state "; |
171 |
|
|
+ |
172 |
|
|
+ } #LDISK |
173 |
|
|
+ close LDINFO; |
174 |
|
|
+ |
175 |
|
|
+ # Get info on physical disks for this adapter |
176 |
|
|
+ open (PDLIST, "$megacli -PdList -a$adp |") |
177 |
|
|
+ || die "error: Could not execute $megacli -PdList -a$adp"; |
178 |
|
|
+ |
179 |
|
|
+ my ($slotnumber,$fwstate); |
180 |
|
|
+ PDISKS: while (<PDLIST>) { |
181 |
|
|
+ if ( m/Slot Number:\s*(\d+)/ ) { |
182 |
|
|
+ $slotnumber = $1; |
183 |
|
|
+ $pdcount++ unless ( $slotnumber == 255 ); |
184 |
|
|
+ } elsif ( m/Error Count:\s*(\d+)/ ) { |
185 |
|
|
+ $pderrors += $1; |
186 |
|
|
+ } elsif ( m/Predictive Failure Count:\s*(\d+)/ ) { |
187 |
|
|
+ $pderrors += $1; |
188 |
|
|
+ } elsif ( m/Firmware state:\s*(\w+)/ ) { |
189 |
|
|
+ $fwstate = $1; |
190 |
|
|
+ if ( $fwstate eq 'Hotspare' ) { |
191 |
|
|
+ $hotsparecount++; |
192 |
|
|
+ } elsif ( $fwstate eq 'Online' ) { |
193 |
|
|
+ # Do nothing |
194 |
|
|
+ } elsif ( $slotnumber != 255 ) { |
195 |
|
|
+ $pdbad++; |
196 |
|
|
+ $status = 'CRITICAL'; |
197 |
|
|
+ } |
198 |
|
|
+ } |
199 |
|
|
+ } #PDISKS |
200 |
|
|
+ close PDLIST; |
201 |
|
|
+} |
202 |
|
|
+ |
203 |
|
|
+$result .= "Drives:$pdcount "; |
204 |
|
|
+ |
205 |
|
|
+# Any bad disks? |
206 |
|
|
+if ( $pdbad ) { |
207 |
|
|
+ $result .= "$pdbad Bad Drives "; |
208 |
|
|
+} |
209 |
|
|
+ |
210 |
|
|
+# Were there any errors? |
211 |
|
|
+if ( $pderrors ) { |
212 |
|
|
+ $result .= "($pderrors Errors) "; |
213 |
|
|
+ $status = max_state($status, 'WARNING'); |
214 |
|
|
+} |
215 |
|
|
+ |
216 |
|
|
+# Do we have as many hotspares as expected (if any) |
217 |
|
|
+if ( $hotspares ) { |
218 |
|
|
+ if ( $hotsparecount < $hotspares ) { |
219 |
|
|
+ $status = max_state($status, 'WARNING'); |
220 |
|
|
+ $result .= "Hotspare(s):$hotsparecount (of $hotspares)"; |
221 |
|
|
+ } else { |
222 |
|
|
+ $result .= "Hotspare(s):$hotsparecount"; |
223 |
|
|
+ } |
224 |
|
|
+} |
225 |
|
|
+ |
226 |
|
|
+print STDOUT "$status: $result\n"; |
227 |
|
|
+exit $ERRORS{$status}; |