1 |
brianr |
1.1 |
--- smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_swRaid.userparam_swRaid_status 2009-03-24 17:07:15.000000000 +0100 |
2 |
|
|
+++ smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_swRaid 2009-03-24 17:07:01.000000000 +0100 |
3 |
|
|
@@ -0,0 +1,6 @@ |
4 |
|
|
+ |
5 |
|
|
+# Report the status of every RAID array (by parsing /proc/mdstat) |
6 |
|
|
+# Just add an item with the key raid.sw.status, type character. Then add a new trigger with an expression like this: |
7 |
|
|
+# \{hostname:raid.sw.status.str(CRITICAL)\}=1 |
8 |
|
|
+UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl |
9 |
|
|
+ |
10 |
|
|
--- smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/mdstat-parser.pl.userparam_swRaid_status 2009-03-24 17:04:29.000000000 +0100 |
11 |
|
|
+++ smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/mdstat-parser.pl 2009-03-24 17:04:58.000000000 +0100 |
12 |
|
|
@@ -0,0 +1,115 @@ |
13 |
|
|
+#!/usr/bin/env perl |
14 |
|
|
+ |
15 |
|
|
+# Get status of Linux software RAID for SNMP / Nagios |
16 |
|
|
+# Author: Michal Ludvig <michal@logix.cz> |
17 |
|
|
+# http://www.logix.cz/michal/devel/nagios |
18 |
|
|
+# |
19 |
|
|
+# Simple parser for /proc/mdstat that outputs status of all |
20 |
|
|
+# or some RAID devices. Possible results are OK and CRITICAL. |
21 |
|
|
+# It could eventually be extended to output WARNING result in |
22 |
|
|
+# case the array is being rebuilt or if there are still some |
23 |
|
|
+# spares remaining, but for now leave it as it is. |
24 |
|
|
+# |
25 |
|
|
+# To run the script remotely via SNMP daemon (net-snmp) add the |
26 |
|
|
+# following line to /etc/snmpd.conf: |
27 |
|
|
+# |
28 |
|
|
+# extend raid-md0 /var/lib/zabbix/bin/mdstat-parser.pl --device=md0 |
29 |
|
|
+# |
30 |
|
|
+# The script result will be available e.g. with command: |
31 |
|
|
+# |
32 |
|
|
+# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2 |
33 |
|
|
+ |
34 |
|
|
+use strict; |
35 |
|
|
+use Getopt::Long; |
36 |
|
|
+ |
37 |
|
|
+# Sample /proc/mdstat output: |
38 |
|
|
+# |
39 |
|
|
+# Personalities : [raid1] [raid5] |
40 |
|
|
+# md0 : active (read-only) raid1 sdc1[1] |
41 |
|
|
+# 2096384 blocks [2/1] [_U] |
42 |
|
|
+# |
43 |
|
|
+# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S) |
44 |
|
|
+# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U] |
45 |
|
|
+# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec |
46 |
|
|
+# |
47 |
|
|
+# unused devices: <none> |
48 |
|
|
+ |
49 |
|
|
+my $file = "/proc/mdstat"; |
50 |
|
|
+my $device = "all"; |
51 |
|
|
+ |
52 |
|
|
+# Get command line options. |
53 |
|
|
+GetOptions ('file=s' => \$file, |
54 |
|
|
+ 'device=s' => \$device, |
55 |
|
|
+ 'help' => sub { &usage() } ); |
56 |
|
|
+ |
57 |
|
|
+## Strip leading "/dev/" from --device in case it has been given |
58 |
|
|
+$device =~ s/^\/dev\///; |
59 |
|
|
+ |
60 |
|
|
+## Return codes for Nagios |
61 |
|
|
+my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); |
62 |
|
|
+ |
63 |
|
|
+## This is a global return value - set to the worst result we get overall |
64 |
|
|
+my $retval = 0; |
65 |
|
|
+ |
66 |
|
|
+my (%active_devs, %failed_devs, %spare_devs); |
67 |
|
|
+ |
68 |
|
|
+open FILE, "< $file" or die "Can't open $file : $!"; |
69 |
|
|
+while (<FILE>) { |
70 |
|
|
+ next if ! /^(md\d+)+\s*:/; |
71 |
|
|
+ next if $device ne "all" and $device ne $1; |
72 |
|
|
+ my $dev = $1; |
73 |
|
|
+ |
74 |
|
|
+ my @array = split(/ /); |
75 |
|
|
+ for $_ (@array) { |
76 |
|
|
+ next if ! /(\w+)\[\d+\](\(.\))*/; |
77 |
|
|
+ if ($2 eq "(F)") { |
78 |
|
|
+ $failed_devs{$dev} .= "$1,"; |
79 |
|
|
+ } |
80 |
|
|
+ elsif ($2 eq "(S)") { |
81 |
|
|
+ $spare_devs{$dev} .= "$1,"; |
82 |
|
|
+ } |
83 |
|
|
+ else { |
84 |
|
|
+ $active_devs{$dev} .= "$1,"; |
85 |
|
|
+ } |
86 |
|
|
+ } |
87 |
|
|
+ if (! defined($active_devs{$dev})) { $active_devs{$dev} = "none"; } |
88 |
|
|
+ else { $active_devs{$dev} =~ s/,$//; } |
89 |
|
|
+ if (! defined($spare_devs{$dev})) { $spare_devs{$dev} = "none"; } |
90 |
|
|
+ else { $spare_devs{$dev} =~ s/,$//; } |
91 |
|
|
+ if (! defined($failed_devs{$dev})) { $failed_devs{$dev} = "none"; } |
92 |
|
|
+ else { $failed_devs{$dev} =~ s/,$//; } |
93 |
|
|
+ |
94 |
|
|
+ $_ = <FILE>; |
95 |
|
|
+ /\[(\d+)\/(\d+)\]\s+\[(.*)\]$/; |
96 |
|
|
+ my $devs_total = $1; |
97 |
|
|
+ my $devs_up = $2; |
98 |
|
|
+ my $stat = $3; |
99 |
|
|
+ my $result = "OK"; |
100 |
|
|
+ if ($devs_total > $devs_up or $failed_devs{$dev} ne "none") { |
101 |
|
|
+ $result = "CRITICAL"; |
102 |
|
|
+ $retval = $ERRORS{"CRITICAL"}; |
103 |
|
|
+ } |
104 |
|
|
+ |
105 |
|
|
+ print "$result - $dev [$stat] has $devs_up of $devs_total devices active (active=$active_devs{$dev} failed=$failed_devs{$dev} spare=$spare_devs{$dev})\n"; |
106 |
|
|
+} |
107 |
|
|
+close FILE; |
108 |
|
|
+exit $retval; |
109 |
|
|
+ |
110 |
|
|
+# ===== |
111 |
|
|
+sub usage() |
112 |
|
|
+{ |
113 |
|
|
+ printf(" |
114 |
|
|
+Check status of Linux SW RAID |
115 |
|
|
+ |
116 |
|
|
+Author: Michal Ludvig <michal\@logix.cz> (c) 2006 |
117 |
|
|
+ http://www.logix.cz/michal/devel/nagios |
118 |
|
|
+ |
119 |
|
|
+Usage: mdstat-parser.pl [options] |
120 |
|
|
+ |
121 |
|
|
+ --file=<filename> Name of file to parse. Default is /proc/mdstat |
122 |
|
|
+ --device=<device> Name of MD device, e.g. md0. Default is \"all\" |
123 |
|
|
+ |
124 |
|
|
+"); |
125 |
|
|
+ exit(1); |
126 |
|
|
+} |
127 |
|
|
+ |