1 |
--- smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_swRaid.userparam_swRaid_status 2009-03-24 17:07:15.000000000 +0100 |
2 |
+++ smeserver-zabbix-agent-0.1/root/etc/e-smith/templates/etc/zabbix/zabbix_agentd.conf/90UserParameters_swRaid 2009-03-24 17:07:01.000000000 +0100 |
3 |
@@ -0,0 +1,6 @@ |
4 |
+ |
5 |
+# Report status of every RAID array (parsing /proc/mdstat) |
6 |
+# Just add an item with the key raid.sw.status, type "Character". Then add a new trigger with an expression like this: |
7 |
+# \{hostname:raid.sw.status.str(CRITICAL)\}=1 |
8 |
+UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl |
9 |
+ |
10 |
--- smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/mdstat-parser.pl.userparam_swRaid_status 2009-03-24 17:04:29.000000000 +0100 |
11 |
+++ smeserver-zabbix-agent-0.1/root/var/lib/zabbix/bin/mdstat-parser.pl 2009-03-24 17:04:58.000000000 +0100 |
12 |
@@ -0,0 +1,115 @@ |
13 |
+#!/usr/bin/env perl |
14 |
+ |
15 |
+# Get status of Linux software RAID for SNMP / Nagios |
16 |
+# Author: Michal Ludvig <michal@logix.cz> |
17 |
+# http://www.logix.cz/michal/devel/nagios |
18 |
+# |
19 |
+# Simple parser for /proc/mdstat that outputs status of all |
20 |
+# or some RAID devices. Possible results are OK and CRITICAL. |
21 |
+# It could eventually be extended to output WARNING result in |
22 |
+# case the array is being rebuilt or if there are still some |
23 |
+# spares remaining, but for now leave it as it is. |
24 |
+# |
25 |
+# To run the script remotely via SNMP daemon (net-snmp) add the |
26 |
+# following line to /etc/snmpd.conf: |
27 |
+# |
28 |
+# extend raid-md0 /root/parse-mdstat.pl --device=md0 |
29 |
+# |
30 |
+# The script result will be available e.g. with command: |
31 |
+# |
32 |
+# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2 |
33 |
+ |
34 |
+use strict; |
35 |
+use Getopt::Long; |
36 |
+ |
37 |
+# Sample /proc/mdstat output: |
38 |
+# |
39 |
+# Personalities : [raid1] [raid5] |
40 |
+# md0 : active (read-only) raid1 sdc1[1] |
41 |
+# 2096384 blocks [2/1] [_U] |
42 |
+# |
43 |
+# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S) |
44 |
+# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U] |
45 |
+# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec |
46 |
+# |
47 |
+# unused devices: <none> |
48 |
+ |
49 |
use warnings;

# ---------------------------------------------------------------------------
# Main program: parse /proc/mdstat (or the file given with --file) and print
# one status line per MD array, for example:
#
#   OK - md0 [UU] has 2 of 2 devices active (active=sda1,sdb1 failed=none spare=none)
#
# Exit status uses the Nagios codes from %ERRORS below:
#   0 (OK)        no reported array is degraded or has a failed member
#   2 (CRITICAL)  at least one reported array is degraded or has a failed member
#   3 (UNKNOWN)   bad command line, unreadable file, or --device not found
# ---------------------------------------------------------------------------

my $file   = "/proc/mdstat";
my $device = "all";

## Return codes for Nagios
my %ERRORS = ('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

# Get command line options.  When GetOptions() returns false, Getopt::Long
# has already described the problem on STDERR, so just leave with UNKNOWN
# instead of carrying on with half-parsed options.
GetOptions ('file=s'   => \$file,
            'device=s' => \$device,
            'help'     => \&usage )
    or exit $ERRORS{'UNKNOWN'};

## Strip leading "/dev/" from --device in case it has been given
$device =~ s{^/dev/}{};

## This is a global return value - set to the worst result we get overall
my $retval = $ERRORS{'OK'};

# Number of arrays actually reported; used to detect an unknown --device.
my $reported = 0;

# Three-argument open with a lexical handle: the file name comes from the
# command line and must never be interpreted as an open mode or a pipe.
# A failure is reported on STDOUT with the UNKNOWN code rather than via
# die(), whose exit status would be derived from errno.
my $fh;
if (! open($fh, '<', $file)) {
    print "UNKNOWN - can't open $file: $!\n";
    exit $ERRORS{'UNKNOWN'};
}

while (defined(my $header = <$fh>)) {
    # Array header line, e.g.
    #   md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
    next if $header !~ /^(md\w+)\s*:/;
    my $dev = $1;
    next if $device ne "all" and $device ne $dev;
    $reported++;

    # Sort the member devices of this array into three buckets.
    my (@active, @failed, @spare);
    for my $word (split ' ', $header) {
        # A member looks like name[slot] followed by zero or more
        # one-letter flags in parentheses.  All flags are captured so that
        # (F) / (S) are recognised wherever they appear in the flag list.
        next if $word !~ /^([^\[\]]+)\[\d+\]((?:\(\w\))*)/;
        my ($member, $flags) = ($1, $2);
        if ($flags =~ /\(F\)/) {
            push @failed, $member;
        }
        elsif ($flags =~ /\(S\)/) {
            push @spare, $member;
        }
        else {
            # No flag, or a flag other than F/S: counted as active.
            push @active, $member;
        }
    }
    my $active_list = @active ? join(',', @active) : "none";
    my $failed_list = @failed ? join(',', @failed) : "none";
    my $spare_list  = @spare  ? join(',', @spare)  : "none";

    # The line following the header is expected to carry the counters, e.g.
    #   2096384 blocks [2/1] [_U]
    # which is read here as total=2, up=1, map="_U".  The match is checked
    # explicitly: after a failed match $1..$3 would still hold values from
    # an earlier pattern.
    my $status = <$fh>;
    my ($devs_total, $devs_up, $stat);
    if (defined $status and $status =~ /\[(\d+)\/(\d+)\]\s+\[(.*)\]\s*$/) {
        ($devs_total, $devs_up, $stat) = ($1, $2, $3);
    }

    my $result = "OK";
    if (@failed or (defined $devs_total and $devs_total > $devs_up)) {
        $result = "CRITICAL";
        $retval = $ERRORS{"CRITICAL"};
    }

    if (defined $devs_total) {
        print "$result - $dev [$stat] has $devs_up of $devs_total devices active (active=$active_list failed=$failed_list spare=$spare_list)\n";
    }
    else {
        # Status line missing (end of file) or without [total/up] counters:
        # the result then depends on the (F) flags alone.
        print "$result - $dev has no device counters on its status line (active=$active_list failed=$failed_list spare=$spare_list)\n";
    }
}
close $fh;

# An explicitly requested array that never showed up must not pass as OK.
if ($device ne "all" and ! $reported) {
    print "UNKNOWN - $device not found in $file\n";
    exit $ERRORS{'UNKNOWN'};
}

exit $retval;
109 |
+ |
110 |
+# ===== |
111 |
# Print a short help text on STDOUT and terminate the program with exit
# status 1.  Any arguments are ignored; the sub never returns.
sub usage
{
    # The text is a fixed message, not a printf format, so it is emitted
    # with print from a non-interpolating here-document (no escaping of
    # '@' or '"' needed, and a stray '%' could never be misread).
    print <<'END_USAGE';

Check status of Linux SW RAID

Author: Michal Ludvig <michal@logix.cz> (c) 2006
 http://www.logix.cz/michal/devel/nagios

Usage: mdstat-parser.pl [options]

 --file=<filename> Name of file to parse. Default is /proc/mdstat
 --device=<device> Name of MD device, e.g. md0. Default is "all"

END_USAGE
    exit(1);
}
127 |
+ |