mirror of
https://github.com/thomas-krenn/check_lsi_raid.git
synced 2026-02-24 04:38:44 +01:00
897 lines
33 KiB
Perl
Executable File
897 lines
33 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
# ======================================================================================
|
|
# $Id$
|
|
# check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status
|
|
# --------------------------------------------------------------------------------------
|
|
# Created as part of a semester project at the University of Applied Sciences Hagenberg
|
|
# (http://www.fh-ooe.at/en/hagenberg-campus/)
|
|
#
|
|
# Copyright (c) 2013:
|
|
# Grubhofer Martin (s1110239013@students.fh-hagenberg.at)
|
|
# Scheipner Alexander (s1110239032@students.fh-hagenberg.at)
|
|
# Werner Sebastian (s1110239038@students.fh-hagenberg.at)
|
|
# Georg Schoenberger (gschoenberger@thomas-krenn.com)
|
|
# Jonas Meurer (jmeurer@inet.de)
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation; either version 3 of the License, or (at your option) any later
|
|
# version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
# details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, see <http://www.gnu.org/licenses/>.
|
|
# ======================================================================================
|
|
use strict;
|
|
use warnings;
|
|
use Getopt::Long qw(:config no_ignore_case);
|
|
|
|
# Plugin-wide settings. Most of them can be overridden on the command line.
our $VERBOSITY = 0;    # 0 = single line output, raised by -v / -vv / -vvv
our $VERSION = "1.2";
our $NAME = "check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status";

# Default temperature thresholds in degrees Celsius
# (C_ = RAID controller, PD_ = physical device).
our ($C_TEMP_WARNING, $C_TEMP_CRITICAL) = (80, 90);
our ($PD_TEMP_WARNING, $PD_TEMP_CRITICAL) = (40, 45);

# Per disk warning thresholds for media errors, other errors,
# predictive fail count and shield counter.
our $ignerr_m = 0;
our $ignerr_o = 0;
our $ignerr_p = 0;
our $ignerr_s = 0;

# Set to 1 if no enclosures are used (no '/eX' part in storcli commands).
our $NOENCLOSURES = 0;

# Nagios plugin exit codes
use constant STATE_OK => 0;
use constant STATE_WARNING => 1;
use constant STATE_CRITICAL => 2;
use constant STATE_UNKNOWN => 3;

# Column names, in order, used as hash keys when a logical device (ld) or
# physical device (pd) table row of the storcli output is split into fields.
our @ldmap_a = qw(DG/VD TYPE State Access Consist Cache Cac sCC Size);
our @pdmap_a = qw(EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp);
|
|
|
|
# Always return the highest state level
|
|
sub getExitState {
	# Arguments: previous state, upcoming state (Nagios state codes).
	# Returns the higher of both states, but never less than STATE_OK.
	# Returns STATE_UNKNOWN if one of the states is not defined.
	my ($previousState, $upcomingState) = @_;

	# An undefined state cannot be ranked; return early so that undef is
	# never compared numerically (would warn under -w).
	if (!defined($previousState) || !defined($upcomingState)) {
		return STATE_UNKNOWN;
	}

	my $returnState = STATE_OK;
	foreach my $state ($previousState, $upcomingState) {
		$returnState = $state if $state > $returnState;
	}
	return $returnState;
}
|
|
|
|
# Explains the Usage of the plugin, also which options take which values
|
|
sub displayUsage {
	# Prints all supported command line options together with their default
	# values to STDOUT. Does not exit; the caller decides on the exit code.
	print "Usage: \n";
	print " [ -h | --help ]\n Display this help page\n";
	print " [ -v | -vv | -vvv | --verbose ]\n Sets the verbosity level.\n no -v is the normal single line output for Nagios/Icinga\n -v is a more detailed version but still usable in Nagios.\n -vv is a multiline output for debugging configuration errors or more detailed information.\n -vvv is for plugin problem diagnosis.\n For further information please visit: http://nagiosplug.sourceforge.net/developer-guidelines.html#AEN39\n";
	print " [ -V | --version ]\n Displays the Version of the tk-lsi-plugin and the version of StorCLI\n";
	print " [ -C <Controller Number> | --controller <Controller Number> ]\n Specifies a Controller number, defaults to 0\n";
	print " [ -EID | --enclosure ]\n Specifies one or more Enclosures, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list(0,1,2,3,...)\n";
	print " [ -LD | --logicaldevice ]\n Specifies one or more Logical Devices, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list(0,1,2,3,...)\n";
	print " [ -PD | --physicaldevice ]\n Specifies one or more Physical Devices, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list(0,1,2,3,...)\n";
	print " [ -Tw | --temperature-warn ]\n Specifies the RAID-Controller temperature warning range, default is ${C_TEMP_WARNING}C or more\n";
	print " [ -Tc | --temperature-critical ]\n Specifies the RAID-Controller temperature critical error range, default is ${C_TEMP_CRITICAL}C or more.\n Requires -Tw | --temperature-warn to be set.\n";
	print " [ -PDTw | --physicaldevicetemperature-warn ]\n Specifies the disk temperature warning range, default is ${PD_TEMP_WARNING}C or more\n";
	print " [ -PDTc | --physicaldevicetemperature-critical ]\n Specifies the disk temperature critical error range, default is ${PD_TEMP_CRITICAL}C or more.\n Requires -PDTw | --physicaldevicetemperature-warn to be set.\n";
	print " [ -Im | --ignore-media-errors ]\n Specifies the warning threshold for media errors per disk, default is $ignerr_m.\n";
	print " [ -Io | --ignore-other-errors ]\n Specifies the warning threshold for other errors per disk, default is $ignerr_o.\n";
	print " [ -Ip | --ignore-predictive-fail-count ]\n Specifies the warning threshold for predictive fail count per disk, default is $ignerr_p.\n";
	print " [ -Is | --ignore-shield-counter ]\n Specifies the warning threshold for shield counter per disk, default is $ignerr_s.\n";
	print " [ -p <path> | --path <path>]\n Specifies the path to StorCLI, default is /usr/bin/storcli or C:\\Programme\\...\\storcli.exe\n";
	print " [ -b <0/1> | --BBU <0/1> ]\n Check if a BBU or a CacheVault module is present. One must be present unless '-b 0' is defined.\n This ensures that for a given controller a BBU/CV must be present per default.\n";
	print " [ --noenclosures <0/1> ]\n Specifies if enclosures are present or not. 0 means enclosures are\n present (default), 1 states no enclosures are used (no 'eall' in storcli commands).\n";
	# --nosudo is accepted by the option parser, so it is listed here as well
	print " [ --nosudo ]\n Do not call StorCLI via sudo. On Linux sudo is used unless this option is set.\n";
}
|
|
|
|
# Displays a short Help text for the user
|
|
sub displayHelp {
	# Prints name, version, a short description, the usage text and contact
	# information to STDOUT, then terminates the plugin with STATE_OK.
	print $NAME . " Version: " . $VERSION ."\n";
	print "Copyright (C) 2013 Thomas-Krenn.AG\n";
	print "Current updates available at http://git.thomas-krenn.com/check_lsi_raid.git\n";
	print "This Nagios/Icinga Plugin checks LSI RAID-Controllers for Controller, \nPhysical-Device and Logical Device warnings and errors.\n";
	print "In order for this plugin to work properly you need to add the \nnagios-user to your sudoers file (or create a new one in /etc/sudoers.d/)\n";
	displayUsage();
	# The wiki URL used to be printed twice ("<url> and <url>"); it is now
	# printed once.
	print "Further information about this plugin can be found at:\n";
	print "    http://www.thomas-krenn.com/de/wiki/LSI_RAID_Monitoring_Plugin\n";
	print "Please send an email to the tk-monitoring plugin-user mailing list:\n";
	print "    tk-monitoring-plugins-user\@lists.thomas-krenn.com\n";
	print "if you have questions regarding use of this software, to submit patches, or\n";
	print "suggest improvements. The mailing list archive is available at:\n";
	print "    http://lists.thomas-krenn.com/pipermail/tk-monitoring-plugins-user\n";
	exit(STATE_OK);
}
|
|
|
|
# Prints the Name, Version of the Plugin
|
|
# Also Prints the version of StorCLI
|
|
sub displayVersion {
	# Arguments: path to sudo (may be undefined or empty), path to storcli
	# (may be undefined or empty).
	# Prints the plugin name and version, followed by every line of
	# 'storcli -v' that starts with "Storage", then exits with STATE_OK.
	my ($sudo, $storcli) = @_;

	print $NAME . "\nVersion: ". $VERSION . "\n\n";

	# This sub can be called before the storcli path is known. Without a
	# path there is nothing to execute, so only the plugin version is shown.
	if (defined($storcli) && $storcli ne '') {
		my $command = "$storcli -v";
		if (defined($sudo) && $sudo ne '') {
			$command = "$sudo $command";
		}
		foreach my $line (`$command`) {
			next unless $line =~ /^\s*Storage/;
			$line =~ s/^\s+|\s+$//g;
			print $line;
		}
	}
	print "\n";
	exit(STATE_OK);
}
|
|
|
|
sub checkCommandStatus{
	# Argument: reference to an array holding the output lines of a
	# storcli call.
	# Returns 1 if the first line starting with "Status" reports
	# "Status = Success", otherwise 0 (also if there is no such line).
	my @output = @{(shift)};
	foreach my $line (@output){
		next unless $line =~ /^Status/;
		# Only the first status line decides. Whitespace around '=' and at
		# the end of the line is tolerated, so that CRLF line endings do not
		# turn a successful call into a failure.
		return ($line =~ /^Status\s*=\s*Success\s*$/) ? 1 : 0;
	}
	return 0;
}
|
|
|
|
# Returns information about:
|
|
# - Controller status and controller temperature
|
|
sub getControllerStatus {
	# Arguments: sudo path, storcli path, controller number, reference to
	# the warning temperature range, reference to the critical temperature
	# range (both ranges as returned by getThresholds).
	# Returns (state, message). The message is a list of findings, each
	# terminated by ", ". Exits with STATE_UNKNOWN if the storcli call does
	# not report success.
	my $sudo = $_[0];
	my $storcli = $_[1];
	my $controller = $_[2];
	my @temperature_w = @{($_[3])};
	my @temperature_c = @{($_[4])};

	my $command = "$sudo $storcli /c$controller show all";
	my $status = 0; # Return Status
	my $statusMessage = ''; # Return String

	my @output = `$command`;
	if(!checkCommandStatus(\@output)) {
		print "Invalid StorCLI command! ($command)\n";
		exit(STATE_UNKNOWN);
	}

	foreach my $line (@output) {
		# A line is classified by its first and second word, the value is
		# the last alphanumeric word of the line.
		my ($first) = $line =~ /^([a-zA-Z0-9]*)/;
		my ($last) = $line =~ /([a-zA-Z0-9]*)$/;
		my ($second) = $line =~ /\s+([a-zA-Z0-9]*)/;
		$second = '' if !defined($second);

		my ($state, $message) = (STATE_OK, '');
		if($first eq "Controller") {
			if($second eq "Status") {
				if($last ne "OK" && $last ne "Optimal") {
					($state, $message) = (STATE_WARNING, "Ctrl. status not optimal, ");
				}
			}
			elsif($second eq "must") {
				if($last ne "No") {
					($state, $message) = (STATE_CRITICAL, "Ctrl. needs reboot, ");
				}
			}
			elsif($second eq "has") {
				if($last ne "No") {
					($state, $message) = (STATE_WARNING, "Ctrl. booted in safe mode, ");
				}
			}
			elsif($second eq "temperature") {
				($state, $message) = _checkTemperature("Ctrl.", "Controller", $last, \@temperature_w, \@temperature_c);
			}
		}
		elsif($first eq "ROC") {
			if($second eq "temperature") {
				($state, $message) = _checkTemperature("ROC", "ROC", $last, \@temperature_w, \@temperature_c);
			}
		}
		elsif($first eq "Memory") {
			# The second word is compared without the preceding whitespace.
			# The former capture included the whitespace and therefore never
			# equalled "Correctable" or "Uncorrectable".
			if($second eq "Correctable") {
				if($last ne "0") {
					($state, $message) = (STATE_WARNING, "Memory correctable errors detected, ");
				}
			}
			elsif($second eq "Uncorrectable") {
				if($last ne "0") {
					($state, $message) = (STATE_CRITICAL, "Memory uncorrectable errors detected, ");
				}
			}
		}
		elsif($first eq "Failed") {
			if($last ne "No") {
				($state, $message) = (STATE_WARNING, "Failed to get lock key on bootup, ");
			}
		}
		#TODO Improve rollback detection
		elsif($first eq "A") {
			if($last ne "No") {
				($state, $message) = (STATE_WARNING, "A rollback operation is in progress, ");
			}
		}

		next if $message eq '';
		$status = getExitState($status, $state);
		$statusMessage .= $message;
	}
	return ($status, $statusMessage);
}

# Returns 1 if a temperature triggers the given range, else 0.
# Arguments: temperature, "in" or "out", range from, range to.
# "in" triggers inside the range (borders included), everything else
# triggers outside of the range.
sub _tempInAlertRange {
	my ($temp, $mode, $from, $to) = @_;
	if($mode eq "in") {
		return (($temp >= $from) && ($temp <= $to)) ? 1 : 0;
	}
	return (($temp < $from) || ($temp > $to)) ? 1 : 0;
}

# Rates a temperature against the warning and the critical range.
# Arguments: short label, long label (used from verbosity 2 on),
# temperature, reference to warning range, reference to critical range.
# Returns (state, message); (STATE_OK, '') if nothing is to be reported.
sub _checkTemperature {
	my ($shortLabel, $longLabel, $temp, $warnRange, $critRange) = @_;

	# Without a leading digit there is no reading that could be compared
	return (STATE_OK, '') unless defined($temp) && $temp =~ /^[0-9]/;
	# The critical range is only evaluated if the warning range is hit
	return (STATE_OK, '') unless _tempInAlertRange($temp, @{$warnRange});

	if(_tempInAlertRange($temp, @{$critRange})) {
		my $message = ($VERBOSITY == 0) ? "$shortLabel temp. critical, "
			: ($VERBOSITY == 1) ? "$shortLabel temp. is critical (${temp}C), "
			: "$longLabel temperature is critical (${temp}C), ";
		return (STATE_CRITICAL, $message);
	}
	my $message = ($VERBOSITY == 0) ? "$shortLabel temp. warning, "
		: ($VERBOSITY == 1) ? "$shortLabel temp. warning (${temp}C), "
		: "$longLabel temperature warning (${temp}C), ";
	return (STATE_WARNING, $message);
}
|
|
|
|
# Returns information about:
|
|
# - Logical device status
|
|
sub getLogicalDeviceStatus {
	# Arguments: storcli command including the controller part, reference
	# to the list of logical device numbers (empty means all), storcli
	# 'show' action.
	# Returns (state, message). The table rows are parsed for the action
	# "all" only and are not rated yet, so the state currently stays 0.
	# Exits with STATE_UNKNOWN if the storcli call does not report success,
	# like the other status subs do.
	my $storcli = $_[0];
	my @logDevices = @{($_[1])};
	my $action = $_[2];

	my $command = $storcli;
	my $status = 0;
	my $statusMessage = ''; # Return String

	$command .= @logDevices ? "/v".join(",", @logDevices) : "/vall";
	$command .= " show $action";
	my @output = `$command`;
	if(!checkCommandStatus(\@output)) {
		print "Invalid StorCLI command! ($command)\n";
		exit(STATE_UNKNOWN);
	}

	my @logDevs;
	if($action eq "all") {
		my $currBlock;
		foreach my $line (@output){
			# A line like '/c0/v0 ...' starts the block of a logical device
			if($line =~ /^\/(c[0-9]*\/v[0-9]*).*/){
				$currBlock = $1;
				next;
			}
			next unless defined($currBlock);
			next unless $line =~ /^\d+\/\d+\s+\w+\d\s+\w+.*/;

			my @fields = split(/\s+/, $line);
			# The current block is the c0/v0 name
			my %lineValues_h = ('ld' => $currBlock);
			@lineValues_h{@ldmap_a} = @fields[0 .. $#ldmap_a];
			push @logDevs, \%lineValues_h;
		}
	}

	# The dump of the parsed devices is diagnosis output. It must not be
	# part of the regular plugin output, and its return value must not end
	# up as the state of this sub.
	if($VERBOSITY >= 3) {
		require Data::Dumper;
		print Data::Dumper::Dumper(@logDevs);
	}
	return ($status, $statusMessage);
}
|
|
|
|
# Returns information about:
|
|
# - Physical device status
|
|
sub getPhysDeviceStatus {
	# Arguments (by position): 1 = storcli command including the controller
	# part, 3 = reference to the enclosure list, 4 = reference to the
	# physical device list (empty lists mean all), 5 and 6 = references to
	# the warning and critical disk temperature ranges, 7 = storcli 'show'
	# action. Positions 0 and 2 are not used.
	# Returns (state, message). The drives are parsed for the action "all"
	# only and are not rated yet, so the state currently stays 0.
	# Exits with STATE_UNKNOWN if the storcli call does not report success,
	# like the other status subs do.
	my $storcli = $_[1];
	my @enclosures = @{($_[3])};
	my @physDevices = @{($_[4])};
	# The temperature ranges are taken over but not evaluated yet
	my @physicalDeviceTemperature_w = @{($_[5])};
	my @physicalDeviceTemperature_c = @{($_[6])};
	my $action = $_[7];

	my $command = $storcli;
	my $status = 0;
	my $statusMessage = '';

	if(!$NOENCLOSURES){
		$command .= @enclosures ? "/e".join(",", @enclosures) : "/eall";
	}
	$command .= @physDevices ? "/s".join(",", @physDevices) : "/sall";
	$command .= " show $action";
	my @output = `$command`;
	if(!checkCommandStatus(\@output)) {
		print "Invalid StorCLI command! ($command)\n";
		exit(STATE_UNKNOWN);
	}

	my @physDevs;
	if($action eq "all") {
		# One pattern on a single line: a pattern that is wrapped without /x
		# contains the line breaks and can never match the wrapped names.
		my $detailPattern = qr/^(Shield Counter|Media Error Count|Other Error Count|BBM Error Count|Drive Temperature|Predictive Failure Count|S\.M\.A\.R\.T alert flagged by drive)\s*=\s*(.*?)\s*$/;
		my $currBlock;
		my %lineValues_h;
		foreach my $line(@output){
			# 'Drive /c0/e252/s0 :' starts the block of a physical device.
			# The enclosure part is optional, as the command is built
			# without it if NOENCLOSURES is set - presumably the header then
			# has no enclosure part either; verify against storcli output.
			if($line =~ /^Drive \/(c[0-9]*(?:\/e[0-9]*)?\/s[0-9]*) \:$/){
				$currBlock = $1;
				next;
			}
			next unless defined($currBlock);

			if($line =~ /^\d+\:\d+\s+\d+\s+\w+\s+\d+.*/){
				my @fields = split(/\s+/, $line);
				# The current block is the c0/e252/s0 name
				$lineValues_h{'pd'} = $currBlock;
				my $j = 0; # index of the next unread field
				foreach my $column (@pdmap_a){
					my $value = $fields[$j++];
					if($column eq 'Size'){
						# The unit is a field of its own, append it
						if(defined($fields[$j]) && ($fields[$j] eq 'GB' || $fields[$j] eq 'TB')){
							$value .= $fields[$j++];
						}
					}
					elsif($column eq 'Model'){
						# Model should be the next to last element. If it is
						# not, it consists of two words.
						if(($j+1) != scalar(@fields) && defined($fields[$j])){
							$value .= ' '.$fields[$j++];
						}
					}
					$lineValues_h{$column} = $value;
				}
			}
			if($line =~ $detailPattern){
				$lineValues_h{$1} = $2;
			}
			# If the last value is parsed, set up for the next device
			if(exists($lineValues_h{'S.M.A.R.T alert flagged by drive'})){
				# Store a copy, the working hash is reused for the next drive
				push @physDevs, { %lineValues_h };
				undef $currBlock;
				%lineValues_h = ();
			}
		}
	}

	# The dump of the parsed devices is diagnosis output. It must not be
	# part of the regular plugin output, and its return value must not end
	# up as the state of this sub.
	if($VERBOSITY >= 3) {
		require Data::Dumper;
		print Data::Dumper::Dumper(@physDevs);
	}
	return ($status, $statusMessage);
}
|
|
|
|
# Returns information about:
|
|
# - Battery Backup Unit status
|
|
# - Temperature, Battery status, voltage
|
|
sub getBBUStatus {
	# Arguments: sudo path, storcli path, controller number.
	# Returns (state, message). The message is a list of findings, each
	# terminated by ", ". Exits with STATE_UNKNOWN if the storcli call does
	# not report success.
	my $sudo = $_[0];
	my $storcli = $_[1];
	my $controller = $_[2];

	my $command = "$sudo $storcli /c$controller/bbu show status";
	my $status = 0;
	my $statusMessage = '';

	my @output = `$command`;
	if(!checkCommandStatus(\@output)) {
		print "Invalid StorCLI command! ($command)\n";
		exit(STATE_UNKNOWN);
	}

	# The section names contain underscores, so they have to be matched as
	# a whole. The alphanumeric first word of a line ('BBU') never equals
	# 'BBU_Info' or 'BBU_Firmware_Status'.
	my %blockIdOf = (
		'BBU_Info' => 1,
		'BBU_Firmware_Status' => 2,
		'GasGaugeStatus' => 3,
	);
	my $blockid = 0;
	foreach my $line (@output) {
		if($line =~ /^(BBU_Info|BBU_Firmware_Status|GasGaugeStatus)\b/) {
			$blockid = $blockIdOf{$1};
			next;
		}
		# A line is classified by its first and second word, the value is
		# the last alphanumeric word of the line.
		my ($first) = $line =~ /^([a-zA-Z0-9]*)/;
		my ($last) = $line =~ /([a-zA-Z0-9]*)$/;
		my ($second) = $line =~ /\s+([a-zA-Z0-9]*)/;
		$second = '' if !defined($second);

		my ($state, $message) = (STATE_OK, '');
		# Check BBU_Info block
		if($blockid == 1) {
			if($first eq "Battery" && $second eq "State" && $last ne "Optimal") {
				($state, $message) = (STATE_WARNING, "BBU state not optimal, ");
			}
		}
		# Check BBU_Firmware_Status
		elsif($blockid == 2) {
			if($first eq "Temperature") {
				if($last ne "OK") {
					($state, $message) = (STATE_CRITICAL,
						($VERBOSITY == 0) ? "BBU temp. critical, " : "BBU Temperature critical, ");
				}
			}
			elsif($first eq "Battery") {
				if($second eq "State") {
					if($last ne "Optimal") {
						($state, $message) = (STATE_WARNING, "BBU state not optimal, ");
					}
				}
				elsif($second eq "Pack") {
					if($last ne "No") {
						($state, $message) = (STATE_CRITICAL, "BBU pack missing, ");
					}
				}
				elsif($second eq "Replacement") {
					if($last ne "No") {
						($state, $message) = (STATE_WARNING, "BBU replacement required, ");
					}
				}
			}
			elsif($first eq "Voltage") {
				if($last ne "OK") {
					($state, $message) = (STATE_CRITICAL, "BBU voltage not ok, ");
				}
			}
			# NOTE(review): applies to every line of this section that starts
			# with 'Learn' - confirm against the storcli output that only the
			# learn cycle status line is meant.
			elsif($first eq "Learn") {
				if($last ne "OK") {
					($state, $message) = (STATE_WARNING, "BBU learn cycle status not ok, ");
				}
			}
			elsif($first eq "I2C") {
				if($last ne "No") {
					($state, $message) = (STATE_CRITICAL, "BBU I2C errors, ");
				}
			}
			elsif($first eq "Remaining") {
				if($last ne "No") {
					($state, $message) = (STATE_WARNING,
						($VERBOSITY == 0) ? "BBU capacity low, " : "BBU remaining capacity is low, ");
				}
			}
		}
		# Check GasGaugeStatus
		elsif($blockid == 3) {
			if($first eq "Over") {
				if($second eq "Temperature") {
					if($last ne "No") {
						($state, $message) = (STATE_CRITICAL,
							($VERBOSITY == 0) ? "BBU temp. critical, " : "BBU Temperature critical, ");
					}
				}
				elsif($second eq "Charged") {
					if($last ne "No") {
						($state, $message) = (STATE_CRITICAL, "BBU over charged, ");
					}
				}
			}
		}

		next if $message eq '';
		$status = getExitState($status, $state);
		$statusMessage .= $message;
	}
	return ($status, $statusMessage);
}
|
|
|
|
# Returns information about:
|
|
# - Cache Vault module status
|
|
# - If CacheVault must be replaced
|
|
sub getCVStatus {
	# Arguments: sudo path, storcli path, controller number.
	# Returns (state, message). The message is a list of findings, each
	# terminated by ", ". Exits with STATE_UNKNOWN if the storcli call does
	# not report success.
	my $sudo = $_[0];
	my $storcli = $_[1];
	my $controller = $_[2];

	my $command = "$sudo $storcli /c$controller/cv show status";
	my $status = 0;
	my $statusMessage = '';

	my @output = `$command`;
	if(!checkCommandStatus(\@output)) {
		print "Invalid StorCLI command! ($command)\n";
		exit(STATE_UNKNOWN);
	}

	my $currBlock;
	foreach my $line (@output) {
		if($line =~ /^(Cachevault_Info|Firmware_Status)/){
			$currBlock = $1;
			next;
		}
		next unless defined($currBlock);

		$line =~ s/^\s+|\s+$//g;#trim line
		if($currBlock eq 'Cachevault_Info' && $line =~ /^State/){
			# Name and value are separated by at least two whitespaces
			my @vals = split('\s{2,}',$line);
			my $state = defined($vals[1]) ? $vals[1] : 'unknown';
			if($state ne "Optimal") {
				$status = getExitState($status, STATE_WARNING);
				if($VERBOSITY == 0){$statusMessage .= "CacheVault state not optimal, ";}
				# Report the parsed state; $1 is not set at this point as the
				# match on /^State/ has no capture group.
				if($VERBOSITY >= 1){$statusMessage .= "Cachevault Info state $state, "; }
			}
		}
		elsif($currBlock eq 'Firmware_Status' && $line =~ /^Replacement required/){
			my ($required) = $line =~ /([a-zA-Z0-9]*)$/;
			if($required ne "No") {
				$status = getExitState($status, STATE_WARNING);
				$statusMessage .= "CacheVault replacement required, ";
			}
		}
	}
	return ($status, $statusMessage);
}
|
|
|
|
# Checks whether a BBU or CV is present
|
|
# - One of the two show commands must return 'Success'
|
|
sub checkBBUorCVIsPresent{
	# Arguments: sudo path, storcli path, controller number.
	# Returns (bbu, cv): each value is 1 if the 'show' command of the
	# module reports success, otherwise 0.
	my ($sudo, $storcli, $controller) = @_;

	my @present;
	foreach my $module ('bbu', 'cv') {
		my $command = "$sudo $storcli /c$controller/$module show";
		my @output = `$command`;
		push @present, (checkCommandStatus(\@output) ? 1 : 0);
	}
	return @present;
}
|
|
|
|
# Nagios development guidelines: temperature threshold scheme
|
|
# http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
|
|
# Returns a temperature range (array) in or out which a temperature should be
|
|
# Array content: ("in" or "out", range from, range to)
|
|
# Example ranges:
|
|
# Generate an alert if x...
|
|
# -Tw 10 < 0 or > 10, (outside the range of {0 .. 10})
|
|
# -Tw 10: < 10, (outside {10 .. inf})
|
|
# -Tw ~:10 > 10, (outside the range of {-inf .. 10})
|
|
# -Tw 10:20 < 10 or > 20, (outside the range of {10 .. 20})
|
|
# -Tw @10:20 >= 10 and <= 20, (inside the range of {10 .. 20})
|
|
sub getThresholds {
	# Arguments: reference to the list of threshold strings given on the
	# command line (only the first one is used), default upper limit.
	# Returns ("in" or "out", range from, range to); "in" alerts inside of
	# the range, "out" alerts outside of the range.
	# Prints an error and exits with STATE_UNKNOWN on an invalid threshold.
	#
	# An omitted start ("@:20", ":20", ":") counts as 0. Open ended ranges
	# are expressed via the complementary range, -273 is used as the lower
	# limit: "10:" alerts inside of {-273 .. 9}, "@10:" alerts outside of
	# {-273 .. 9}.
	my @thresholds = @{($_[0])};
	my $default = $_[1];

	if(scalar(@thresholds) == 0) {
		return ("out", -273, $default);
	}

	my $spec = $thresholds[0];
	my @range;
	if(my ($from, $to) = $spec =~ /^\@([0-9]*)\:([0-9]+)$/) {
		@range = ("in", length($from) ? $from : 0, $to);
	}
	elsif(my ($inFrom) = $spec =~ /^\@([0-9]*)\:$/) {
		# Inside of {from .. inf}; used to end up as a range without an
		# upper limit that never matched.
		@range = ("out", -273, (length($inFrom) ? $inFrom : 0) - 1);
	}
	elsif(my ($upTo) = $spec =~ /^\~\:([0-9]+)$/) {
		@range = ("out", -273, $upTo);
	}
	elsif(my ($outFrom, $outTo) = $spec =~ /^([0-9]*)\:([0-9]{1,3})$/) {
		@range = ("out", length($outFrom) ? $outFrom : 0, $outTo);
	}
	elsif(my ($atLeast) = $spec =~ /^([0-9]*)\:$/) {
		@range = ("in", -273, (length($atLeast) ? $atLeast : 0) - 1);
	}
	elsif($spec =~ /^-?[0-9]+$/) {
		@range = ("out", 0, $spec);
	}
	else {
		print "Invalid temperature parameter!\n";
		exit(STATE_UNKNOWN);
	}
	return @range;
}
|
|
|
|
MAIN: {
	# Parses the command line, locates storcli (and sudo on Linux), checks
	# the input, runs the status subs and prints one Nagios status line.
	# The exit code of the plugin is the highest state that was found.
	my ($storcli, $sudo, $noSudo);
	my $showVersion = 0;
	my $controller = 0;
	my @enclosures;
	my @logDevices;
	my @physDevices;
	my @temperature_w;
	my @temperature_c;
	my @physicalDeviceTemperature_w;
	my @physicalDeviceTemperature_c;
	my $bbu = 1;
	my $platform = $^O;
	my $statusMessage = '';

	if ( !(GetOptions(
		'h|help' => sub {displayHelp();},
		'v|verbose' => sub {$VERBOSITY = 1 },
		'vv' => sub {$VERBOSITY = 2},
		'vvv' => sub {$VERBOSITY = 3},
		# Only remember the request here: the paths of storcli and sudo
		# are not known before all options are parsed.
		'V|version' => \$showVersion,
		'C|controller=i' => \$controller,
		'EID|enclosure=s' => \@enclosures,
		'LD|logicaldevice=s' => \@logDevices,
		'PD|physicaldevice=s' => \@physDevices,
		'Tw|temperature-warn=s' => \@temperature_w,
		'Tc|temperature-critical=s' => \@temperature_c,
		'PDTw|physicaldevicetemperature-warn=s' => \@physicalDeviceTemperature_w,
		'PDTc|physicaldevicetemperature-critical=s' => \@physicalDeviceTemperature_c,
		'Im|ignore-media-errors=i' => \$ignerr_m,
		'Io|ignore-other-errors=i' => \$ignerr_o,
		'Ip|ignore-predictive-fail-count=i' => \$ignerr_p,
		'Is|ignore-shield-counter=i' => \$ignerr_s,
		'p|path=s' => \$storcli,
		'b|BBU=i' => \$bbu,
		'noenclosures=i' => \$NOENCLOSURES,
		'nosudo' => \$noSudo,
	))) {
		print $NAME . " Version: " . $VERSION ."\n";
		displayUsage();
		exit(STATE_UNKNOWN);
	}

	# Locate the storcli tool if no path was given
	if(!defined($storcli)){
		# File::Which is loaded at run time; report it if it is missing
		# instead of failing on an undefined which() later on.
		if(!eval { require File::Which; 1 }){
			print "Error: Perl module File::Which is needed to find storcli, alternatively use -p <path>.\n";
			exit(STATE_UNKNOWN);
		}
		my @candidates = ($platform eq 'linux')
			? ('storcli', 'storcli64')
			: ('storcli.exe', 'storcli64.exe');
		foreach my $candidate (@candidates){
			$storcli = File::Which::which($candidate);
			last if defined($storcli);
		}
	}
	if(!defined($storcli) || ! -x $storcli){
		print "Error: cannot find storcli executable.\n";
		exit(STATE_UNKNOWN);
	}
	if ($platform eq 'linux' && !defined($noSudo)) {
		chomp($sudo = `which sudo`);
		if(! -x $sudo){
			print "Error: cannot find sudo executable.\n";
			exit(STATE_UNKNOWN);
		}
	}
	if($showVersion){
		displayVersion(defined($sudo) ? $sudo : '', $storcli);
	}

	# Prepare storcli command
	if(defined($sudo)){
		$storcli = $sudo.' '.$storcli;
	}
	$storcli .= " /c$controller";

	# Input validation
	#FIXME Replace with storcli show ctrlcount
	# my @controllerVersion = `$storcli /c$controller show all`;
	# if($controllerVersion[2] eq "Description = Controller $controller not found\n") {
	# print "Invalid controller number, device not found!\n";
	# exit(STATE_UNKNOWN);
	# }
	if(($bbu != 1) && ($bbu != 0)) {
		print "Invalid BBU/CV parameter, must be 0 or 1!\n";
		exit(STATE_UNKNOWN);
	}

	@enclosures = split(/,/,join(',', @enclosures));
	@logDevices = split(/,/,join(',', @logDevices));
	@physDevices = split(/,/,join(',', @physDevices));
	# The IDs become part of a command line that is run by a shell
	# (possibly via sudo), so nothing but integers is accepted.
	foreach my $id (@enclosures, @logDevices, @physDevices){
		if($id !~ /^[0-9]+\z/){
			print "Invalid enclosure or device number '$id', must be an integer (>=0)!\n";
			exit(STATE_UNKNOWN);
		}
	}

	# check given thresholds
	if(@temperature_c && !@temperature_w){
		print "Please also specify warning threshold!\n";
		displayUsage();
		exit(STATE_UNKNOWN);
	}
	@temperature_w = getThresholds(\@temperature_w, $C_TEMP_WARNING);
	@temperature_c = getThresholds(\@temperature_c, $C_TEMP_CRITICAL);
	if(@physicalDeviceTemperature_c && !@physicalDeviceTemperature_w){
		print "Please also specify PD warning threshold!\n";
		displayUsage();
		exit(STATE_UNKNOWN);
	}
	@physicalDeviceTemperature_w = getThresholds(\@physicalDeviceTemperature_w, $PD_TEMP_WARNING);
	@physicalDeviceTemperature_c = getThresholds(\@physicalDeviceTemperature_c, $PD_TEMP_CRITICAL);

	# Set exit status
	my $exitstatus = 0;
	my $newexitstatus = 0;
	my $newstatusMessage = '';

	# The following checks are disabled at the moment:
	# ($newexitstatus, $statusMessage) = getControllerStatus($sudo, $storcli, $controller, \@temperature_w, \@temperature_c);
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);
	# my ($bbuPresent,$cvPresent) = (0,0);
	# if($bbu == 1){
	# ($bbuPresent,$cvPresent) = checkBBUorCVIsPresent($sudo, $storcli, $controller);
	# if($bbuPresent == 0 && $cvPresent == 0){
	# $exitstatus = getExitState(STATE_CRITICAL, $exitstatus);
	# $newstatusMessage .= "No BBU or CV found, ";
	# }
	# }
	# if($bbuPresent == 1){
	# ($newexitstatus, $statusMessage) = getBBUStatus($sudo, $storcli, $controller);
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);
	# }
	# if($cvPresent == 1){
	# ($newexitstatus, $statusMessage) = getCVStatus($sudo, $storcli, $controller);
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);
	# }
	# ($newexitstatus, $statusMessage) = getLogicalDeviceStatus($sudo, $storcli, $controller, \@logDevices, "init");
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);

	($newexitstatus, $statusMessage) = getLogicalDeviceStatus($storcli, \@logDevices, "all");
	$newstatusMessage .= $statusMessage if defined($statusMessage);
	$exitstatus = getExitState($newexitstatus, $exitstatus);

	# ($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "initialization");
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);
	# ($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "rebuild");
	# $newstatusMessage .= $statusMessage;
	# $exitstatus = getExitState($newexitstatus, $exitstatus);

	($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "all");
	$newstatusMessage .= $statusMessage if defined($statusMessage);
	$exitstatus = getExitState($newexitstatus, $exitstatus);

	# Every finding ends with ", "; remove the separator of the last one
	$newstatusMessage =~ s/, $//;
	if($exitstatus == 0) { print "LSIRAID OK (Ctrl #$controller) | STATUS=$exitstatus\n"; }
	elsif($exitstatus == 1) { print "LSIRAID WARNING (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n"; }
	elsif($exitstatus == 2) { print "LSIRAID CRITICAL (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n"; }
	# Print a status line for every other state as well
	else { print "LSIRAID UNKNOWN (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n"; }
	exit($exitstatus);
}
|