Fix critical status for non optimal logical volumes

* Minor fix: uninitialized variables for bbu=0
* Typo fix: "other error count" instead of "media error count"
* Output STATUS variable for nagios performance data processing
* Add commandline options to define warning threshold for media errors, other
  errors, predictive fail count and shield count
* Set state to critical for non-optimal virtual/logical disks

Signed-off-by: Georg Schönberger <gschoenberger@thomas-krenn.com>
This commit is contained in:
Jonas Meurer
2013-11-28 10:12:33 +01:00
committed by Georg Schönberger
parent 941826b0c5
commit bc8df13fb8
2 changed files with 38 additions and 20 deletions

View File

@@ -3,6 +3,14 @@ Changelog for check_lsi_raid, a Nagios/Icinga plugin to check LSI RAID
controllers
###############################################################################
Version 1.1 20131119 (Jonas Meurer <jmeurer@inet.de>)
* Minor fix: uninitialized variables for bbu=0
* Typo fix: "other error count" instead of "media error count"
* Output STATUS variable for nagios performance data processing
* Add commandline options to define warning threshold for media errors, other
errors, predictive fail count and shield count
* Set state to critical for non-optimal virtual/logical disks
Version 1.0 20131028
* First stable release. Improved testing with reading output from simple text
files.

View File

@@ -36,6 +36,7 @@ our $C_TEMP_WARNING = 80;
our $C_TEMP_CRITICAL = 90;
our $PD_TEMP_WARNING = 40;
our $PD_TEMP_CRITICAL = 45;
our ($ignerr_m, $ignerr_o, $ignerr_p, $ignerr_s) = (0, 0, 0, 0);
use constant {
STATE_OK => 0,
@@ -78,6 +79,10 @@ sub displayUsage {
print " [ -PDTw | --physicaldevicetemperature-warn ]\n Specifies the disk temperature warning range, default is ${PD_TEMP_WARNING}C or more\n";
print " [ -PDTc | --physicaldevicetemperature-critical ]\n Specifies the disk temperature critical error range, default is ${PD_TEMP_CRITICAL}C or more.
Requires -PDTw | --physicaldevicetemperature-warn to be set.\n";
print " [ -Im | --ignore-media-errors ]\n Specifies the warning threshold for media errors per disk, default is $ignerr_m.\n";
print " [ -Io | --ignore-other-errors ]\n Specifies the warning threshold for other errors per disk, default is $ignerr_o.\n";
print " [ -Ip | --ignore-predictive-fail-count ]\n Specifies the warning threshold for predictive fail count per disk, default is $ignerr_p.\n";
print " [ -Is | --ignore-shield-counter ]\n Specifies the warning threshold for shield counter per disk, default is $ignerr_s.\n";
print " [ -p <path> | --path <path>]\n Specifies the path to StorCLI, default is /usr/bin/storcli or C:\\Programme\\...\\storcli.exe\n";
print " [ -b <0/1> | BBU <0/1> ]\n Check if a BBU or a CacheVault module is present. One must be present unless '-b 0' is defined.
This ensures that for a given controller a BBU/CV must be present per default.\n";
@@ -370,6 +375,7 @@ sub getLogicalDeviceStatus {
my @values = split(' ',$line);
if($values[2] ne "Optl") {
if($values[1] ne 'Cac1'){
$status = getExitState($status, STATE_CRITICAL);
#check for consistency
if($values[4] eq 'Yes'){
if ($VERBOSITY == 0) {$statusMessage .= "Virtual disk state not optimal, "; }
@@ -488,27 +494,27 @@ sub getPhysDeviceStatus {
} elsif($values[0] =~ /^[a-zA-Z\.]*/) {
# Check the drive state in block Detailed information
if($values[0] eq "Shield") {
if($values[3] ne "0") {
if($values[3] > $ignerr_s) {
$status = getExitState($status, STATE_WARNING);
if ($VERBOSITY == 0) {$statusMessage .= "Shield counter (phys. drive) not 0, "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: shield counter not 0, "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: shield counter not 0, "; }
if ($VERBOSITY == 0) {$statusMessage .= "Shield counter (phys. drive) $values[3] (>$ignerr_s), "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: shield counter $values[3] (>$ignerr_s), "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: shield counter $values[3] (>$ignerr_s), "; }
}
}
elsif($values[0] eq "Media") {
if($values[4] ne "0") {
if($values[4] > $ignerr_m) {
$status = getExitState($status, STATE_WARNING);
if ($VERBOSITY == 0) {$statusMessage .= "Media error count (phys. drive) not 0, "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: media error count not 0, "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: media error count not 0, "; }
if ($VERBOSITY == 0) {$statusMessage .= "Media error count (phys. drive) $values[4] (>$ignerr_m), "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: media error count $values[4] (>$ignerr_m), "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: media error count $values[4] (>$ignerr_m), "; }
}
}
elsif($values[0] eq "Other") {
if($values[4] ne "0") {
if($values[4] > $ignerr_o) {
$status = getExitState($status, STATE_WARNING);
if ($VERBOSITY == 0) {$statusMessage .= "Other error count (phys. drive) not 0, "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: media error count not 0, "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: media error count not 0, "; }
if ($VERBOSITY == 0) {$statusMessage .= "Other error count (phys. drive) $values[4] (>$ignerr_o), "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: other error count $values[4] (>$ignerr_o), "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: other error count $values[4] (>$ignerr_o), "; }
}
}
elsif($values[0] eq "Drive") {
@@ -580,11 +586,11 @@ sub getPhysDeviceStatus {
}
}
elsif($values[0] eq "Predictive") {
if($values[4] ne "0") {
if($values[4] > $ignerr_p) {
$status = getExitState($status, STATE_WARNING);
if ($VERBOSITY == 0) {$statusMessage .= "Phys. drive Predictive Fail Count not 0, "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: Predictive Fail Count not 0, "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: Predictive Fail Count not 0, "; }
if ($VERBOSITY == 0) {$statusMessage .= "Phys. drive Predictive Fail Count $values[4] (>$ignerr_p), "; }
if ($VERBOSITY == 1) {$statusMessage .= "Physical drive $output_dev: Predictive Fail Count $values[4] (>$ignerr_p), "; }
if ($VERBOSITY >= 2) {$statusMessage .= "Physical drive $output_dev in enclosure $output_enc: Predictive Fail Count $values[4] (>$ignerr_p), "; }
}
}
elsif($values[0] eq "S.M.A.R.T") {
@@ -907,6 +913,10 @@ MAIN: {
'Tc|temperature-critical=s' => \@temperature_c,
'PDTw|physicaldevicetemperature-warn=s' => \@physicalDeviceTemperature_w,
'PDTc|physicaldevicetemperature-critical=s' => \@physicalDeviceTemperature_c,
'Im|ignore-media-errors=i' => \$ignerr_m,
'Io|ignore-other-errors=i' => \$ignerr_o,
'Ip|ignore-predictive-fail-count=i' => \$ignerr_p,
'Is|ignore-shield-counter=i' => \$ignerr_s,
'p|path=s' => \$storcli,
'b|BBU=i' => \$bbu
))) {
@@ -985,7 +995,7 @@ MAIN: {
($newexitstatus, $statusMessage) = getControllerStatus($sudo, $storcli, $controller, \@temperature_w, \@temperature_c);
$newstatusMessage .= $statusMessage;
$exitstatus = getExitState($newexitstatus, $exitstatus);
my ($bbuPresent,$cvPresent);
my ($bbuPresent,$cvPresent) = (0,0);
if($bbu == 1){
($bbuPresent,$cvPresent) = checkBBUorCVIsPresent($sudo, $storcli, $controller);
if($bbuPresent == 0 && $cvPresent == 0){
@@ -1018,8 +1028,8 @@ MAIN: {
($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "all");
$newstatusMessage .= $statusMessage;
$exitstatus = getExitState($newexitstatus, $exitstatus);
if($exitstatus == 0) { print "LSIRAID OK (Ctrl #$controller)\n"; }
elsif($exitstatus == 1) { chop($newstatusMessage); chop($newstatusMessage); print "LSIRAID WARNING (Ctrl #$controller): [$newstatusMessage]\n"; }
elsif($exitstatus == 2) { chop($newstatusMessage); chop($newstatusMessage); print "LSIRAID CRITICAL (Ctrl #$controller): [$newstatusMessage]\n"; }
if($exitstatus == 0) { print "LSIRAID OK (Ctrl #$controller) | STATUS=$exitstatus\n"; }
elsif($exitstatus == 1) { chop($newstatusMessage); chop($newstatusMessage); print "LSIRAID WARNING (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n"; }
elsif($exitstatus == 2) { chop($newstatusMessage); chop($newstatusMessage); print "LSIRAID CRITICAL (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n"; }
exit($exitstatus);
}