Files
check_lsi_raid/check_lsi_raid
2014-09-22 17:18:30 +02:00

897 lines
33 KiB
Perl
Executable File

#!/usr/bin/perl -w
# ======================================================================================
# $Id$
# check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status
# --------------------------------------------------------------------------------------
# Created as part of a semester project at the University of Applied Sciences Hagenberg
# (http://www.fh-ooe.at/en/hagenberg-campus/)
#
# Copyright (c) 2013:
# Grubhofer Martin (s1110239013@students.fh-hagenberg.at)
# Scheipner Alexander (s1110239032@students.fh-hagenberg.at)
# Werner Sebastian (s1110239038@students.fh-hagenberg.at)
# Georg Schoenberger (gschoenberger@thomas-krenn.com)
# Jonas Meurer (jmeurer@inet.de)
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, see <http://www.gnu.org/licenses/>.
# ======================================================================================
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case);
# ---------------------------------------------------------------------------
# Global plugin settings
# ---------------------------------------------------------------------------
# Output verbosity (0 = single line; raised by the -v/-vv/-vvv options).
our $VERBOSITY = 0;
our $VERSION   = "1.2";
our $NAME      = "check_lsi_raid: Nagios/Icinga plugin to check LSI Raid Controller status";

# Default temperature limits in degrees Celsius for the controller (C_*) and
# the physical devices (PD_*); used when no threshold option is given.
our $C_TEMP_WARNING   = 80;
our $C_TEMP_CRITICAL  = 90;
our $PD_TEMP_WARNING  = 40;
our $PD_TEMP_CRITICAL = 45;

# Per-disk warning thresholds: media errors, other errors, predictive fail
# count and shield counter (set by the -Im/-Io/-Ip/-Is options).
our $ignerr_m = 0;
our $ignerr_o = 0;
our $ignerr_p = 0;
our $ignerr_s = 0;

# When true, no enclosure part ('/eall', '/eN') is added to StorCLI commands.
our $NOENCLOSURES = 0;

# Nagios/Icinga plugin exit codes.
use constant STATE_OK       => 0;
use constant STATE_WARNING  => 1;
use constant STATE_CRITICAL => 2;
use constant STATE_UNKNOWN  => 3;

# Column names of the StorCLI tables, in column order, used as hash keys when
# a logical device (ldmap) or physical device (pdmap) row is parsed.
our @ldmap_a = qw(DG/VD TYPE State Access Consist Cache Cac sCC Size);
our @pdmap_a = qw(EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp);
# Always return the highest state level.
#
# Parameters:
#   $_[0] - previous state
#   $_[1] - upcoming state
# Returns:
#   The numerically highest value of STATE_OK and both given states, or
#   STATE_UNKNOWN if one of the states is undefined.
sub getExitState {
    my ($previousState, $upcomingState) = @_;

    # An undefined state cannot be ranked. Report UNKNOWN right away instead
    # of comparing undef numerically, which only produced warnings.
    if (!defined($previousState) || !defined($upcomingState)) {
        return STATE_UNKNOWN;
    }

    my $returnState = STATE_OK;
    foreach my $state ($previousState, $upcomingState) {
        if ($state > $returnState) {
            $returnState = $state;
        }
    }
    return $returnState;
}
# Explains the usage of the plugin and which options take which values.
# Prints to STDOUT only; the caller decides about the exit code.
sub displayUsage {
    print "Usage: \n";
    print " [ -h | --help ]\n Display this help page\n";
    print " [ -v | -vv | -vvv | --verbose ]\n Sets the verbosity level.\n no -v is the normal single line output for Nagios/Icinga\n -v is a more detailed version but still usable in Nagios.\n -vv is a multiline output for debugging configuration errors or more detailed information.\n -vvv is for plugin problem diagnosis.\n For further information please visit: http://nagiosplug.sourceforge.net/developer-guidelines.html#AEN39\n";
    print " [ -V | --version ]\n Displays the Version of the tk-lsi-plugin and the version of StorCLI\n";
    print " [ -C <Controller Number> | --controller <Controller Number> ]\n Specifies a Controller number, defaults to 0\n";
    print " [ -EID | --enclosure ]\n Specifies one or more Enclosures, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list (0,1,2,3,...)\n";
    print " [ -LD | --logicaldevice ]\n Specifies one or more Logical Devices, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list (0,1,2,3,...)\n";
    print " [ -PD | --physicaldevice ]\n Specifies one or more Physical Devices, defaults to all\n Takes either an integer as additional argument (>=0) or a comma separated list (0,1,2,3,...)\n";
    print " [ -Tw | --temperature-warn ]\n Specifies the RAID-Controller temperature warning range, default is ${C_TEMP_WARNING}C or more\n";
    print " [ -Tc | --temperature-critical ]\n Specifies the RAID-Controller temperature critical error range, default is ${C_TEMP_CRITICAL}C or more.\n"
        . " Requires -Tw | --temperature-warn to be set.\n";
    print " [ -PDTw | --physicaldevicetemperature-warn ]\n Specifies the disk temperature warning range, default is ${PD_TEMP_WARNING}C or more\n";
    print " [ -PDTc | --physicaldevicetemperature-critical ]\n Specifies the disk temperature critical error range, default is ${PD_TEMP_CRITICAL}C or more.\n"
        . " Requires -PDTw | --physicaldevicetemperature-warn to be set.\n";
    print " [ -Im | --ignore-media-errors ]\n Specifies the warning threshold for media errors per disk, default is $ignerr_m.\n";
    print " [ -Io | --ignore-other-errors ]\n Specifies the warning threshold for other errors per disk, default is $ignerr_o.\n";
    print " [ -Ip | --ignore-predictive-fail-count ]\n Specifies the warning threshold for predictive fail count per disk, default is $ignerr_p.\n";
    print " [ -Is | --ignore-shield-counter ]\n Specifies the warning threshold for shield counter per disk, default is $ignerr_s.\n";
    print " [ -p <path> | --path <path>]\n Specifies the path to StorCLI, default is /usr/bin/storcli or C:\\Programme\\...\\storcli.exe\n";
    print " [ -b <0/1> | --BBU <0/1> ]\n Check if a BBU or a CacheVault module is present. One must be present unless '-b 0' is defined.\n"
        . " This ensures that for a given controller a BBU/CV must be present per default.\n";
    print " [ --noenclosures <0/1> ]\n Specifies if enclosures are present or not. 0 means enclosures are\n"
        . " present (default), 1 states no enclosures are used (no 'eall' in storcli commands).\n";
    # --nosudo is accepted by the option parser and was missing from the help.
    print " [ --nosudo ]\n Do not call StorCLI through sudo (on Linux sudo is used by default).\n";
}
# Displays a short help text for the user: plugin name and version, a short
# description, the usage summary and contact information.
# Does not return; exits with STATE_OK.
sub displayHelp {
    print $NAME . " Version: " . $VERSION ."\n";
    print "Copyright (C) 2013 Thomas-Krenn.AG\n";
    print "Current updates available at http://git.thomas-krenn.com/check_lsi_raid.git\n";
    print "This Nagios/Icinga Plugin checks LSI RAID-Controllers for Controller, \nPhysical-Device and Logical Device warnings and errors.\n";
    print "In order for this plugin to work properly you need to add the \nnagios-user to your sudoers file (or create a new one in /etc/sudoers.d/)\n";
    displayUsage();
    # The wiki link used to be printed twice ("<url> and <same url>"); it is
    # now listed once.
    print "Further information about this plugin can be found at:\n"
        . "http://www.thomas-krenn.com/de/wiki/LSI_RAID_Monitoring_Plugin\n"
        . "Please send an email to the tk-monitoring plugin-user mailing list:\n"
        . "tk-monitoring-plugins-user\@lists.thomas-krenn.com\n"
        . "if you have questions regarding use of this software, to submit patches, or\n"
        . "suggest improvements. The mailing list archive is available at:\n"
        . "http://lists.thomas-krenn.com/pipermail/tk-monitoring-plugins-user\n";
    exit(STATE_OK);
}
# Prints the name and version of the plugin, followed by the version line
# reported by StorCLI (the output line of "<storcli> -v" starting with
# "Storage").
#
# Parameters:
#   $_[0] - sudo prefix; may be undefined or empty
#   $_[1] - StorCLI command; may be undefined or empty, in which case only
#           the plugin version is printed
# Does not return; exits with STATE_OK.
sub displayVersion {
    my ($sudo, $storcli) = @_;
    print $NAME . "\nVersion: ". $VERSION . "\n\n";

    # The option parser may call this before StorCLI has been located, so
    # both arguments can be undefined. Never interpolate undef into a shell
    # command.
    if (defined($storcli) && $storcli ne '') {
        my $command = $storcli;
        if (defined($sudo) && $sudo ne '') {
            $command = "$sudo $storcli";
        }
        my @storcliVersion = `$command -v`;
        foreach my $line (@storcliVersion) {
            if ($line =~ /^\s*Storage/) {
                $line =~ s/^\s+|\s+$//g;
                print $line;
            }
        }
    }
    print "\n";
    exit(STATE_OK);
}
# Checks whether a StorCLI call succeeded.
#
# Parameters:
#   $_[0] - array reference holding the output lines of a StorCLI command
# Returns:
#   1 if the first line starting with "Status" reads "Status = Success",
#   0 otherwise (including when there is no such line at all).
sub checkCommandStatus {
    my $outputRef = shift;
    return 0 unless ref($outputRef) eq 'ARRAY';

    foreach my $line (@{$outputRef}) {
        next unless $line =~ /^Status/;
        # Tolerate CRLF line endings and varying spacing around '='.
        return ($line =~ /^Status\s*=\s*Success\s*$/) ? 1 : 0;
    }
    # No status line found: treat the command as failed instead of returning
    # an unspecified value.
    return 0;
}
# Tells whether a temperature lies in the alert zone of a threshold triple
# ("in"|"out", from, to): "in" alerts inside [from, to], anything else alerts
# outside of it.
sub _ctrlTemperatureAlerts {
    my ($temp, $mode, $from, $to) = @_;
    if ($mode eq "in") {
        return (($temp >= $from) && ($temp <= $to)) ? 1 : 0;
    }
    return (($temp < $from) || ($temp > $to)) ? 1 : 0;
}

# Rates one temperature reading against the warning and critical thresholds
# and returns (state, message). The critical range is only consulted when the
# warning range alerts. $shortName is used for verbosity 0 and 1, $longName
# for verbosity 2 and above.
sub _ctrlTemperatureState {
    my ($temp, $warnRef, $critRef, $shortName, $longName) = @_;

    # Skip readings that are not plain integers instead of comparing text
    # numerically.
    return (STATE_OK, '') unless defined($temp) && $temp =~ /^-?[0-9]+$/;
    return (STATE_OK, '') unless _ctrlTemperatureAlerts($temp, @{$warnRef});

    my ($state, $brief, $detailed) = (STATE_WARNING, 'warning', 'warning');
    if (_ctrlTemperatureAlerts($temp, @{$critRef})) {
        ($state, $brief, $detailed) = (STATE_CRITICAL, 'critical', 'is critical');
    }

    my $message = '';
    if ($VERBOSITY == 0)    { $message = "$shortName temp. $brief, "; }
    elsif ($VERBOSITY == 1) { $message = "$shortName temp. $detailed (${temp}C), "; }
    elsif ($VERBOSITY >= 2) { $message = "$longName temperature $detailed (${temp}C), "; }
    return ($state, $message);
}

# Returns information about:
# - Controller status and controller temperature
#
# Parameters:
#   $_[0] - sudo prefix
#   $_[1] - StorCLI command
#   $_[2] - controller number
#   $_[3] - array reference: temperature warning range ("in"|"out", from, to)
#   $_[4] - array reference: temperature critical range ("in"|"out", from, to)
# Returns:
#   (status, statusMessage); every finding is appended to the message with a
#   trailing ", ".
# Prints an error and exits with STATE_UNKNOWN if the StorCLI command fails.
sub getControllerStatus {
    my $sudo = $_[0];
    my $storcli = $_[1];
    my $controller = $_[2];
    my @temperature_w = @{($_[3])};
    my @temperature_c = @{($_[4])};
    my $command = "$sudo $storcli /c$controller show all";
    my $status = 0;         # Return Status
    my $statusMessage = ''; # Return String
    my @output = `$command`;

    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    foreach my $line (@output) {
        # Every line is classified by its first word, its second word and
        # its last word (the value).
        my ($first)  = $line =~ /^([a-zA-Z0-9]*)/;
        my ($last)   = $line =~ /([a-zA-Z0-9]*)$/;
        my ($second) = $line =~ /\s+([a-zA-Z0-9]*)/;
        $second = '' unless defined($second);

        my ($state, $message) = (STATE_OK, '');
        if ($first eq "Controller") {
            if ($second eq "Status") {
                if ($last ne "OK" && $last ne "Optimal") {
                    ($state, $message) = (STATE_WARNING, "Ctrl. status not optimal, ");
                }
            }
            elsif ($second eq "must") {
                if ($last ne "No") {
                    ($state, $message) = (STATE_CRITICAL, "Ctrl. needs reboot, ");
                }
            }
            elsif ($second eq "has") {
                if ($last ne "No") {
                    ($state, $message) = (STATE_WARNING, "Ctrl. booted in safe mode, ");
                }
            }
            elsif ($second eq "temperature") {
                ($state, $message) = _ctrlTemperatureState(
                    $last, \@temperature_w, \@temperature_c, "Ctrl.", "Controller");
            }
        }
        elsif ($first eq "ROC") {
            if ($second eq "temperature") {
                ($state, $message) = _ctrlTemperatureState(
                    $last, \@temperature_w, \@temperature_c, "ROC", "ROC");
            }
        }
        elsif ($first eq "Memory") {
            # The second word used to be captured together with its leading
            # whitespace, so neither comparison below could ever match.
            if ($second eq "Correctable") {
                if ($last ne "0") {
                    ($state, $message) = (STATE_WARNING, "Memory correctable errors detected, ");
                }
            }
            elsif ($second eq "Uncorrectable") {
                if ($last ne "0") {
                    ($state, $message) = (STATE_CRITICAL, "Memory uncorrectable errors detected, ");
                }
            }
        }
        elsif ($first eq "Failed") {
            if ($last ne "No") {
                ($state, $message) = (STATE_WARNING, "Failed to get lock key on bootup, ");
            }
        }
        #TODO Improve rollback detection
        elsif ($first eq "A") {
            if ($last ne "No") {
                ($state, $message) = (STATE_WARNING, "A rollback operation is in progress, ");
            }
        }

        $status = getExitState($status, $state);
        $statusMessage .= $message;
    }
    return ($status, $statusMessage);
}
# Returns information about:
# - Logical device status
#
# Parameters:
#   $_[0] - StorCLI command including the controller part (e.g. ".../c0")
#   $_[1] - array reference with logical device numbers; empty means all
#   $_[2] - StorCLI "show" action; only "all" is parsed
# Returns:
#   (status, statusMessage); every finding is appended to the message with a
#   trailing ", ".
# Prints an error and exits with STATE_UNKNOWN if the StorCLI command fails.
sub getLogicalDeviceStatus {
    my $storcli = $_[0];
    my @logDevices = @{($_[1])};
    my $action = $_[2];
    my $command = $storcli;
    my $status = 0;
    my $statusMessage = ''; # Return String

    # "/vall" addresses all logical devices, otherwise a comma separated list
    $command .= (scalar(@logDevices) == 0) ? "/vall" : "/v" . join(",", @logDevices);
    $command .= " show $action";

    my @output = `$command`;
    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my @logDevs;
    if ($action eq "all") {
        my $currBlock;
        foreach my $line (@output) {
            # A line like "/c0/v0 :" starts the block of one logical device
            if ($line =~ /^\/(c[0-9]*\/v[0-9]*).*/) {
                $currBlock = $1;
                next;
            }
            next unless defined($currBlock);
            next unless $line =~ /^\d+\/\d+\s+\w+\d\s+\w+.*/;

            # Table row: map the columns onto the names in @ldmap_a.
            # One hash per row, so every list entry keeps its own values.
            my @columns = split(/\s+/, $line);
            my %lineValues_h;
            # The current block is the c0/v0 name
            $lineValues_h{'ld'} = $currBlock;
            for (my $i = 0; $i < @ldmap_a; $i++) {
                $lineValues_h{$ldmap_a[$i]} = $columns[$i];
            }
            push @logDevs, \%lineValues_h;
        }
    }

    foreach my $logDev (@logDevs) {
        my $state = $logDev->{'State'};
        # NOTE(review): assumes 'Optl' is the StorCLI abbreviation for an
        # optimal logical device and that every other state deserves a
        # critical alert - confirm against the StorCLI reference manual.
        next if !defined($state) || $state eq 'Optl';
        $status = getExitState($status, STATE_CRITICAL);
        $statusMessage .= "Log. device $logDev->{'ld'} state $state, ";
    }
    return ($status, $statusMessage);
}
# Tells whether a temperature lies in the alert zone of a threshold triple
# ("in"|"out", from, to): "in" alerts inside [from, to], anything else alerts
# outside of it.
sub _pdTemperatureAlerts {
    my ($temp, $mode, $from, $to) = @_;
    if ($mode eq "in") {
        return (($temp >= $from) && ($temp <= $to)) ? 1 : 0;
    }
    return (($temp < $from) || ($temp > $to)) ? 1 : 0;
}

# Returns information about:
# - Physical device status
#
# Parameters:
#   $_[0] - unused
#   $_[1] - StorCLI command including the controller part (e.g. ".../c0")
#   $_[2] - unused
#   $_[3] - array reference with enclosure numbers; empty means all
#   $_[4] - array reference with physical device (slot) numbers; empty means all
#   $_[5] - array reference: disk temperature warning range ("in"|"out", from, to)
#   $_[6] - array reference: disk temperature critical range ("in"|"out", from, to)
#   $_[7] - StorCLI "show" action; only "all" is parsed
# Returns:
#   (status, statusMessage); every finding is appended to the message with a
#   trailing ", ".
# Prints an error and exits with STATE_UNKNOWN if the StorCLI command fails.
sub getPhysDeviceStatus {
    my $storcli = $_[1];
    my @enclosures = @{($_[3])};
    my @physDevices = @{($_[4])};
    my @physicalDeviceTemperature_w = @{($_[5])};
    my @physicalDeviceTemperature_c = @{($_[6])};
    my $action = $_[7];
    my $command = $storcli;
    my $status = 0;
    my $statusMessage = '';

    if (!$NOENCLOSURES) {
        $command .= (scalar(@enclosures) == 0) ? "/eall" : "/e" . join(",", @enclosures);
    }
    $command .= (scalar(@physDevices) == 0) ? "/sall" : "/s" . join(",", @physDevices);
    $command .= " show $action";

    my @output = `$command`;
    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    # "<name> = <value>" detail lines that are collected per drive. The list
    # is turned into a single-line pattern; the former pattern was wrapped
    # over several source lines without /x, so the alternatives after each
    # line break could never match.
    my $smartKey = 'S.M.A.R.T alert flagged by drive';
    my $detailNames = join('|', map { quotemeta($_) } (
        'Shield Counter', 'Media Error Count', 'Other Error Count',
        'BBM Error Count', 'Drive Temperature', 'Predictive Failure Count',
        $smartKey));

    my @physDevs;
    if ($action eq "all") {
        my $current; # hash reference of the drive currently being parsed
        foreach my $line (@output) {
            # A line like "Drive /c0/e252/s0 :" starts a new drive. The
            # enclosure part is optional.
            # NOTE(review): assumes the drive path has no '/eN' part when
            # --noenclosures is used - confirm with real output.
            if ($line =~ /^Drive \/(c[0-9]*(?:\/e[0-9]*)?\/s[0-9]*) \:\s*$/) {
                push @physDevs, $current if defined($current);
                # Fresh hash per drive, so list entries never share values.
                # The current block is the c0/e252/s0 name
                $current = { 'pd' => $1 };
                next;
            }
            next unless defined($current);

            # Table row, e.g. "252:0  7 Onln  0 465.25 GB SATA HDD N N 512B <model> U".
            # NOTE(review): the DG column is matched with \S+ on the
            # assumption that it is not always numeric - confirm.
            if ($line =~ /^\d+\:\d+\s+\d+\s+\w+\s+\S+.*/) {
                my @columns = split(/\s+/, $line);
                my $j = 0;
                foreach my $name (@pdmap_a) {
                    my $value = $columns[$j];
                    if ($name eq 'Size') {
                        # The size may be split into number and unit
                        if (defined($columns[$j + 1]) && $columns[$j + 1] =~ /^[KMGTP]B$/) {
                            $value .= $columns[$j + 1];
                            $j++;
                        }
                    }
                    elsif ($name eq 'Model') {
                        # The model may consist of several words; it ends
                        # right before the last column.
                        my $lastModelIndex = $#columns - 1;
                        if ($lastModelIndex > $j) {
                            $value = join(' ', @columns[$j .. $lastModelIndex]);
                            $j = $lastModelIndex;
                        }
                    }
                    $current->{$name} = $value;
                    $j++;
                }
            }

            if ($line =~ /^($detailNames)\s*=\s*(.*?)\s*$/) {
                $current->{$1} = $2;
                # If the last value is parsed, set up for the next device
                if ($1 eq $smartKey) {
                    push @physDevs, $current;
                    undef $current;
                }
            }
        }
        # Keep a drive whose block ended without the S.M.A.R.T line
        push @physDevs, $current if defined($current);
    }

    # Error counters together with the threshold each one has to exceed
    my @counterLimits = (
        ['Media Error Count',        $ignerr_m],
        ['Other Error Count',        $ignerr_o],
        ['Predictive Failure Count', $ignerr_p],
        ['Shield Counter',           $ignerr_s],
    );

    foreach my $physDev (@physDevs) {
        my $name = $physDev->{'pd'};

        # NOTE(review): assumes Onln, UGood, GHS, DHS and JBOD are the
        # healthy StorCLI drive states and everything else is critical -
        # confirm against the StorCLI reference manual.
        my $state = $physDev->{'State'};
        if (defined($state) && $state !~ /^(?:Onln|UGood|GHS|DHS|JBOD)$/) {
            $status = getExitState($status, STATE_CRITICAL);
            $statusMessage .= "PD $name state $state, ";
        }

        foreach my $counterLimit (@counterLimits) {
            my ($counter, $limit) = @{$counterLimit};
            my $count = $physDev->{$counter};
            next unless defined($count) && $count =~ /^([0-9]+)/;
            if ($1 > $limit) {
                $status = getExitState($status, STATE_WARNING);
                $statusMessage .= "PD $name $counter $1, ";
            }
        }

        my $smart = $physDev->{$smartKey};
        if (defined($smart) && $smart ne 'No') {
            $status = getExitState($status, STATE_WARNING);
            $statusMessage .= "PD $name S.M.A.R.T alert, ";
        }

        # The value looks like "28C (82.40 F)"; only Celsius is used. The
        # critical range is only consulted when the warning range alerts.
        my $temperature = $physDev->{'Drive Temperature'};
        if (defined($temperature) && $temperature =~ /^(-?[0-9]+)\s*C/) {
            my $temp = $1;
            if (_pdTemperatureAlerts($temp, @physicalDeviceTemperature_w)) {
                if (_pdTemperatureAlerts($temp, @physicalDeviceTemperature_c)) {
                    $status = getExitState($status, STATE_CRITICAL);
                    $statusMessage .= "PD $name temp. critical (${temp}C), ";
                }
                else {
                    $status = getExitState($status, STATE_WARNING);
                    $statusMessage .= "PD $name temp. warning (${temp}C), ";
                }
            }
        }
    }
    return ($status, $statusMessage);
}
# Returns information about:
# - Battery Backup Unit status
# - Temperature, Battery status, voltage
#
# Parameters:
#   $_[0] - sudo prefix
#   $_[1] - StorCLI command
#   $_[2] - controller number
# Returns:
#   (status, statusMessage); every finding is appended to the message with a
#   trailing ", ".
# Prints an error and exits with STATE_UNKNOWN if the StorCLI command fails.
sub getBBUStatus {
    my $sudo = $_[0];
    my $storcli = $_[1];
    my $controller = $_[2];
    my $command = "$sudo $storcli /c$controller/bbu show status";
    my $status = 0;
    my $statusMessage = '';
    my @output = `$command`;

    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    # Name of the output block the current line belongs to
    my $block = '';
    foreach my $line (@output) {
        # The first token has to include '_': without it "BBU_Info" and
        # "BBU_Firmware_Status" were cut down to "BBU", the block headers
        # were never recognised and most checks below never ran.
        my ($first)  = $line =~ /^([a-zA-Z0-9_]*)/;
        my ($last)   = $line =~ /([a-zA-Z0-9]*)$/;
        my ($second) = $line =~ /\s+([a-zA-Z0-9]*)/;
        $second = '' unless defined($second);

        if ($first eq 'BBU_Info' || $first eq 'BBU_Firmware_Status' || $first eq 'GasGaugeStatus') {
            $block = $first;
            next;
        }

        my ($state, $message) = (STATE_OK, '');
        # Check BBU_Info block
        if ($block eq 'BBU_Info') {
            if ($first eq "Battery" && $second eq "State" && $last ne "Optimal") {
                ($state, $message) = (STATE_WARNING, "BBU state not optimal, ");
            }
        }
        # Check BBU_Firmware_Status
        elsif ($block eq 'BBU_Firmware_Status') {
            if ($first eq "Temperature") {
                if ($last ne "OK") {
                    $state = STATE_CRITICAL;
                    if ($VERBOSITY == 0) { $message = "BBU temp. critical, "; }
                    if ($VERBOSITY >= 1) { $message = "BBU Temperature critical, "; }
                }
            }
            elsif ($first eq "Battery") {
                if ($second eq "State") {
                    if ($last ne "Optimal") {
                        ($state, $message) = (STATE_WARNING, "BBU state not optimal, ");
                    }
                }
                elsif ($second eq "Pack") {
                    if ($last ne "No") {
                        ($state, $message) = (STATE_CRITICAL, "BBU pack missing, ");
                    }
                }
                elsif ($second eq "Replacement") {
                    if ($last ne "No") {
                        ($state, $message) = (STATE_WARNING, "BBU replacement required, ");
                    }
                }
            }
            elsif ($first eq "Voltage") {
                if ($last ne "OK") {
                    ($state, $message) = (STATE_CRITICAL, "BBU voltage not ok, ");
                }
            }
            # Only the "Learn Cycle Status" line carries an OK value; other
            # lines starting with "Learn" must not be compared against "OK".
            elsif ($line =~ /^Learn\s+Cycle\s+Status\b/) {
                if ($last ne "OK") {
                    ($state, $message) = (STATE_WARNING, "BBU learn cycle status not ok, ");
                }
            }
            elsif ($first eq "I2C") {
                if ($last ne "No") {
                    ($state, $message) = (STATE_CRITICAL, "BBU I2C errors, ");
                }
            }
            elsif ($first eq "Remaining") {
                if ($last ne "No") {
                    $state = STATE_WARNING;
                    if ($VERBOSITY == 0) { $message = "BBU capacity low, "; }
                    if ($VERBOSITY >= 1) { $message = "BBU remaining capacity is low, "; }
                }
            }
        }
        # Check GasGaugeStatus
        elsif ($block eq 'GasGaugeStatus') {
            if ($first eq "Over" && $last ne "No") {
                if ($second eq "Temperature") {
                    $state = STATE_CRITICAL;
                    if ($VERBOSITY == 0) { $message = "BBU temp. critical, "; }
                    if ($VERBOSITY >= 1) { $message = "BBU Temperature critical, "; }
                }
                elsif ($second eq "Charged") {
                    ($state, $message) = (STATE_CRITICAL, "BBU over charged, ");
                }
            }
        }

        $status = getExitState($status, $state);
        $statusMessage .= $message;
    }
    return ($status, $statusMessage);
}
# Returns information about:
# - Cache Vault module status
# - If CacheVault must be replaced
#
# Parameters:
#   $_[0] - sudo prefix
#   $_[1] - StorCLI command
#   $_[2] - controller number
# Returns:
#   (status, statusMessage); every finding is appended to the message with a
#   trailing ", ".
# Prints an error and exits with STATE_UNKNOWN if the StorCLI command fails.
sub getCVStatus {
    my $sudo = $_[0];
    my $storcli = $_[1];
    my $controller = $_[2];
    my $command = "$sudo $storcli /c$controller/cv show status";
    my $status = 0;
    my $statusMessage = '';
    my @output = `$command`;

    if (!checkCommandStatus(\@output)) {
        print "Invalid StorCLI command! ($command)\n";
        exit(STATE_UNKNOWN);
    }

    my $currBlock;
    foreach my $rawLine (@output) {
        if ($rawLine =~ /^(Cachevault_Info|Firmware_Status)/) {
            $currBlock = $1;
            next;
        }
        next unless defined($currBlock);

        (my $line = $rawLine) =~ s/^\s+|\s+$//g; # trimmed copy of the line
        if ($currBlock eq 'Cachevault_Info' && $line =~ /^State/) {
            # Name and value are separated by at least two blanks
            my @vals = split(/\s{2,}/, $line);
            my $state = defined($vals[1]) ? $vals[1] : '';
            if ($state ne "Optimal") {
                $status = getExitState($status, STATE_WARNING);
                if ($VERBOSITY == 0) { $statusMessage .= "CacheVault state not optimal, "; }
                # Report the parsed state; $1 is not set by the match above.
                if ($VERBOSITY >= 1) { $statusMessage .= "Cachevault Info state $state, "; }
            }
        }
        elsif ($currBlock eq 'Firmware_Status' && $line =~ /^Replacement required/) {
            my ($answer) = $line =~ /([a-zA-Z0-9]*)$/;
            if (!defined($answer) || $answer ne "No") {
                $status = getExitState($status, STATE_WARNING);
                $statusMessage .= "CacheVault replacement required, ";
            }
        }
    }
    return ($status, $statusMessage);
}
# Checks whether a BBU or a CV is present
# - One of the two show commands must return 'Success'
#
# Parameters:
#   $_[0] - sudo prefix
#   $_[1] - StorCLI command
#   $_[2] - controller number
# Returns:
#   (bbu, cv), each 1 if the respective "show" command succeeded, else 0.
sub checkBBUorCVIsPresent {
    my ($sudo, $storcli, $controller) = @_;

    # Run "show" for the BBU first, then for the CacheVault module
    my ($bbu, $cv) = map {
        my $command = "$sudo $storcli /c$controller/$_ show";
        my @output = `$command`;
        checkCommandStatus(\@output) ? 1 : 0;
    } ('bbu', 'cv');

    return ($bbu, $cv);
}
# Nagios development guidelines: temperature threshold scheme
# http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT
# Returns a temperature range (array) in or out which a temperature should be
# Array content: ("in" or "out", range from, range to)
# Example ranges:
# Generate an alert if x...
# -Tw 10 < 0 or > 10, (outside the range of {0 .. 10})
# -Tw 10: < 10, (outside {10 .. inf})
# -Tw ~:10 > 10, (outside the range of {-inf .. 10})
# -Tw 10:20 < 10 or > 20, (outside the range of {10 .. 20})
# -Tw @10:20 >= 10 and <= 20, (inside the range of {10 .. 20})
#
# Parameters:
#   $_[0] - array reference with the threshold strings given on the command
#           line; only the first entry is evaluated
#   $_[1] - default upper limit, used when no threshold was given
# Returns:
#   ("in"|"out", from, to)
# Prints an error and exits with STATE_UNKNOWN if the threshold is malformed.
sub getThresholds {
    my @thresholds = @{($_[0])};
    my $default = $_[1];

    if (scalar(@thresholds) == 0) {
        return ("out", -273, $default);
    }

    my $spec = $thresholds[0];
    $spec = '' unless defined($spec);

    # Every bound needs at least one digit. Empty bounds such as "@:20",
    # ":" or "" used to pass validation and were later compared as numbers.
    if ($spec =~ /^\@([0-9]+)\:([0-9]+)$/) {
        return ("in", $1, $2);
    }
    if ($spec =~ /^\~\:([0-9]+)$/) {
        return ("out", -273, $1);
    }
    if ($spec =~ /^([0-9]+)\:([0-9]+)$/) {
        return ("out", $1, $2);
    }
    if ($spec =~ /^([0-9]+)\:$/) {
        # "10:" alerts below 10, expressed as inside {-273 .. 9}
        return ("in", -273, ($1 - 1));
    }
    if ($spec =~ /^([0-9]+)$/) {
        return ("out", 0, $1);
    }

    print "Invalid temperature parameter!\n";
    exit(STATE_UNKNOWN);
}
# Entry point: parses the command line, locates StorCLI (and sudo on Linux),
# validates the parameters, runs the enabled checks and prints the
# Nagios/Icinga result line. Exits with the resulting plugin state.
MAIN: {
    my ($storcli, $sudo, $noSudo);
    my $controller = 0;
    my @enclosures;
    my @logDevices;
    my @physDevices;
    my @temperature_w;
    my @temperature_c;
    my @physicalDeviceTemperature_w;
    my @physicalDeviceTemperature_c;
    my $bbu = 1;
    my $platform = $^O;
    my $statusMessage = '';
    # The version is only printed after StorCLI has been located; a callback
    # running during option parsing has no StorCLI command to call yet.
    my $showVersion = 0;
    if ( !(GetOptions(
        'h|help' => sub {displayHelp();},
        'v|verbose' => sub {$VERBOSITY = 1 },
        'vv' => sub {$VERBOSITY = 2},
        'vvv' => sub {$VERBOSITY = 3},
        'V|version' => \$showVersion,
        'C|controller=i' => \$controller,
        'EID|enclosure=s' => \@enclosures,
        'LD|logicaldevice=s' => \@logDevices,
        'PD|physicaldevice=s' => \@physDevices,
        'Tw|temperature-warn=s' => \@temperature_w,
        'Tc|temperature-critical=s' => \@temperature_c,
        'PDTw|physicaldevicetemperature-warn=s' => \@physicalDeviceTemperature_w,
        'PDTc|physicaldevicetemperature-critical=s' => \@physicalDeviceTemperature_c,
        'Im|ignore-media-errors=i' => \$ignerr_m,
        'Io|ignore-other-errors=i' => \$ignerr_o,
        'Ip|ignore-predictive-fail-count=i' => \$ignerr_p,
        'Is|ignore-shield-counter=i' => \$ignerr_s,
        'p|path=s' => \$storcli,
        'b|BBU=i' => \$bbu,
        'noenclosures=i' => \$NOENCLOSURES,
        'nosudo' => \$noSudo,
    ))) {
        print $NAME . " Version: " . $VERSION ."\n";
        displayUsage();
        exit(STATE_UNKNOWN);
    }

    # Check storcli tool
    if (!defined($storcli)) {
        # File::Which is optional: if it cannot be loaded the lookup is
        # skipped and the "cannot find storcli" error below is reported.
        if (eval { require File::Which; 1 }) {
            my @candidates = ($platform eq 'linux')
                ? ('storcli', 'storcli64')
                : ('storcli.exe', 'storcli64.exe');
            foreach my $candidate (@candidates) {
                $storcli = File::Which::which($candidate);
                last if defined($storcli);
            }
        }
    }
    my $storcliUsable = (defined($storcli) && -x $storcli) ? 1 : 0;
    if ($showVersion && !$storcliUsable) {
        # Without StorCLI only the plugin version can be shown
        print $NAME . "\nVersion: " . $VERSION . "\n\n";
        exit(STATE_OK);
    }
    if (!$storcliUsable) {
        print "Error: cannot find storcli executable.\n";
        exit(STATE_UNKNOWN);
    }
    if ($platform eq 'linux') {
        if (!defined($noSudo)) {
            # Fill the outer $sudo instead of shadowing it with a new lexical
            chomp($sudo = `which sudo`);
            if (! -x $sudo) {
                print "Error: cannot find sudo executable.\n";
                exit(STATE_UNKNOWN);
            }
            $storcli = $sudo.' '.$storcli;
        }
    }
    if ($showVersion) {
        # $storcli already carries the sudo prefix, so none is passed here
        displayVersion('', $storcli);
    }

    # Prepare storcli command
    $storcli .= " /c$controller";

    # Input validation
    #FIXME Replace with storcli show ctrlcount
    # my @controllerVersion = `$storcli /c$controller show all`;
    # if($controllerVersion[2] eq "Description = Controller $controller not found\n") {
    # print "Invalid controller number, device not found!\n";
    # exit(STATE_UNKNOWN);
    # }
    if (($bbu != 1) && ($bbu != 0)) {
        print "Invalid BBU/CV parameter, must be 0 or 1!\n";
        exit(STATE_UNKNOWN);
    }
    @enclosures = split(/,/,join(',', @enclosures));
    @logDevices = split(/,/,join(',', @logDevices));
    @physDevices = split(/,/,join(',', @physDevices));

    # check given thresholds
    if (@temperature_c && !@temperature_w) {
        print "Please also specify warning threshold!\n";
        displayUsage();
        exit(STATE_UNKNOWN);
    }
    @temperature_w = getThresholds(\@temperature_w, $C_TEMP_WARNING);
    @temperature_c = getThresholds(\@temperature_c, $C_TEMP_CRITICAL);
    if (@physicalDeviceTemperature_c && !@physicalDeviceTemperature_w) {
        print "Please also specify PD warning threshold!\n";
        displayUsage();
        exit(STATE_UNKNOWN);
    }
    @physicalDeviceTemperature_w = getThresholds(\@physicalDeviceTemperature_w, $PD_TEMP_WARNING);
    @physicalDeviceTemperature_c = getThresholds(\@physicalDeviceTemperature_c, $PD_TEMP_CRITICAL);

    # Set exit status
    my $exitstatus = 0;
    my $newexitstatus = 0;
    my $newstatusMessage = '';

    # The controller, BBU and CacheVault checks below are currently disabled.
    # ($newexitstatus, $statusMessage) = getControllerStatus($sudo, $storcli, $controller, \@temperature_w, \@temperature_c);
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);
    # my ($bbuPresent,$cvPresent) = (0,0);
    # if($bbu == 1){
    # ($bbuPresent,$cvPresent) = checkBBUorCVIsPresent($sudo, $storcli, $controller);
    # if($bbuPresent == 0 && $cvPresent == 0){
    # $exitstatus = getExitState(STATE_CRITICAL, $exitstatus);
    # $newstatusMessage .= "No BBU or CV found, ";
    # }
    # }
    # if($bbuPresent == 1){
    # ($newexitstatus, $statusMessage) = getBBUStatus($sudo, $storcli, $controller);
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);
    # }
    # if($cvPresent == 1){
    # ($newexitstatus, $statusMessage) = getCVStatus($sudo, $storcli, $controller);
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);
    # }
    # ($newexitstatus, $statusMessage) = getLogicalDeviceStatus($sudo, $storcli, $controller, \@logDevices, "init");
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);

    ($newexitstatus, $statusMessage) = getLogicalDeviceStatus($storcli, \@logDevices, "all");
    $newstatusMessage .= $statusMessage if defined($statusMessage);
    # Fold the logical device result in; it used to be overwritten by the
    # physical device check below without ever reaching the exit status.
    $exitstatus = getExitState($newexitstatus, $exitstatus);

    # ($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "initialization");
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);
    # ($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "rebuild");
    # $newstatusMessage .= $statusMessage;
    # $exitstatus = getExitState($newexitstatus, $exitstatus);

    ($newexitstatus, $statusMessage) = getPhysDeviceStatus($sudo, $storcli, $controller, \@enclosures, \@physDevices, \@physicalDeviceTemperature_w, \@physicalDeviceTemperature_c, "all");
    $newstatusMessage .= $statusMessage if defined($statusMessage);
    $exitstatus = getExitState($newexitstatus, $exitstatus);

    # Every finding ends with ", "; drop the separator after the last one
    $newstatusMessage =~ s/, $//;
    if ($exitstatus == 0) {
        print "LSIRAID OK (Ctrl #$controller) | STATUS=$exitstatus\n";
    }
    elsif ($exitstatus == 1) {
        print "LSIRAID WARNING (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n";
    }
    elsif ($exitstatus == 2) {
        print "LSIRAID CRITICAL (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n";
    }
    else {
        # Any other state (e.g. UNKNOWN) still gets a status line
        print "LSIRAID UNKNOWN (Ctrl #$controller): [$newstatusMessage] | STATUS=$exitstatus\n";
    }
    exit($exitstatus);
}