cleaning up EPICS2SMS.sh
This commit is contained in:
+125
-29
@@ -2,72 +2,168 @@
|
||||
|
||||
# --- configuration ---------------------------------------------------------
#
# All settings below are plain shell variables read by the helper functions
# and by the main loop further down.

# Example of an enum PV (typically returned with quotes by caget, e.g. "\"ON\""):
# PV_NAME="X12SA-FE-VMMG-0010:PLC_RELAY-D"

PV_NAME="AGEBD-PARAMS:INJECTION-RATE"   # EPICS PV to monitor
PHONE_NUMBER="0041793083005"            # phone number(s), space separated (may be empty)
EMAIL="andreas.menzel@psi.ch"           # email address(es), space separated (may be empty)
POLL_INTERVAL=5                         # seconds between polls while Running
ERROR_INTERVAL=60                       # seconds between polls while NOT Running

# Define what "Running" means for the PV:
#   CHECK_MODE="numeric": Running if numeric PV value > OK_THRESHOLD
#   CHECK_MODE="enum":    Running if PV string equals OK_STATE exactly
CHECK_MODE="numeric"                    # numeric | enum
OK_THRESHOLD=0                          # used when CHECK_MODE=numeric
OK_STATE="\"ON\""                       # used when CHECK_MODE=enum (match exact caget output)

# Notification delivery
SENDMAIL="/usr/sbin/sendmail"           # mail submission program, invoked with -t
SMS_GATEWAY_DOMAIN="sms.switch.ch"      # phone numbers are addressed as <number>@<domain>

# Set to 1 to print the email payload instead of sending it (useful for testing)
DRY_RUN=0
|
||||
|
||||
# --- helpers ---------------------------------------------------------------
|
||||
|
||||
#######################################
# Print a timestamped log line to stdout.
# Arguments: $* - message text (all arguments are joined with spaces)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
}
|
||||
|
||||
#######################################
# Abort the script unless a command is available on PATH.
# Arguments: $1 - name of the command to look up
# Outputs:   an ERROR line via log() when the command is missing
# Returns:   0 if the command exists; otherwise exits the shell with status 2
#######################################
require_command() {
  local cmd="$1"

  # command -v is the portable "is this installed?" probe (no output wanted).
  if command -v "$cmd" >/dev/null 2>&1; then
    return 0
  fi

  log "ERROR: required command not found: $cmd"
  exit 2
}
|
||||
|
||||
#######################################
# Decide whether a PV value should be treated as "Running".
# Globals:   CHECK_MODE   (read) - "numeric" or "enum"
#            OK_THRESHOLD (read) - lower bound (exclusive) in numeric mode
#            OK_STATE     (read) - exact string expected in enum mode
# Arguments: $1 - PV value as returned by caget
# Returns:
#   0 => Running
#   1 => Not Running
#   2 => Configuration error (unknown CHECK_MODE or non-numeric OK_THRESHOLD)
#######################################
is_running() {
  local v="$1"
  # "Looks numeric": optional whitespace and sign, then a digit or ".<digit>".
  # awk converts the longest numeric prefix, so only the prefix is checked.
  local numeric_re='^[[:space:]]*[-+]?(\.[0-9]|[0-9])'

  case "$CHECK_MODE" in
    numeric)
      if [[ ! "$OK_THRESHOLD" =~ $numeric_re ]]; then
        log "ERROR: OK_THRESHOLD='$OK_THRESHOLD' is not numeric"
        return 2
      fi
      # awk's (v+0) turns non-numeric text into 0, which would count as
      # Running whenever OK_THRESHOLD is negative. Reject such values first.
      [[ "$v" =~ $numeric_re ]] || return 1
      # IMPORTANT: pass PV value as data (-v v="...") rather than splicing it
      # into awk code. (v+0) forces a numeric comparison.
      awk -v v="$v" -v t="$OK_THRESHOLD" 'BEGIN { exit !((v+0) > (t+0)) }'
      ;;
    enum)
      [[ "$v" == "$OK_STATE" ]]
      ;;
    *)
      log "ERROR: unknown CHECK_MODE='$CHECK_MODE' (expected 'numeric' or 'enum')"
      return 2
      ;;
  esac
}
|
||||
|
||||
#######################################
# Send one alert message to every configured recipient.
#
# Recipients are delivered to as follows:
#   - phone numbers via <number>@$SMS_GATEWAY_DOMAIN
#   - email addresses (any token containing "@") directly
#
# Usage: send_notification "<phone numbers>" "<email addresses>" "<message>"
#
# Globals:   PV_NAME            (read) - used in the Subject: header
#            SMS_GATEWAY_DOMAIN (read) - domain appended to phone numbers
#            SENDMAIL           (read) - mailer, run as "$SENDMAIL -t"
#            DRY_RUN            (read) - non-zero: print payload via cat instead
# Arguments: $1 - phone number(s), space separated (may be empty)
#            $2 - email address(es), space separated (may be empty)
#            $3 - message text
# Outputs:   in dry-run mode, the full mail payload on stdout
# Returns:   3 if no recipient is configured; otherwise the exit status of
#            the send command
#######################################
send_notification() {
  local numbers="$1 $2" # combine PHONE_NUMBER and EMAIL
  local message="$3"

  local subject="[EPICS] $PV_NAME"
  local -a to_lines=()

  # Split the combined recipient string on whitespace.
  local -a recipients=()
  read -r -a recipients <<<"$numbers"

  # Build one To: header line per recipient.
  local tok
  for tok in "${recipients[@]}"; do
    if [[ "$tok" == *"@"* ]]; then
      to_lines+=("To: $tok")
    else
      to_lines+=("To: ${tok}@${SMS_GATEWAY_DOMAIN}")
    fi
  done

  if ((${#to_lines[@]} == 0)); then
    log "ERROR: no recipients configured (PHONE_NUMBER and EMAIL are empty)"
    return 3
  fi

  # Append a timestamp
  local message_with_date
  message_with_date="$message"$'\n'"$(date)"

  # Choose send command (real vs dry-run)
  local -a send_cmd=("$SENDMAIL" -t)
  if ((DRY_RUN)); then
    send_cmd=(cat)
  fi

  # Headers, a blank separator line, then the body; the function's exit
  # status is that of the send command (last stage of the pipeline).
  {
    printf '%s\n' "${to_lines[@]}"
    printf 'Subject: %s\n' "$subject"
    printf '\n'
    printf '%s\n' "$message_with_date"
  } | "${send_cmd[@]}"
}
|
||||
|
||||
# --- main loop ------------------------------------------------------------
|
||||
|
||||
# caget is needed for every poll; abort early (exit 2) if it is missing.
require_command caget

# Basic startup sanity checks

# At least one recipient must remain after stripping spaces from both lists.
if [[ -z "${PHONE_NUMBER// /}" && -z "${EMAIL// /}" ]]; then
  log "ERROR: at least one recipient must be configured (PHONE_NUMBER and/or EMAIL)."
  exit 2
fi

# The mailer is only required when messages are really sent.
if ((!DRY_RUN)) && [[ ! -x "$SENDMAIL" ]]; then
  log "ERROR: SENDMAIL='$SENDMAIL' is not executable (set DRY_RUN=1 to test without sendmail)."
  exit 2
fi

alert_sent=0 # 0 = no alert sent for current outage, 1 = already sent

log "Starting EPICS monitor for PV '$PV_NAME' (CHECK_MODE=$CHECK_MODE)..."
|
||||
|
||||
# Poll the PV forever. While it is Running, poll every POLL_INTERVAL seconds;
# otherwise send one alert per outage and poll every ERROR_INTERVAL seconds.
while true; do
  # Get PV value as a plain string; adjust flags if needed (-S for string PVs)
  value=$(caget -noname -nounit "$PV_NAME" 2>/dev/null)
  status=$?

  if [[ $status -ne 0 ]]; then
    # If caget fails, treat that as "not Running" (e.g. IOC down). The
    # placeholder text is not passed to is_running, so it can never be
    # misread as a valid PV value.
    log "WARNING: caget failed for '$PV_NAME' (exit $status)"
    value="UNAVAILABLE (caget exit $status)"
    rc=1
  else
    # The PV value is handed over as an argument, never spliced into code.
    is_running "$value"
    rc=$?
  fi

  if [[ $rc -eq 0 ]]; then
    # System is OK again
    if [[ $alert_sent -eq 1 ]]; then
      log "PV '$PV_NAME' back to Running (value='$value')."
    fi
    alert_sent=0
    sleep "$POLL_INTERVAL"
  else
    if [[ $rc -eq 2 ]]; then
      log "ERROR: stopping due to configuration error in CHECK_MODE."
      exit 2
    fi

    # System not OK
    if [[ $alert_sent -eq 0 ]]; then
      msg="Alert: PV $PV_NAME is '$value'."
      # Mark the outage as reported only when delivery succeeded, so a
      # failed send is retried on the next check.
      if send_notification "$PHONE_NUMBER" "$EMAIL" "$msg"; then
        alert_sent=1
        log "Alert sent."
      else
        log "ERROR: failed to send alert; will retry on next check."
      fi
    fi
    sleep "$ERROR_INTERVAL"
  fi
done
|
||||
|
||||
Reference in New Issue
Block a user