cleaning up EPICS2SMS.sh

This commit is contained in:
2025-12-12 22:19:09 +01:00
parent 62249261a2
commit d4ccab2ca9
+125 -29
View File
@@ -2,72 +2,168 @@
# --- configuration ---------------------------------------------------------
# Which PV is watched, who is notified, and how often the PV is polled.
# At least one of PHONE_NUMBER / EMAIL must be non-empty; the startup checks
# abort the script otherwise.
#
# Example of an enum PV (typically returned with quotes by caget, e.g. "\"ON\""):
# PV_NAME="X12SA-FE-VMMG-0010:PLC_RELAY-D"
PV_NAME="AGEBD-PARAMS:INJECTION-RATE" # EPICS PV to monitor
PHONE_NUMBER="0041793083005"          # phone number(s), space separated (may be empty)
EMAIL="andreas.menzel@psi.ch"         # email address(es), space separated (may be empty)
POLL_INTERVAL=5                       # seconds between polls while Running
ERROR_INTERVAL=60                     # seconds between polls while NOT Running
send_sms() {
local numbers="$1 $2" # to combine PHONE_NUMBER and EMAIL
local message="$3"
local email_hdr=""
# What counts as "Running" for the monitored PV:
#   CHECK_MODE=numeric -> Running when the numeric PV value is > OK_THRESHOLD
#   CHECK_MODE=enum    -> Running when the PV string is exactly OK_STATE
CHECK_MODE='numeric' # numeric | enum
OK_THRESHOLD='0'     # consulted only when CHECK_MODE=numeric
OK_STATE='"ON"'      # consulted only when CHECK_MODE=enum (exact caget output, quotes included)
# build To: header(s)
for tok in $numbers; do
# How notifications are delivered: a sendmail binary, plus the mail domain
# that turns a phone number into an SMS address (<number>@<domain>).
SENDMAIL='/usr/sbin/sendmail'
SMS_GATEWAY_DOMAIN='sms.switch.ch'
# DRY_RUN=1 prints the email payload instead of sending it (handy for testing)
DRY_RUN='0'
# --- helpers ---------------------------------------------------------------
# Print a message prefixed with a "[YYYY-MM-DD HH:MM:SS]" timestamp.
# Arguments: all arguments are joined with spaces into a single message
# Outputs:   one line on stdout
log() {
  local stamp
  stamp="$(date '+%F %T')"
  printf '[%s] %s\n' "$stamp" "$*"
}
# Abort the script unless the given command can be resolved by the shell.
# Arguments: $1 - name of the command to look up
# Exits:     with status 2, after logging an error, when the lookup fails
require_command() {
  local needed="$1"
  if ! command -v "$needed" >/dev/null 2>&1; then
    log "ERROR: required command not found: $needed"
    exit 2
  fi
}
# Decide whether a PV value should be treated as "Running".
# Globals:   CHECK_MODE, OK_THRESHOLD, OK_STATE (read)
# Arguments: $1 - PV value as a string
# Returns:
#   0 => Running
#   1 => Not Running
#   2 => Configuration error (unknown CHECK_MODE)
is_running() {
  local v="$1"
  # Optional whitespace, optional sign, then digits with an optional fraction.
  # Only the leading part is checked, which is also the part awk converts.
  local numeric_prefix='^[[:space:]]*[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)'

  case "$CHECK_MODE" in
    numeric)
      # A value without a leading number (empty, "UNAVAILABLE ...") is never
      # Running. Without this guard awk would coerce it to 0, which compares
      # as Running whenever OK_THRESHOLD is negative.
      [[ "$v" =~ $numeric_prefix ]] || return 1
      # IMPORTANT: pass the PV value as data (-v v="...") rather than splicing
      # it into awk code. (v+0) forces a numeric comparison.
      awk -v v="$v" -v t="$OK_THRESHOLD" 'BEGIN { exit !((v+0) > (t+0)) }'
      ;;
    enum)
      [[ "$v" == "$OK_STATE" ]]
      ;;
    *)
      log "ERROR: unknown CHECK_MODE='$CHECK_MODE' (expected 'numeric' or 'enum')"
      return 2
      ;;
  esac
}
# Send one alert mail addressed to every configured recipient.
#   - phone numbers are addressed as <number>@$SMS_GATEWAY_DOMAIN
#   - tokens containing "@" are used as email addresses unchanged
#
# Usage: send_notification "<phone numbers>" "<email addresses>" "<message>"
# Globals:   PV_NAME, SMS_GATEWAY_DOMAIN, SENDMAIL, DRY_RUN (read)
# Arguments: $1 - phone number(s), space separated (may be empty)
#            $2 - email address(es), space separated (may be empty)
#            $3 - message body; the output of `date` is appended on its own line
# Returns:   3 when no recipient is given, otherwise the exit status of the
#            send command ("$SENDMAIL" -t, or cat when DRY_RUN is non-zero)
send_notification() {
  local numbers="$1 $2" # combine PHONE_NUMBER and EMAIL
  local message="$3"
  local subject="[EPICS] $PV_NAME"
  local -a to_lines=()

  # Build To: header lines, one per recipient
  local -a recipients=()
  read -r -a recipients <<<"$numbers"
  local tok
  for tok in "${recipients[@]}"; do
    if [[ "$tok" == *"@"* ]]; then
      to_lines+=("To: $tok")
    else
      to_lines+=("To: ${tok}@${SMS_GATEWAY_DOMAIN}")
    fi
  done

  if ((${#to_lines[@]} == 0)); then
    log "ERROR: no recipients configured (PHONE_NUMBER and EMAIL are empty)"
    return 3
  fi

  # Append a timestamp
  local message_with_date
  message_with_date="$message"$'\n'"$(date)"

  # Choose send command (real vs dry-run)
  local -a send_cmd=("$SENDMAIL" -t)
  if ((DRY_RUN)); then
    send_cmd=(cat)
  fi

  # Headers, a blank separator line, then the body; -t makes sendmail take
  # the recipients from the To: headers.
  {
    printf '%s\n' "${to_lines[@]}"
    printf 'Subject: %s\n' "$subject"
    printf '\n'
    printf '%s\n' "$message_with_date"
  } | "${send_cmd[@]}"
}
# --- main loop ------------------------------------------------------------
# Startup: verify the required tools and configuration before polling starts.
require_command caget
# Numeric mode compares the PV value with awk. Without awk every check would
# fail and be reported as an outage, so refuse to start instead.
if [[ "$CHECK_MODE" == "numeric" ]]; then
  require_command awk
fi

# Basic startup sanity checks
if [[ -z "${PHONE_NUMBER// /}" && -z "${EMAIL// /}" ]]; then
  log "ERROR: at least one recipient must be configured (PHONE_NUMBER and/or EMAIL)."
  exit 2
fi
# sendmail is only needed when alerts are really sent
if ((!DRY_RUN)); then
  if [[ ! -x "$SENDMAIL" ]]; then
    log "ERROR: SENDMAIL='$SENDMAIL' is not executable (set DRY_RUN=1 to test without sendmail)."
    exit 2
  fi
fi

alert_sent=0 # 0 = no alert sent for current outage, 1 = already sent
log "Starting EPICS monitor for PV '$PV_NAME' (CHECK_MODE=$CHECK_MODE)..."
while true; do
# Get PV value as a plain string; adjust flags if needed (-S for string)
# caget "$PV_NAME" # only for debugging
# Get PV value as a plain string; adjust flags if needed (-S for string PVs)
value=$(caget -noname -nounit "$PV_NAME" 2>/dev/null)
status=$?
# If caget fails, treat that as "not Running" (e.g. IOC down)
if [[ $status -ne 0 ]]; then
echo "[$(date)] WARNING: caget failed for '$PV_NAME' (exit $status)"
value="UNAVAILABLE"
log "WARNING: caget failed for '$PV_NAME' (exit $status)"
value="UNAVAILABLE (caget exit $status)"
fi
# if [[ "$value" == "\"ON\"" ]]; then # for a PV with states "ON"/"OFF"
if awk "BEGIN { exit !($value < 0) }"; then # to check whether value is smaller than zero
if is_running "$value"; then
# System is OK again
if [[ $alert_sent -eq 1 ]]; then
echo "[$(date)] PV '$PV_NAME' back to Running."
log "PV '$PV_NAME' back to Running (value='$value')."
fi
alert_sent=0
sleep "$POLL_INTERVAL"
else
rc=$?
if [[ $rc -eq 2 ]]; then
log "ERROR: stopping due to configuration error in CHECK_MODE."
exit 2
fi
# System not OK
if [[ $alert_sent -eq 0 ]]; then
msg="Alert: PV $PV_NAME is '$value'."
send_sms "$PHONE_NUMBER" "$EMAIL" "$msg"
alert_sent=1
if send_notification "$PHONE_NUMBER" "$EMAIL" "$msg"; then
alert_sent=1
log "Alert sent."
else
log "ERROR: failed to send alert; will retry on next check."
fi
fi
sleep "$ERROR_INTERVAL"
fi