140 lines
3.5 KiB
Bash
Executable File
140 lines
3.5 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
# ---------------------------------------------------------------------------
# EPICS monitor script
#   - polls an EPICS PV
#   - considers the system "OK" if the numeric PV value is > OK_THRESHOLD
#   - sends ONE alert when the system becomes NOT OK (or the PV is unreadable)
#   - waits until the system is OK again before sending another alert
#
# Mail/SMS transport is delegated to smtp_send.py (direct SMTP to smtp.psi.ch).
# IMPORTANT: the From address must be a registered sender (e.g. cSAXS@psi.ch).
# ---------------------------------------------------------------------------
|
|
|
|
# Alert recipients, one address per array element:
#   - Swisscom mail2sms gateway address (phone number + domain)
#   - optional CC to a normal mailbox for traceability
RECIPIENTS=(
  "0041793083005@sms.switch.ch"
  "andreas.menzel@psi.ch"
)

# Default subject line and sender address handed to smtp_send.py.
ALERT_SUBJECT="EPICS alert"
FROM_ADDR="cSAXS@psi.ch"

# PV to poll; the system counts as OK while its numeric value is > OK_THRESHOLD.
PV_NAME="AGEBD-PARAMS:INJECTION-RATE"
OK_THRESHOLD=0

# Seconds to sleep after an OK poll (POLL_INTERVAL) and after a NOT-OK or
# unreadable poll (ERROR_INTERVAL).
POLL_INTERVAL=5
ERROR_INTERVAL=60
|
|
|
|
#######################################
# Print one timestamped log line.
# Arguments: $* - message words, joined by the first character of IFS
#                 (a space by default)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
}
|
|
|
|
#######################################
# Abort the script unless the given command can be found.
# Arguments: $1 - command name, looked up with `command -v`
# Outputs:   an ERROR line via log() when the command is missing
# Returns:   0 when the command exists; otherwise exits the shell with
#            status 2 (it does not return to the caller)
#######################################
require_command() {
  local cmd="$1"

  if ! command -v "$cmd" >/dev/null 2>&1; then
    log "ERROR: required command not found: $cmd"
    exit 2
  fi
}
|
|
|
|
# Resolve smtp_send.py relative to this script, not the current working directory.
# CDPATH is cleared and cd's stdout is discarded: when a relative directory is
# found through CDPATH, cd prints the resolved path, which would otherwise be
# captured in front of pwd's output and corrupt SCRIPT_DIR.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
PYTHON_SENDER="${SCRIPT_DIR}/smtp_send.py"
|
|
|
|
#######################################
# Test whether a string is a plain decimal number.
# Accepts integers, decimals, and scientific notation with an optional
# leading '-' (a leading '+' is not accepted).
# Examples accepted: 1, -1, 1.0, 1., .5, 1e-3, -2.3E+4
# Arguments: $1 - string to test
# Returns:   0 if the whole string matches, 1 otherwise
#######################################
is_numeric() {
  local s="$1"
  # Mantissa is either digits with an optional fractional part, or a bare
  # fractional part; the exponent is optional.
  local -r number_re='^-?([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][+-]?[0-9]+)?$'

  [[ "$s" =~ $number_re ]]
}
|
|
|
|
#######################################
# Decide whether a PV reading counts as "OK".
# Contract:
#   - Returns 0 (true) iff the value is numeric AND value > OK_THRESHOLD.
#   - Non-numeric values are treated as NOT OK.
#   - A missing or non-numeric OK_THRESHOLD is also treated as NOT OK, so a
#     misconfigured threshold can never silently report a healthy system.
# Globals:   OK_THRESHOLD (read)
# Arguments: $1 - PV value as text
# Outputs:   an ERROR line via log() when OK_THRESHOLD is not numeric
# Returns:   0 if OK, 1 otherwise
#######################################
is_ok() {
  local v="$1"
  local threshold="${OK_THRESHOLD-}"

  is_numeric "$v" || return 1

  if ! is_numeric "$threshold"; then
    log "ERROR: OK_THRESHOLD is not numeric: '$threshold'"
    return 1
  fi

  # "+ 0" forces a numeric comparison; awk compares as strings whenever one
  # operand does not look like a number.
  awk -v v="$v" -v t="$threshold" 'BEGIN { exit !((v + 0) > (t + 0)) }'
}
|
|
|
|
#######################################
# Send one alert message to every configured recipient.
# Each recipient gets its own smtp_send.py invocation, so one failing
# address does not stop delivery to the others.
# Globals:   RECIPIENTS, PYTHON_SENDER, FROM_ADDR, ALERT_SUBJECT (all read)
# Arguments: $1 - message body
#            $2 - subject (optional; defaults to ALERT_SUBJECT)
# Outputs:   an ERROR line via log() per failed recipient, or when no
#            recipient is configured
# Returns:   0 if every send succeeded; 1 if any send failed or RECIPIENTS
#            is empty (nothing was sent, so success must not be reported)
#######################################
send_alert() {
  local body="$1"
  local subject="${2:-$ALERT_SUBJECT}"
  local rc=0
  local to

  if ((${#RECIPIENTS[@]} == 0)); then
    log "ERROR: no alert recipients configured"
    return 1
  fi

  for to in "${RECIPIENTS[@]}"; do
    # smtp_send.py expects: --to (repeatable), optional flags, then message.
    if ! python3 "$PYTHON_SENDER" \
      --to "$to" \
      --subject "$subject" \
      --from-addr "$FROM_ADDR" \
      "$body"; then
      log "ERROR: failed to send alert to '$to'"
      rc=1
    fi
  done

  return "$rc"
}
|
|
|
|
# --- startup checks ---------------------------------------------------------

# Both external tools are needed: caget reads the PV, python3 runs the sender.
require_command caget
require_command python3

# Refuse to start without the sender script (exit status 2).
if [[ ! -f "$PYTHON_SENDER" ]]; then
  log "ERROR: Python sender script not found: $PYTHON_SENDER"
  exit 2
fi
|
|
|
|
# --- main loop --------------------------------------------------------------
# alert_sent is the only state carried between polls:
#   0 - no alert outstanding; the next NOT-OK poll triggers one
#   1 - alert already delivered; stay quiet until the PV is OK again
alert_sent=0
log "Starting EPICS monitor for PV '$PV_NAME' (OK if value > $OK_THRESHOLD) ..."

while true; do
  # caget's stderr is discarded; only its exit status is reported.
  if value="$(caget -noname -nounit "$PV_NAME" 2>/dev/null)"; then
    status=0
    pv_read_ok=1
  else
    status=$?
    pv_read_ok=0
    log "WARNING: caget failed for '$PV_NAME' (exit $status)"
  fi

  if ((pv_read_ok)) && is_ok "$value"; then
    # System OK (again): re-arm alerting and keep polling at the fast rate.
    if ((alert_sent)); then
      log "PV back to OK (value='$value')"
    fi
    alert_sent=0
    pause="$POLL_INTERVAL"
  else
    # Not OK: either unreadable, or the value indicates not OK.
    pause="$ERROR_INTERVAL"

    if ((!alert_sent)); then
      if ((pv_read_ok)); then
        log "PV not OK (value='$value') - sending alert"
        msg="Alert: PV $PV_NAME not OK (value='$value') at $(date)"
      else
        log "PV unreadable - sending alert"
        msg="Alert: PV $PV_NAME unreadable (caget exit $status) at $(date)"
      fi

      if send_alert "$msg" "$ALERT_SUBJECT"; then
        alert_sent=1
        log "Alert sent."
      else
        # alert_sent stays 0, so the next NOT-OK iteration tries again.
        log "ERROR: alert send failed; will retry"
      fi
    fi
  fi

  sleep "$pause"
done
|