From d4ccab2ca972e6c0e0d2a822404d5496a67354eb Mon Sep 17 00:00:00 2001
From: menzel
Date: Fri, 12 Dec 2025 22:19:09 +0100
Subject: [PATCH] cleaning up EPICS2SMS.sh

---
Review amendments to the added lines (free-form notes, not part of the
commit message):
  * a failed caget read is no longer passed to is_running, so it can never
    be reported as "Running"
  * numeric mode accepts only plain decimal/scientific literals for the PV
    value and for OK_THRESHOLD
  * awk is checked at startup when CHECK_MODE=numeric
The blob ids on the "index" line were not regenerated for these amendments.

 EPICS2SMS.sh | 166 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 137 insertions(+), 29 deletions(-)

diff --git a/EPICS2SMS.sh b/EPICS2SMS.sh
index 60241f5..ab1d4d6 100755
--- a/EPICS2SMS.sh
+++ b/EPICS2SMS.sh
@@ -2,72 +2,180 @@
 
 # --- configuration ---------------------------------------------------------
 
-# PV_NAME="X12SA-FE-VMMG-0010:PLC_RELAY-D"  # a PV with states "ON"/"OFF"
-PV_NAME="AGEBD-PARAMS:INJECTION-RATE"  # a PV with numerical value + units
-PHONE_NUMBER="0041793083005"  # the phone number(s), space separated
-EMAIL="andreas.menzel@psi.ch"  # the email address(es), space separated
+# Example of an enum PV (typically returned with quotes by caget, e.g. "\"ON\""):
+#   PV_NAME="X12SA-FE-VMMG-0010:PLC_RELAY-D"
+
+PV_NAME="AGEBD-PARAMS:INJECTION-RATE"  # EPICS PV to monitor
+PHONE_NUMBER="0041793083005"  # phone number(s), space separated (may be empty)
+EMAIL="andreas.menzel@psi.ch"  # email address(es), space separated (may be empty)
 POLL_INTERVAL=5  # seconds between polls while Running
 ERROR_INTERVAL=60  # seconds between polls while NOT Running
 
-send_sms() {
-  local numbers="$1 $2"  # to combine PHONE_NUMBER and EMAIL
-  local message="$3"
-  local email_hdr=""
+# Define what "Running" means for the PV:
+#   CHECK_MODE="numeric": Running if numeric PV value > OK_THRESHOLD
+#   CHECK_MODE="enum": Running if PV string equals OK_STATE exactly
+CHECK_MODE="numeric"  # numeric | enum
+OK_THRESHOLD=0  # used when CHECK_MODE=numeric
+OK_STATE="\"ON\""  # used when CHECK_MODE=enum (match exact caget output)
 
-  # build To: header(s)
-  for tok in $numbers; do
+# Notification delivery
+SENDMAIL="/usr/sbin/sendmail"
+SMS_GATEWAY_DOMAIN="sms.switch.ch"
+
+# Set to 1 to print the email payload instead of sending it (useful for testing)
+DRY_RUN=0
+
+# --- helpers ---------------------------------------------------------------
+
+log() {
+  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
+}
+
+require_command() {
+  local cmd="$1"
+  command -v "$cmd" >/dev/null 2>&1 || {
+    log "ERROR: required command not found: $cmd"
+    exit 2
+  }
+}
+
+is_running() {
+  # Decide whether a PV value should be treated as "Running".
+  # Returns:
+  #   0 => Running
+  #   1 => Not Running (includes non-numeric values in numeric mode)
+  #   2 => Configuration error (unknown CHECK_MODE, non-numeric OK_THRESHOLD)
+  local v="$1"
+
+  case "$CHECK_MODE" in
+    numeric)
+      # Accept only plain decimal/scientific literals. awk's (v+0) silently
+      # coerces text such as "UNAVAILABLE" to 0, which would be reported as
+      # Running whenever OK_THRESHOLD is negative.
+      local num_re='^[[:blank:]]*[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?[[:blank:]]*$'
+      if [[ ! "$OK_THRESHOLD" =~ $num_re ]]; then
+        log "ERROR: OK_THRESHOLD='$OK_THRESHOLD' is not a number"
+        return 2
+      fi
+      [[ "$v" =~ $num_re ]] || return 1
+      # Pass the PV value as data (-v v="...") rather than splicing it into awk code.
+      awk -v v="$v" -v t="$OK_THRESHOLD" 'BEGIN { exit !((v+0) > (t+0)) }'
+      ;;
+    enum)
+      [[ "$v" == "$OK_STATE" ]]
+      ;;
+    *)
+      log "ERROR: unknown CHECK_MODE='$CHECK_MODE' (expected 'numeric' or 'enum')"
+      return 2
+      ;;
+  esac
+}
+
+send_notification() {
+  # This sends to:
+  #   - phone numbers via @sms.switch.ch
+  #   - email addresses directly
+  #
+  # Usage: send_notification "<phone numbers>" "<email addresses>" "<message>"
+  local numbers="$1 $2"  # combine PHONE_NUMBER and EMAIL
+  local message="$3"
+
+  local subject="[EPICS] $PV_NAME"
+  local -a to_lines=()
+
+  # Build To: header lines
+  local -a recipients=()
+  read -r -a recipients <<<"$numbers"
+
+  local tok
+  for tok in "${recipients[@]}"; do
     if [[ "$tok" == *"@"* ]]; then
-      email_hdr+="To:$tok"$'\n'
+      to_lines+=("To: $tok")
     else
-      email_hdr+="To:${tok}@sms.switch.ch"$'\n'
+      to_lines+=("To: ${tok}@${SMS_GATEWAY_DOMAIN}")
     fi
   done
 
-  # append a timestampe
+  if ((${#to_lines[@]} == 0)); then
+    log "ERROR: no recipients configured (PHONE_NUMBER and EMAIL are empty)"
+    return 3
+  fi
+
+  # Append a timestamp
   local message_with_date
   message_with_date="$message"$'\n'"$(date)"
 
-  # mimic: (echo '$email_hdr'; echo '$message_with_date') | /usr/sbin/sendmail -t
+  # Choose send command (real vs dry-run)
+  local -a send_cmd=("$SENDMAIL" -t)
+  ((DRY_RUN)) && send_cmd=(cat)
+
   {
-    printf "%s" "$email_hdr"
-    printf "\n"
-    printf "%s\n" "$message_with_date"
-  # } | cat  # for testing
-  } | /usr/sbin/sendmail -t  # for production
+    printf '%s\n' "${to_lines[@]}"
+    printf 'Subject: %s\n' "$subject"
+    printf '\n'
+    printf '%s\n' "$message_with_date"
+  } | "${send_cmd[@]}"
 }
 
 # --- main loop ------------------------------------------------------------
 
+require_command caget
+if [[ "$CHECK_MODE" == "numeric" ]]; then
+  require_command awk  # is_running compares floating-point values with awk
+fi
+
+# Basic startup sanity checks
+if [[ -z "${PHONE_NUMBER// /}" && -z "${EMAIL// /}" ]]; then
+  log "ERROR: at least one recipient must be configured (PHONE_NUMBER and/or EMAIL)."
+  exit 2
+fi
+
+if ((!DRY_RUN)); then
+  if [[ ! -x "$SENDMAIL" ]]; then
+    log "ERROR: SENDMAIL='$SENDMAIL' is not executable (set DRY_RUN=1 to test without sendmail)."
+    exit 2
+  fi
+fi
+
 alert_sent=0  # 0 = no alert sent for current outage, 1 = already sent
 
-echo "[$(date)] Starting EPICS monitor for PV '$PV_NAME' ..."
+log "Starting EPICS monitor for PV '$PV_NAME' (CHECK_MODE=$CHECK_MODE)..."
 
 while true; do
-  # Get PV value as a plain string; adjust flags if needed (-S for string)
-  # caget "$PV_NAME"  # only for debugging
+  # Get PV value as a plain string; adjust flags if needed (-S for string PVs)
   value=$(caget -noname -nounit "$PV_NAME" 2>/dev/null)
   status=$?
 
   # If caget fails, treat that as "not Running" (e.g. IOC down)
   if [[ $status -ne 0 ]]; then
-    echo "[$(date)] WARNING: caget failed for '$PV_NAME' (exit $status)"
-    value="UNAVAILABLE"
+    log "WARNING: caget failed for '$PV_NAME' (exit $status)"
+    value="UNAVAILABLE (caget exit $status)"
   fi
 
-  # if [[ "$value" == "\"ON\"" ]]; then  # for a PV with states "ON"/"OFF"
-  if awk "BEGIN { exit !($value < 0) }"; then  # to check whether value is smaller than zero
+  # A failed read is never "Running", whatever CHECK_MODE/OK_THRESHOLD say.
+  if [[ $status -eq 0 ]] && is_running "$value"; then
     # System is OK again
     if [[ $alert_sent -eq 1 ]]; then
-      echo "[$(date)] PV '$PV_NAME' back to Running."
+      log "PV '$PV_NAME' back to Running (value='$value')."
     fi
     alert_sent=0
     sleep "$POLL_INTERVAL"
   else
+    rc=$?
+    if [[ $rc -eq 2 ]]; then
+      log "ERROR: stopping due to configuration error (CHECK_MODE / OK_THRESHOLD)."
+      exit 2
+    fi
+
     # System not OK
     if [[ $alert_sent -eq 0 ]]; then
       msg="Alert: PV $PV_NAME is '$value'."
-      send_sms "$PHONE_NUMBER" "$EMAIL" "$msg"
-      alert_sent=1
+      if send_notification "$PHONE_NUMBER" "$EMAIL" "$msg"; then
+        alert_sent=1
+        log "Alert sent."
+      else
+        log "ERROR: failed to send alert; will retry on next check."
+      fi
     fi
     sleep "$ERROR_INTERVAL"
   fi