Updating ensure_jupyter and ra_vscode_jupyter such that kernel startup is faster and more stable.

This commit is contained in:
2026-02-05 18:37:03 +01:00
parent c78ab59714
commit b7faa7f63b
2 changed files with 93 additions and 62 deletions
+67 -18
View File
@@ -14,21 +14,17 @@
# - If not, it submits a new Jupyter job (via jupyter_on_ra.sh) and waits
# until the job is RUNNING.
# - Locates the Slurm stdout logfile for the job.
# - Parses the logfile to extract the Jupyter URL.
# - If the token is missing or redacted (token=...), replaces it with
# a dummy token "cristallina".
# - Parses the logfile to extract the Jupyter base URL and port.
# - IMPORTANT for VS Code: reads the REAL Jupyter token from the runtime
# jpserver-*.json on the compute node (logs may redact token as "...").
#
# Output:
# - Default (human mode): prints the full Jupyter URL.
# - Machine mode (`ensure_jupyter.sh machine`): prints "NODE PORT".
# - Default (human mode): prints the full Jupyter URL with the real token.
# - Machine mode (`ensure_jupyter.sh machine`): prints "NODE PORT TOKEN".
#
# Where it runs:
# - On the RA *login node*.
#
# Typical usage:
# ./ensure_jupyter.sh
# ./ensure_jupyter.sh machine
#
set -euo pipefail
@@ -62,15 +58,49 @@ extract_url() {
| head -n 1 || true)"
[[ -z "${url:-}" ]] && return 1
# Replace missing or redacted token with dummy token
if [[ "$url" != *"token="* || "$url" == *"token=..."* ]]; then
url="${url%%\?*}?token=cristallina"
fi
echo "$url"
}
#######################################
# Fetch the real Jupyter token from the runtime directory of a compute node.
#
# Slurm logs may redact the token as "...", so the token is read from the
# jpserver-*.json files under ~/.local/share/jupyter/runtime on the node.
# The file whose "port" matches is preferred; otherwise the newest readable
# file is used.
#
# Globals:   TARGET_USER (read) - account used for the ssh connection
# Arguments: $1 - compute node hostname
#            $2 - Jupyter port (decimal digits only)
# Outputs:   the token on stdout; nothing if it cannot be determined
# Returns:   always 0 (best effort; callers check for an empty result)
#######################################
get_runtime_token() {
  local node="$1"
  local port="$2"

  # The port becomes part of the remote command line; accept digits only so
  # nothing else can reach the remote shell.
  [[ "$port" =~ ^[0-9]+$ ]] || return 0

  # The Python program travels over ssh's stdin ("python -") and the port is
  # handed over as a real argument, so it arrives as sys.argv[1]. Appending
  # the port to the heredoc terminator line instead would leave the heredoc
  # unterminated and sys.argv empty.
  # Errors are deliberately swallowed: an unreachable node or a missing
  # runtime file simply yields an empty token.
  ssh -o BatchMode=yes -o ConnectTimeout=8 "${TARGET_USER}@${node}" \
    python - "$port" 2>/dev/null <<'PY' || true
import glob, json, os, sys

rt = os.path.expanduser('~/.local/share/jupyter/runtime')
# Newest file first.
paths = sorted(glob.glob(os.path.join(rt, 'jpserver-*.json')),
               key=os.path.getmtime, reverse=True)
if not paths:
    sys.exit(1)
want_port = int(sys.argv[1])

# Load every readable server description once; skip anything malformed.
servers = []
for p in paths:
    try:
        with open(p) as fh:
            d = json.load(fh)
    except Exception:
        continue
    if isinstance(d, dict):
        servers.append(d)


def port_of(d):
    try:
        return int(d.get('port', -1))
    except (TypeError, ValueError):
        return -1


# Prefer the server listening on the requested port, else the newest one.
chosen = next((d for d in servers if port_of(d) == want_port), None)
if chosen is None and servers:
    chosen = servers[0]
if not chosen or 'token' not in chosen:
    sys.exit(1)
print(chosen['token'])
PY
}
info="$(job_info || true)"
if [[ -n "${info:-}" ]]; then
@@ -102,8 +132,27 @@ done
port="$(sed -n 's#.*://[^:/]*:\([0-9]\+\)/.*#\1#p' <<<"$url" | head -n 1)"
if [[ "$MODE" == "machine" ]]; then
echo "$node $port"
# Get the real token from runtime json on the compute node (fixes VS Code auth stalls)
token="$(get_runtime_token "$node" "$port")"
# If runtime lookup failed, try to extract a non-redacted token from the log URL (rare)
if [[ -z "${token:-}" ]]; then
tok_from_log="$(sed -n 's/.*token=\([^&[:space:]]\+\).*/\1/p' <<<"$url" | head -n 1)"
if [[ -n "${tok_from_log:-}" && "${tok_from_log}" != "..." ]]; then
token="$tok_from_log"
fi
fi
base="${url%%\?*}"
if [[ -n "${token:-}" ]]; then
final_url="${base}?token=${token}"
else
echo "$url"
# No token available; still print base URL, but VS Code may not authenticate.
final_url="${base}"
fi
if [[ "$MODE" == "machine" ]]; then
echo "$node $port ${token:-}"
else
echo "$final_url"
fi
+26 -44
View File
@@ -1,71 +1,53 @@
#!/usr/bin/env bash
#
# ra_vscode_jupyter.sh
#
# Purpose:
# One-command workflow to attach VS Code (Remote-SSH) to the node running your
# Jupyter Slurm job on the RA cluster.
#
# Usage:
# ./ra_vscode_jupyter.sh [username]
#
# Notes:
# - username is optional. If omitted, uses DEFAULT_USER below (edit once).
# - Script updates a stable SSH alias "ra-jupyter" pointing at the current node.
# - Inside VS Code on that node, connect notebooks to:
# http://127.0.0.1:<PORT>/
#
# One command on your laptop:
# - Ask RA login node to ensure a Jupyter job is running and return NODE+PORT+TOKEN
# - Update a stable SSH alias "ra-jupyter" to point at that node via ProxyJump
# - Open VS Code directly on that node
# - Print the authenticated Jupyter URL to use inside VS Code (localhost on-node)
set -euo pipefail
# ---- Edit defaults here once (user can customize their local copy) ----
DEFAULT_USER="vonka_j"
LOGIN_HOST="ra-l-005.psi.ch"
LOGIN="ra-l-005.psi.ch" # adjust if needed
USER_NAME="vonka_j"
SSH_ALIAS="ra-jupyter"
REMOTE_NOTEBOOK_ROOT="/das/home" # used only to build a default folder to open
# ----------------------------------------------------------------------
USER_NAME="${1:-$DEFAULT_USER}"
SSH_CONFIG="$HOME/.ssh/config"
# Ask the login node to ensure Jupyter is running and return NODE+PORT
read -r NODE PORT < <(
ssh "${USER_NAME}@${LOGIN_HOST}" "bash -lc '~/ensure_jupyter.sh machine ${USER_NAME}'"
read -r NODE PORT TOKEN < <(
ssh "${USER_NAME}@${LOGIN}" 'bash -lc "~/ensure_jupyter.sh machine"'
)
if [[ -z "${NODE:-}" || -z "${PORT:-}" ]]; then
echo "Could not obtain NODE/PORT from cluster." >&2
exit 1
fi
mkdir -p "$HOME/.ssh"
touch "$SSH_CONFIG"
# Remove existing alias block if present
# Remove any existing block for this alias
perl -0777 -i -pe "s/\nHost ${SSH_ALIAS}\n(?:[^\n]*\n)*?(?=\nHost |\$)//g" "$SSH_CONFIG"
# Add updated alias pointing to current compute node
# Add updated alias pointing to the current compute node
cat >> "$SSH_CONFIG" <<EOF
Host ${SSH_ALIAS}
HostName ${NODE}
User ${USER_NAME}
ProxyJump ${USER_NAME}@${LOGIN_HOST}
ProxyJump ${USER_NAME}@${LOGIN}
ServerAliveInterval 30
ServerAliveCountMax 6
EOF
echo "VS Code Remote-SSH target updated: ${SSH_ALIAS} -> ${NODE}"
echo "Inside VS Code, connect Jupyter to:"
echo " http://127.0.0.1:${PORT}/"
echo "Inside that VS Code window, connect Jupyter to:"
if [[ -n "${TOKEN:-}" ]]; then
echo " http://127.0.0.1:${PORT}/?token=${TOKEN}"
else
echo " http://127.0.0.1:${PORT}/"
echo " (warning: no token found; auth may fail)"
fi
echo
# Locate VS Code CLI
CODE_BIN="$(command -v code || true)"
if [[ -z "${CODE_BIN:-}" && -x "/Applications/Visual Studio Code.app/Contents/Resources/app/bin/code" ]]; then
CODE_BIN="/Applications/Visual Studio Code.app/Contents/Resources/app/bin/code"
fi
if [[ -z "${CODE_BIN:-}" ]]; then
echo "VS Code CLI not found. Install it via:"
echo " VS Code → Cmd+Shift+P → Install 'code' command in PATH"
exit 1
fi
# Open the user's home on the cluster (best default for notebooks/repos)
"$CODE_BIN" --remote "ssh-remote+${SSH_ALIAS}" "${REMOTE_NOTEBOOK_ROOT}/${USER_NAME}"
# Open VS Code directly on that remote host (requires 'code' CLI installed)
code --disable-gpu --remote "ssh-remote+${SSH_ALIAS}" "/das/home/${USER_NAME}"