Updating ensure_jupyter and ra_vscode_jupyter such that kernel startup is faster and more stable.
This commit is contained in:
+67
-18
@@ -14,21 +14,17 @@
|
||||
# - If not, it submits a new Jupyter job (via jupyter_on_ra.sh) and waits
|
||||
# until the job is RUNNING.
|
||||
# - Locates the Slurm stdout logfile for the job.
|
||||
# - Parses the logfile to extract the Jupyter URL.
|
||||
# - If the token is missing or redacted (token=...), replaces it with
|
||||
# a dummy token "cristallina".
|
||||
# - Parses the logfile to extract the Jupyter base URL and port.
|
||||
# - IMPORTANT for VS Code: reads the REAL Jupyter token from the runtime
|
||||
# jpserver-*.json on the compute node (logs may redact token as "...").
|
||||
#
|
||||
# Output:
|
||||
# - Default (human mode): prints the full Jupyter URL.
|
||||
# - Machine mode (`ensure_jupyter.sh machine`): prints "NODE PORT".
|
||||
# - Default (human mode): prints the full Jupyter URL with the real token.
|
||||
# - Machine mode (`ensure_jupyter.sh machine`): prints "NODE PORT TOKEN".
|
||||
#
|
||||
# Where it runs:
|
||||
# - On the RA *login node*.
|
||||
#
|
||||
# Typical usage:
|
||||
# ./ensure_jupyter.sh
|
||||
# ./ensure_jupyter.sh machine
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -62,15 +58,49 @@ extract_url() {
|
||||
| head -n 1 || true)"
|
||||
|
||||
[[ -z "${url:-}" ]] && return 1
|
||||
|
||||
# Replace missing or redacted token with dummy token
|
||||
if [[ "$url" != *"token="* || "$url" == *"token=..."* ]]; then
|
||||
url="${url%%\?*}?token=cristallina"
|
||||
fi
|
||||
|
||||
echo "$url"
|
||||
}
|
||||
|
||||
#######################################
# Print the real Jupyter server token for a running job.
#
# Connects to the compute node over SSH and runs a small Python snippet that
# inspects the jpserver-*.json files in ~/.local/share/jupyter/runtime.
# The file whose "port" matches PORT is preferred; if none matches, the most
# recently modified readable file is used instead.
#
# Globals:   TARGET_USER (read) - account used for the SSH connection
# Arguments: $1 - compute node hostname
#            $2 - Jupyter port (digits); anything else disables port matching
# Outputs:   the token on stdout, or nothing if it could not be determined
# Returns:   always 0 (best-effort lookup; callers test for empty output)
#######################################
get_runtime_token() {
  local node="$1"
  local port="${2:-}"

  # The port is interpolated into the remote command line, so only ever send
  # digits. 0 never matches a real server, which triggers the "newest file"
  # fallback in the Python snippet below.
  [[ "$port" =~ ^[0-9]+$ ]] || port=0

  # NOTE: the port must be passed as an argument to `python -` and the
  # here-document terminator must be alone on its line. Writing `PY <port>`
  # leaves the here-document unterminated and feeds that text to Python.
  # Errors are deliberately silenced: callers fall back to the logfile URL.
  ssh -o BatchMode=yes -o ConnectTimeout=8 "${TARGET_USER}@${node}" "python - ${port} <<'PY'
import glob, json, os, sys

rt = os.path.expanduser('~/.local/share/jupyter/runtime')
paths = sorted(glob.glob(os.path.join(rt, 'jpserver-*.json')), key=os.path.getmtime)
if not paths:
    sys.exit(1)

want_port = int(sys.argv[1])
chosen = None

# Newest first: prefer the server file whose port matches.
for p in reversed(paths):
    try:
        with open(p) as fh:
            d = json.load(fh)
        matches = int(d.get('port', -1)) == want_port
    except Exception:
        continue
    if matches:
        chosen = d
        break

if chosen is None:
    # fall back to newest readable file
    for p in reversed(paths):
        try:
            with open(p) as fh:
                chosen = json.load(fh)
            break
        except Exception:
            pass

if not chosen or 'token' not in chosen:
    sys.exit(1)

print(chosen['token'])
PY" 2>/dev/null || true
}
|
||||
|
||||
info="$(job_info || true)"
|
||||
|
||||
if [[ -n "${info:-}" ]]; then
|
||||
@@ -102,8 +132,27 @@ done
|
||||
|
||||
port="$(sed -n 's#.*://[^:/]*:\([0-9]\+\)/.*#\1#p' <<<"$url" | head -n 1)"
|
||||
|
||||
if [[ "$MODE" == "machine" ]]; then
|
||||
echo "$node $port"
|
||||
# Get the real token from runtime json on the compute node (fixes VS Code auth stalls)
|
||||
token="$(get_runtime_token "$node" "$port")"
|
||||
|
||||
# If runtime lookup failed, try to extract a non-redacted token from the log URL (rare)
|
||||
if [[ -z "${token:-}" ]]; then
|
||||
tok_from_log="$(sed -n 's/.*token=\([^&[:space:]]\+\).*/\1/p' <<<"$url" | head -n 1)"
|
||||
if [[ -n "${tok_from_log:-}" && "${tok_from_log}" != "..." ]]; then
|
||||
token="$tok_from_log"
|
||||
fi
|
||||
fi
|
||||
|
||||
base="${url%%\?*}"
|
||||
if [[ -n "${token:-}" ]]; then
|
||||
final_url="${base}?token=${token}"
|
||||
else
|
||||
echo "$url"
|
||||
# No token available; still print base URL, but VS Code may not authenticate.
|
||||
final_url="${base}"
|
||||
fi
|
||||
|
||||
if [[ "$MODE" == "machine" ]]; then
|
||||
echo "$node $port ${token:-}"
|
||||
else
|
||||
echo "$final_url"
|
||||
fi
|
||||
+26
-44
@@ -1,71 +1,53 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# ra_vscode_jupyter.sh
|
||||
#
|
||||
# Purpose:
|
||||
# One-command workflow to attach VS Code (Remote-SSH) to the node running your
|
||||
# Jupyter Slurm job on the RA cluster.
|
||||
#
|
||||
# Usage:
|
||||
# ./ra_vscode_jupyter.sh [username]
|
||||
#
|
||||
# Notes:
|
||||
# - username is optional. If omitted, uses DEFAULT_USER below (edit once).
|
||||
# - Script updates a stable SSH alias "ra-jupyter" pointing at the current node.
|
||||
# - Inside VS Code on that node, connect notebooks to:
|
||||
# http://127.0.0.1:<PORT>/
|
||||
#
|
||||
# One command on your laptop:
|
||||
# - Ask RA login node to ensure a Jupyter job is running and return NODE+PORT+TOKEN
|
||||
# - Update a stable SSH alias "ra-jupyter" to point at that node via ProxyJump
|
||||
# - Open VS Code directly on that node
|
||||
# - Print the authenticated Jupyter URL to use inside VS Code (localhost on-node)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ---- Edit defaults here once (user can customize their local copy) ----
|
||||
DEFAULT_USER="vonka_j"
|
||||
LOGIN_HOST="ra-l-005.psi.ch"
|
||||
LOGIN="ra-l-005.psi.ch" # adjust if needed
|
||||
USER_NAME="vonka_j"
|
||||
SSH_ALIAS="ra-jupyter"
|
||||
REMOTE_NOTEBOOK_ROOT="/das/home" # used only to build a default folder to open
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
USER_NAME="${1:-$DEFAULT_USER}"
|
||||
SSH_CONFIG="$HOME/.ssh/config"
|
||||
|
||||
# Ask the login node to ensure Jupyter is running and return NODE+PORT
|
||||
read -r NODE PORT < <(
|
||||
ssh "${USER_NAME}@${LOGIN_HOST}" "bash -lc '~/ensure_jupyter.sh machine ${USER_NAME}'"
|
||||
read -r NODE PORT TOKEN < <(
|
||||
ssh "${USER_NAME}@${LOGIN}" 'bash -lc "~/ensure_jupyter.sh machine"'
|
||||
)
|
||||
|
||||
if [[ -z "${NODE:-}" || -z "${PORT:-}" ]]; then
|
||||
echo "Could not obtain NODE/PORT from cluster." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$HOME/.ssh"
|
||||
touch "$SSH_CONFIG"
|
||||
|
||||
# Remove existing alias block if present
|
||||
# Remove any existing block for this alias
|
||||
perl -0777 -i -pe "s/\nHost ${SSH_ALIAS}\n(?:[^\n]*\n)*?(?=\nHost |\$)//g" "$SSH_CONFIG"
|
||||
|
||||
# Add updated alias pointing to current compute node
|
||||
# Add updated alias pointing to the current compute node
|
||||
cat >> "$SSH_CONFIG" <<EOF
|
||||
|
||||
Host ${SSH_ALIAS}
|
||||
HostName ${NODE}
|
||||
User ${USER_NAME}
|
||||
ProxyJump ${USER_NAME}@${LOGIN_HOST}
|
||||
ProxyJump ${USER_NAME}@${LOGIN}
|
||||
ServerAliveInterval 30
|
||||
ServerAliveCountMax 6
|
||||
EOF
|
||||
|
||||
echo "VS Code Remote-SSH target updated: ${SSH_ALIAS} -> ${NODE}"
|
||||
echo "Inside VS Code, connect Jupyter to:"
|
||||
echo " http://127.0.0.1:${PORT}/"
|
||||
echo "Inside that VS Code window, connect Jupyter to:"
|
||||
if [[ -n "${TOKEN:-}" ]]; then
|
||||
echo " http://127.0.0.1:${PORT}/?token=${TOKEN}"
|
||||
else
|
||||
echo " http://127.0.0.1:${PORT}/"
|
||||
echo " (warning: no token found; auth may fail)"
|
||||
fi
|
||||
echo
|
||||
|
||||
# Locate VS Code CLI
|
||||
CODE_BIN="$(command -v code || true)"
|
||||
if [[ -z "${CODE_BIN:-}" && -x "/Applications/Visual Studio Code.app/Contents/Resources/app/bin/code" ]]; then
|
||||
CODE_BIN="/Applications/Visual Studio Code.app/Contents/Resources/app/bin/code"
|
||||
fi
|
||||
|
||||
if [[ -z "${CODE_BIN:-}" ]]; then
|
||||
echo "VS Code CLI not found. Install it via:"
|
||||
echo " VS Code → Cmd+Shift+P → Install 'code' command in PATH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Open the user's home on the cluster (best default for notebooks/repos)
|
||||
"$CODE_BIN" --remote "ssh-remote+${SSH_ALIAS}" "${REMOTE_NOTEBOOK_ROOT}/${USER_NAME}"
|
||||
# Open VS Code directly on that remote host (requires 'code' CLI installed)
|
||||
code --disable-gpu --remote "ssh-remote+${SSH_ALIAS}" "/das/home/${USER_NAME}"
|
||||
Reference in New Issue
Block a user