Adding cluster tools to this repo

2024-11-26 16:33:36 +01:00
parent 7649164f88
commit f9f062d219
4 changed files with 279 additions and 0 deletions

cluster_tools/jupyter_on_ra.sh (executable file, 55 additions)

@@ -0,0 +1,55 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the RA cluster for
# Cristallina data analysis @ SwissFEL.
# Requirements: a user account on RA and access to /sf/cristallina
# To execute from the cristallina console, use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_ra.sh"
# or, when more computing power is needed, submit a batch job that honors the #SBATCH options below:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_ra.sh"
# alternatively this can also be run on the SwissFEL computing nodes (sf-cn-1), see jupyter_on_sf.sh
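#
# Once the job is running, the JupyterLab instance can be reached from a
# workstation via an SSH tunnel (a sketch, assuming the default Jupyter port
# 8888; replace <node> with the node name reported in the jupyterlab_*.log):
#   ssh -L 8888:<node>:8888 your_username@ra.psi.ch
# and then open http://localhost:8888/lab?token=cristallina in a browser.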
#SBATCH --job-name=analysis # Job name
#SBATCH --partition=week         # alternatives: shared, hour, day-rhel8
#SBATCH --nodes=1 # Run all processes on a single node
#SBATCH --ntasks=1 # Run a single task
#SBATCH --cpus-per-task=14 # Number of CPU cores per task
#SBATCH --mem-per-cpu=16G
#SBATCH --time=96:00:00 # Time limit hrs:min:sec
#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log
#SBATCH --exclude=ra-c-[085-100] # older AMD Epyc nodes which are problematic with bitshuffle
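# Note: any of the options above can be overridden at submission time without
# editing this file, e.g. (using one of the partitions listed above):
#   sbatch --partition=day-rhel8 --time=24:00:00 --cpus-per-task=8 /sf/cristallina/applications/conda/jupyter_on_ra.sh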
echo "Starting Jupyterlab..."
echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
# loading our minimal conda environment
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
# and activating the actual analysis environment
# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
conda activate /sf/cristallina/applications/conda/envs/analysis_edge
# password equivalent
export JUPYTER_TOKEN=cristallina
# single user:
# jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.disable_rtc=True
# experimental: use collaboration environment:
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore --YDocExtension.document_save_delay=10 --MySQLiteYStore.document_ttl=1800
echo "Jupyterlab finished."
# cleanup
echo "Cleaning up temporary database files"
find "/tmp" -type f -name "ystore_[0-9]*.db" -exec rm {} \;

cluster_tools/jupyter_on_sf.sh (executable file, 51 additions)

@@ -0,0 +1,51 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the SF cluster for
# Cristallina data analysis @ SwissFEL.
# Requirements: a user account on SF and access to /sf/cristallina
# To execute from the cristallina console, use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_sf.sh"
# or, when more computing power is needed, submit a batch job that honors the #SBATCH options below:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_sf.sh"
# this is the variant that runs on the SwissFEL computing nodes (e.g. sf-cn-1)
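#
# The submitted job can be monitored and stopped with standard Slurm commands,
# e.g. (the job name "analysis" is set below):
#   squeue --user=$USER --name=analysis     # is the JupyterLab job running?
#   scancel --user=$USER --name=analysis    # stop the JupyterLab instance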
#SBATCH --job-name=analysis # Job name
#SBATCH --partition=prod-aramis  # SwissFEL (Aramis) production partition
#SBATCH --nodes=1 # Run all processes on a single node
#SBATCH --ntasks=1 # Run a single task
#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log
echo "Starting Jupyterlab..."
echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
# loading our minimal conda environment
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
# and activating the actual analysis environment
# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
conda activate /sf/cristallina/applications/conda/envs/analysis_edge
# password equivalent
export JUPYTER_TOKEN=cristallina
# single user: jupyter lab --no-browser --ip 0.0.0.0
# experimental: use collaboration environment
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore
echo "Jupyterlab finished."


@@ -0,0 +1,113 @@
#!/bin/bash
function find_my_pgroups(){
echo "You seem to have access to these pgroups:"
for d in /sf/*/data/p*/raw; do
ls "$d" >/dev/null 2>&1 && echo "$d"
done | cut -d\/ -f5
}
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <pgroup> <human comment>"
echo "Example: $0 p17123 commisioning_spectrometer"
echo
find_my_pgroups
exit 1
fi
# Change permissions of newly created files so that the whole group (e.g. the users in the pgroup of an experiment) can write to them
# Add the line to ~/.bashrc if it is not there already, then source the file again
grep -qxF 'umask 0002' ~/.bashrc || echo 'umask 0002' >> ~/.bashrc
source ~/.bashrc
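# With umask 0002, newly created files are group-writable, e.g.:
#   touch example.txt && ls -l example.txt   # -> -rw-rw-r-- ... example.txt
# (a sketch; the exact mode also depends on the application creating the file)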
pgroup=$1
comment=$2
shortcut_folder_path="./pgroup_shortcuts"
#ppath=${pgroup::3}
timestamp=$(date "+%Y%m%d-%H%M%S")
source=/sf/cristallina/data/$pgroup
target=$shortcut_folder_path/$pgroup-$comment
# If the folder pgroup_shortcuts does not exist, create it
mkdir -p "$shortcut_folder_path"
if [ -d "$target" ]; then
echo "pgroup symlink (\"$target\") exists already."
echo "Won't create symlink."
elif [ ! -d "$source" ]; then
echo "pgroup folder (\"$source\") does not exist."
echo "Won't create symlink."
find_my_pgroups
else
echo "Creating symlink to pgroup $pgroup at $target..."
ln -s "$source" "$target"
fi
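# The resulting shortcut can be checked with, e.g.:
#   ls -l "$shortcut_folder_path"
# which, for the usage example above, should show something like:
#   p17123-commissioning_spectrometer -> /sf/cristallina/data/p17123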
#echo
#pgroup_conda=$source/anaconda
#if [ -d $pgroup_conda ]; then
# echo "conda folder in pgroup $pgroup exists already."
#else
# echo "No conda installation in pgroup $pgroup yet."
# echo "Conda install may take some time..."
# set -e
# source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
# conda activate alvra-analysis
# conda env export --prefix /sf/alvra/anaconda/alvra-analysis --file packages-$timestamp.yaml
# time conda env create --prefix $pgroup_conda/alvra-analysis-$pgroup --file packages-$timestamp.yaml
# rm packages-$timestamp.yaml
# conda activate $pgroup_conda/alvra-analysis-$pgroup
# conda install --yes --channel paulscherrerinstitute alvra_tools
# conda env export --prefix $pgroup_conda/alvra-analysis-$pgroup --file $pgroup_conda/packages-$timestamp.yaml
# conda deactivate
# set +e
#fi
condarc=~/.condarc
condarc_bak=$condarc-$timestamp
if [ -f $condarc ]; then
cp $condarc $condarc_bak
fi
echo
echo "Updating .condarc:"
# entries added later get higher priority
#source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
#conda config --add envs_dirs /sf/alvra/anaconda
#conda config --add envs_dirs $pgroup_conda
#conda config --add channels conda-forge
#conda config --add channels paulscherrerinstitute
miniconda=/sf/cristallina/applications/conda/envs/miniconda
source ${miniconda}/etc/profile.d/conda.sh
conda config --add envs_dirs /sf/cristallina/applications/conda/envs
conda activate analysis_edge
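# After this, ~/.condarc should contain an entry along these lines (a sketch):
#   envs_dirs:
#     - /sf/cristallina/applications/conda/envs
# so that the shared environments can be activated by name, e.g.
# "conda activate analysis_edge", without giving the full prefix path.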
# delete backup if identical to updated original
if [ -f $condarc_bak ]; then
cmp --silent $condarc $condarc_bak
if [ $? -eq 0 ]; then
echo
echo ".condarc unchanged."
rm $condarc_bak
else
echo
echo "Created backup: $condarc_bak"
echo
echo "Your old .condarc:"
echo "=================="
cat $condarc_bak
fi
fi

cluster_tools/start_jupyter.sh (executable file, 60 additions)

@@ -0,0 +1,60 @@
#!/bin/bash
# move old log files out of the way
mkdir -p ~/old_logs
mv jupyterlab*.log ~/old_logs 2>/dev/null
# shortened user name, as displayed (truncated) in the squeue output
SHORT=$(echo $USER | cut -c1-7)
# JupyterLab does not cope well with several instances running as the same
# user on an NFS filesystem, as this corrupts its internal databases.
# So we abort here if an instance is already running.
if [ "$(squeue | grep $SHORT | awk '{print $3}')" == "analysis" ]; then
echo "Jupyter instance already running. Aborting."
exit 1
fi
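# Note: an alternative to parsing the full queue output is to query directly
# for the job name (a sketch using standard squeue options):
#   squeue --noheader --user="$USER" --name=analysis
# which prints a line only if such a job exists for the current user.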
# submits the batch job to the RA cluster
sbatch jupyter_on_ra.sh
# prepare spinner
i=1
sp="/-\|"
printf 'Waiting for Jupyterlab to start. '
# wait until the Jupyterlab instance shows up
while [ "$(squeue | grep $SHORT | wc -w )" -lt 2 ]
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
printf '\nWaiting for Jupyterlab logfile. '
# and wait till there is a logfile (parsing ls, I know...)
while [ "$(ls jupyterlab*.log 2>/dev/null | xargs cat | wc -w )" -lt 50 ]
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
# wait a bit till the startup of the jupyterlab server is complete
sleep 2
printf '\nScanning for corresponding node in logfile. '
LOGFILE=$(find . -maxdepth 1 -name "jupyterlab*" -printf "%T@ %Tc %p\n" | sort -n | tail -n 1 | awk '{print $NF}')
until grep -q -E "Jupyter Server.*is running at" "$LOGFILE"
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
# and output the corresponding entry; the token masked as "..." in the log is
# replaced with the actual token set in the submission script
OUTPUT=$(grep -o "http://r.*" "$LOGFILE")
printf "\nJupyter instances running at: \n${OUTPUT/token=.../token=cristallina}\n"
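# This wrapper presumably lives next to jupyter_on_ra.sh (it submits the job
# by relative path), so it could be invoked from the console as, e.g.:
#   ssh your_username@ra.psi.ch "cd /sf/cristallina/applications/conda && ./start_jupyter.sh"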