From f9f062d2197658861e4e2dc5cba92a9dc75fd749 Mon Sep 17 00:00:00 2001
From: Vonka Jakub
Date: Tue, 26 Nov 2024 16:33:36 +0100
Subject: [PATCH] Adding cluster tools to this repo

---
 cluster_tools/jupyter_on_ra.sh          |  55 ++++++++++++
 cluster_tools/jupyter_on_sf.sh          |  51 +++++++++++
 cluster_tools/ra_cluster_cristallina.sh | 113 ++++++++++++++++++++++++
 cluster_tools/start_jupyter.sh          |  60 +++++++++++++
 4 files changed, 279 insertions(+)
 create mode 100755 cluster_tools/jupyter_on_ra.sh
 create mode 100755 cluster_tools/jupyter_on_sf.sh
 create mode 100644 cluster_tools/ra_cluster_cristallina.sh
 create mode 100755 cluster_tools/start_jupyter.sh

diff --git a/cluster_tools/jupyter_on_ra.sh b/cluster_tools/jupyter_on_ra.sh
new file mode 100755
index 0000000..78114b9
--- /dev/null
+++ b/cluster_tools/jupyter_on_ra.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Slurm submission script to start a JupyterLab instance on the RA cluster for
+# data analysis at Cristallina @ SwissFEL.
+
+# Requirements: user account on RA and access to /sf/cristallina
+
+# To execute from the Cristallina console use:
+# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_ra.sh"
+# or, when more computing power is needed, start a batch job that takes the #SBATCH options below into account:
+# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_ra.sh"
+
+# Alternatively, we can also run on the SwissFEL computing nodes (sf-cn-1), see jupyter_on_sf.sh.
+
+#SBATCH --job-name=analysis            # Job name
+#SBATCH --partition week               # or shared, hour, day-rhel8
+#SBATCH --nodes=1                      # Run all processes on a single node
+#SBATCH --ntasks=1                     # Run a single task
+#SBATCH --cpus-per-task=14             # Number of CPU cores per task
+#SBATCH --mem-per-cpu=16G
+#SBATCH --time=96:00:00                # Time limit hrs:min:sec
+#SBATCH --output=jupyterlab_%j_%N.log  # Standard output and error log
+#SBATCH --exclude=ra-c-[085-100]       # older AMD Epyc nodes which are problematic with bitshuffle
+
+echo "Starting JupyterLab..."
+
+echo "Date                            = $(date)"
+echo "Hostname                        = $(hostname -s)"
+echo "Working Directory               = $(pwd)"
+echo ""
+echo "Number of Nodes Allocated       = $SLURM_JOB_NUM_NODES"
+echo "Number of Tasks Allocated       = $SLURM_NTASKS"
+echo "Number of Cores/Task Allocated  = $SLURM_CPUS_PER_TASK"
+
+# load the minimal base conda installation
+source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
+
+# and activate the actual analysis environment
+# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
+conda activate /sf/cristallina/applications/conda/envs/analysis_edge
+
+# password equivalent
+export JUPYTER_TOKEN=cristallina
+
+# single user:
+# jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.disable_rtc=True
+
+# experimental: use the collaboration environment:
+jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore --YDocExtension.document_save_delay=10 --MySQLiteYStore.document_ttl=1800
+
+echo "JupyterLab finished."
+
+# cleanup
+echo "Cleaning up temporary database files"
+find "/tmp" -type f -name "ystore_[0-9]*.db" -exec rm {} \;
+
diff --git a/cluster_tools/jupyter_on_sf.sh b/cluster_tools/jupyter_on_sf.sh
new file mode 100755
index 0000000..b6bbe74
--- /dev/null
+++ b/cluster_tools/jupyter_on_sf.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Slurm submission script to start a JupyterLab instance on the SF cluster for
+# data analysis at Cristallina @ SwissFEL.
+
+# Requirements: user account on SF and access to /sf/cristallina
+
+# To execute from the Cristallina console use:
+# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_sf.sh"
+# or, when more computing power is needed, start a batch job that takes the #SBATCH options below into account:
+# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_sf.sh"
+
+# This variant runs on the SwissFEL computing nodes (e.g. sf-cn-1).
+
+#SBATCH --job-name=analysis            # Job name
+#SBATCH --partition prod-aramis        # Partition on the SwissFEL computing nodes
+#SBATCH --nodes=1                      # Run all processes on a single node
+#SBATCH --ntasks=1                     # Run a single task
+
+
+
+#SBATCH --output=jupyterlab_%j_%N.log  # Standard output and error log
+
+
+echo "Starting JupyterLab..."
+
+echo "Date                            = $(date)"
+echo "Hostname                        = $(hostname -s)"
+echo "Working Directory               = $(pwd)"
+echo ""
+echo "Number of Nodes Allocated       = $SLURM_JOB_NUM_NODES"
+echo "Number of Tasks Allocated       = $SLURM_NTASKS"
+echo "Number of Cores/Task Allocated  = $SLURM_CPUS_PER_TASK"
+
+# load the minimal base conda installation
+source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
+
+# and activate the actual analysis environment
+# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
+conda activate /sf/cristallina/applications/conda/envs/analysis_edge
+
+# password equivalent
+export JUPYTER_TOKEN=cristallina
+
+# single user: jupyter lab --no-browser --ip 0.0.0.0
+# experimental: use the collaboration environment
+
+jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore
+
+echo "JupyterLab finished."
+
+
diff --git a/cluster_tools/ra_cluster_cristallina.sh b/cluster_tools/ra_cluster_cristallina.sh
new file mode 100644
index 0000000..8a60dbc
--- /dev/null
+++ b/cluster_tools/ra_cluster_cristallina.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+
+function find_my_pgroups(){
+    echo "You seem to have access to these pgroups:"
+    for d in /sf/*/data/p*/raw; do
+        ls $d >/dev/null 2>/dev/null && echo $d
+    done | cut -d\/ -f5
+}
+
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <pgroup> <comment>"
+    echo "Example: $0 p17123 commissioning_spectrometer"
+    echo
+    find_my_pgroups
+    exit 1
+fi
+
+# Make newly created files group-writable so that the whole group (e.g. the users in the pgroup of an experiment) can modify them.
+# Add the umask line to ~/.bashrc if it is not there already and source the file again.
+grep -qxF 'umask 0002' ~/.bashrc || echo 'umask 0002' >> ~/.bashrc
+source ~/.bashrc
+
+
+pgroup=$1
+comment=$2
+shortcut_folder_path="./pgroup_shortcuts"
+#ppath=${pgroup::3}
+timestamp=$(date "+%Y%m%d-%H%M%S")
+
+source=/sf/cristallina/data/$pgroup
+target=$shortcut_folder_path/$pgroup-$comment
+
+# If the folder pgroup_shortcuts does not exist, create it
+mkdir -p $shortcut_folder_path
+
+if [ -d $target ]; then
+    echo "pgroup symlink (\"$target\") exists already."
+    echo "Won't create symlink."
+elif [ ! -d $source ]; then
+    echo "pgroup folder (\"$source\") does not exist."
+    echo "Won't create symlink."
+    find_my_pgroups
+else
+    echo "Creating symlink to pgroup $pgroup at $target..."
+    ln -s $source $target
+fi
+
+
+#echo
+#pgroup_conda=$source/anaconda
+#if [ -d $pgroup_conda ]; then
+#    echo "conda folder in pgroup $pgroup exists already."
+#else
+#    echo "No conda installation in pgroup $pgroup yet."
+#    echo "Conda install may take some time..."
+#    set -e
+
+#    source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
+#    conda activate alvra-analysis
+#    conda env export --prefix /sf/alvra/anaconda/alvra-analysis --file packages-$timestamp.yaml
+#    time conda env create --prefix $pgroup_conda/alvra-analysis-$pgroup --file packages-$timestamp.yaml
+#    rm packages-$timestamp.yaml
+#    conda activate $pgroup_conda/alvra-analysis-$pgroup
+#    conda install --yes --channel paulscherrerinstitute alvra_tools
+#    conda env export --prefix $pgroup_conda/alvra-analysis-$pgroup --file $pgroup_conda/packages-$timestamp.yaml
+#    conda deactivate
+
+#    set +e
+#fi
+
+condarc=~/.condarc
+condarc_bak=$condarc-$timestamp
+
+if [ -f $condarc ]; then
+    cp $condarc $condarc_bak
+fi
+
+
+echo
+echo "Updating .condarc:"
+# entries added later have higher priority
+#source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
+#conda config --add envs_dirs /sf/alvra/anaconda
+#conda config --add envs_dirs $pgroup_conda
+#conda config --add channels conda-forge
+#conda config --add channels paulscherrerinstitute
+miniconda=/sf/cristallina/applications/conda/envs/miniconda
+source ${miniconda}/etc/profile.d/conda.sh
+conda config --add envs_dirs /sf/cristallina/applications/conda/envs
+conda activate analysis_edge
+
+
+# delete the backup if it is identical to the updated original
+if [ -f $condarc_bak ]; then
+    cmp --silent $condarc $condarc_bak
+    if [ $? -eq 0 ]; then
+        echo
+        echo ".condarc unchanged."
+        rm $condarc_bak
+    else
+        echo
+        echo "Created backup: $condarc_bak"
+        echo
+        echo "Your old .condarc:"
+        echo "=================="
+        cat $condarc_bak
+    fi
+fi
+
+
+
diff --git a/cluster_tools/start_jupyter.sh b/cluster_tools/start_jupyter.sh
new file mode 100755
index 0000000..52e8a63
--- /dev/null
+++ b/cluster_tools/start_jupyter.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# move old log files out of the way
+mv jupyterlab*.log ~/old_logs 2>/dev/null
+
+
+# short user name, as shown (truncated) in the Slurm queue
+SHORT=$(echo $USER | cut -c1-7)
+
+# JupyterLab does not cope well with several instances running
+# as the same user on an NFS filesystem, as this corrupts its databases.
+# So we abort in that case.
+if [ "$(squeue | grep $SHORT | awk '{print $3}')" == "analysis" ]; then
+    echo "Jupyter instance already running. Aborting."
+    exit 1
+fi
+
+# submit the batch job to the RA cluster
+sbatch jupyter_on_ra.sh
+
+# prepare spinner
+i=1
+sp="/-\|"
+printf 'Waiting for JupyterLab to start. '
+
+
+# wait until the JupyterLab job shows up in the queue
+while [ "$(squeue | grep $SHORT | wc -w )" -lt 2 ]
+do
+    sleep 0.25
+    printf "\b${sp:i++%${#sp}:1}"
+done
+
+
+printf '\nWaiting for JupyterLab logfile. '
+
+# and wait until there is a logfile (parsing ls, I know...)
+while [ "$(ls jupyterlab*.log 2>/dev/null | xargs cat | wc -w )" -lt 50 ]
+do
+    sleep 0.25
+    printf "\b${sp:i++%${#sp}:1}"
+done
+
+# wait a bit until the startup of the JupyterLab server is complete
+sleep 2
+printf '\nScanning for corresponding node in logfile. '
+
+# pick the newest jupyterlab logfile
+LOGFILE=$(find . -maxdepth 1 -name "jupyterlab*" -printf "%T@ %Tc %p\n" | sort -n | tail -n 1 | awk '{print $NF}')
+
+
+until grep -q -E "Jupyter Server.*is running at" $LOGFILE
+do
+    sleep 0.25
+    printf "\b${sp:i++%${#sp}:1}"
+done
+
+# and output the corresponding entry
+OUTPUT=$(grep -o "http://r.*" $LOGFILE)
+printf "\nJupyter instance running at: \n${OUTPUT/token=.../token=cristallina}\n"
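
Once the batch job is running, JupyterLab listens on the allocated compute node, not on the login node. A minimal sketch for reaching it from a workstation, assuming the node name printed by start_jupyter.sh (ra-c-042 below is a placeholder) and JupyterLab's default port 8888 (the printed URL shows the actual port if a different one is used):

    # forward a local port through the RA login node to the compute node;
    # ra-c-042 and 8888 are placeholders, take the actual node and port from
    # the URL printed by start_jupyter.sh or from jupyterlab_<jobid>_<node>.log
    ssh -N -L 8888:ra-c-042:8888 your_username@ra.psi.ch
    # then open http://localhost:8888/?token=cristallina in a local browser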
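A typical invocation of ra_cluster_cristallina.sh, run from the analysis directory in which the pgroup_shortcuts folder should live (p12345 and beamtime_2024 are placeholder arguments):

    # creates ./pgroup_shortcuts/p12345-beamtime_2024 -> /sf/cristallina/data/p12345
    # and registers /sf/cristallina/applications/conda/envs in ~/.condarc
    ./ra_cluster_cristallina.sh p12345 beamtime_2024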
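The duplicate-instance check in start_jupyter.sh greps the full squeue output for a truncated user name, which can match unrelated jobs. A sketch of a more targeted check using standard squeue filters (same intent, not a drop-in from the patch):

    # list only this user's jobs with the job name used by jupyter_on_ra.sh;
    # -h suppresses the header, so any output means a job already exists
    if [ -n "$(squeue -h -u "$USER" -n analysis -o %i)" ]; then
        echo "Jupyter instance already running. Aborting."
        exit 1
    fi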