Adding cluster tools to this repo

2024-11-26 16:33:36 +01:00
parent 7649164f88
commit f9f062d219
4 changed files with 279 additions and 0 deletions

cluster_tools/jupyter_on_ra.sh (executable file, 55 additions)

@@ -0,0 +1,55 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the RA cluster for
# Cristallina data analysis @ SwissFEL.
# Requirements: a user account on RA and access to /sf/cristallina
# To execute from the cristallina console, use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_ra.sh"
# or, when more computing power is needed, submit a batch job that honors the #SBATCH options below:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_ra.sh"
# alternatively this can also be run on the SwissFEL computing nodes (sf-cn-1), see jupyter_on_sf.sh
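#
# Once the job is running, the JupyterLab instance can be reached from a
# workstation via an SSH tunnel (a sketch, assuming the default Jupyter port
# 8888; replace <node> with the node name reported in the jupyterlab_*.log):
#   ssh -L 8888:<node>:8888 your_username@ra.psi.ch
# and then open http://localhost:8888/lab?token=cristallina in a browser.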
#SBATCH --job-name=analysis # Job name
#SBATCH --partition=week         # alternatives: shared, hour, day-rhel8
#SBATCH --nodes=1 # Run all processes on a single node
#SBATCH --ntasks=1 # Run a single task
#SBATCH --cpus-per-task=14 # Number of CPU cores per task
#SBATCH --mem-per-cpu=16G
#SBATCH --time=96:00:00 # Time limit hrs:min:sec
#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log
#SBATCH --exclude=ra-c-[085-100] # older AMD Epyc nodes which are problematic with bitshuffle
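# Note: any of the options above can be overridden at submission time without
# editing this file, e.g. (using one of the partitions listed above):
#   sbatch --partition=day-rhel8 --time=24:00:00 --cpus-per-task=8 /sf/cristallina/applications/conda/jupyter_on_ra.sh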
echo "Starting Jupyterlab..."
echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
# loading our minimal conda environment
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
# and activating the actual analysis environment
# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
conda activate /sf/cristallina/applications/conda/envs/analysis_edge
# password equivalent
export JUPYTER_TOKEN=cristallina
# single user:
# jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.disable_rtc=True
# experimental: use collaboration environment:
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore --YDocExtension.document_save_delay=10 --MySQLiteYStore.document_ttl=1800
echo "Jupyterlab finished."
# cleanup
echo "Cleaning up temporary database files"
find "/tmp" -type f -name "ystore_[0-9]*.db" -exec rm {} \;

cluster_tools/jupyter_on_sf.sh (executable file, 51 additions)

@@ -0,0 +1,51 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the SF cluster for
# Cristallina data analysis @ SwissFEL.
# Requirements: a user account on SF and access to /sf/cristallina
# To execute from the cristallina console, use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_sf.sh"
# or, when more computing power is needed, submit a batch job that honors the #SBATCH options below:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_sf.sh"
# this is the variant that runs on the SwissFEL computing nodes (e.g. sf-cn-1)
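#
# The submitted job can be monitored and stopped with standard Slurm commands,
# e.g. (the job name "analysis" is set below):
#   squeue --user=$USER --name=analysis     # is the JupyterLab job running?
#   scancel --user=$USER --name=analysis    # stop the JupyterLab instance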
#SBATCH --job-name=analysis # Job name
#SBATCH --partition=prod-aramis  # SwissFEL (Aramis) production partition
#SBATCH --nodes=1 # Run all processes on a single node
#SBATCH --ntasks=1 # Run a single task
#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log
echo "Starting Jupyterlab..."
echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
# loading our minimal conda environment
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
# and activating the actual analysis environment
# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
conda activate /sf/cristallina/applications/conda/envs/analysis_edge
# password equivalent
export JUPYTER_TOKEN=cristallina
# single user: jupyter lab --no-browser --ip 0.0.0.0
# experimental: use collaboration environment
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore
echo "Jupyterlab finished."


@@ -0,0 +1,113 @@
#!/bin/bash
function find_my_pgroups(){
echo "You seem to have access to these pgroups:"
for d in /sf/*/data/p*/raw; do
ls "$d" >/dev/null 2>&1 && echo "$d"
done | cut -d\/ -f5
}
if [ "$#" -ne 2 ]; then
echo "Usage: $0 <pgroup> <human comment>"
echo "Example: $0 p17123 commisioning_spectrometer"
echo
find_my_pgroups
exit 1
fi
# Change permissions of newly created files so that the whole group (e.g. the users in the pgroup of an experiment) can write to them
# Add the line to ~/.bashrc if it is not there already, then source the file again
grep -qxF 'umask 0002' ~/.bashrc || echo 'umask 0002' >> ~/.bashrc
source ~/.bashrc
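# With umask 0002, newly created files are group-writable, e.g.:
#   touch example.txt && ls -l example.txt   # -> -rw-rw-r-- ... example.txt
# (a sketch; the exact mode also depends on the application creating the file)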
pgroup=$1
comment=$2
shortcut_folder_path="./pgroup_shortcuts"
#ppath=${pgroup::3}
timestamp=$(date "+%Y%m%d-%H%M%S")
source=/sf/cristallina/data/$pgroup
target=$shortcut_folder_path/$pgroup-$comment
# If the folder pgroup_shortcuts does not exist, create it
mkdir -p "$shortcut_folder_path"
if [ -d "$target" ]; then
echo "pgroup symlink (\"$target\") exists already."
echo "Won't create symlink."
elif [ ! -d "$source" ]; then
echo "pgroup folder (\"$source\") does not exist."
echo "Won't create symlink."
find_my_pgroups
else
echo "Creating symlink to pgroup $pgroup at $target..."
ln -s "$source" "$target"
fi
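# The resulting shortcut can be checked with, e.g.:
#   ls -l "$shortcut_folder_path"
# which, for the usage example above, should show something like:
#   p17123-commissioning_spectrometer -> /sf/cristallina/data/p17123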
#echo
#pgroup_conda=$source/anaconda
#if [ -d $pgroup_conda ]; then
# echo "conda folder in pgroup $pgroup exists already."
#else
# echo "No conda installation in pgroup $pgroup yet."
# echo "Conda install may take some time..."
# set -e
# source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
# conda activate alvra-analysis
# conda env export --prefix /sf/alvra/anaconda/alvra-analysis --file packages-$timestamp.yaml
# time conda env create --prefix $pgroup_conda/alvra-analysis-$pgroup --file packages-$timestamp.yaml
# rm packages-$timestamp.yaml
# conda activate $pgroup_conda/alvra-analysis-$pgroup
# conda install --yes --channel paulscherrerinstitute alvra_tools
# conda env export --prefix $pgroup_conda/alvra-analysis-$pgroup --file $pgroup_conda/packages-$timestamp.yaml
# conda deactivate
# set +e
#fi
condarc=~/.condarc
condarc_bak=$condarc-$timestamp
if [ -f $condarc ]; then
cp $condarc $condarc_bak
fi
echo
echo "Updating .condarc:"
# entries added later get higher priority
#source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
#conda config --add envs_dirs /sf/alvra/anaconda
#conda config --add envs_dirs $pgroup_conda
#conda config --add channels conda-forge
#conda config --add channels paulscherrerinstitute
miniconda=/sf/cristallina/applications/conda/envs/miniconda
source ${miniconda}/etc/profile.d/conda.sh
conda config --add envs_dirs /sf/cristallina/applications/conda/envs
conda activate analysis_edge
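# After this, ~/.condarc should contain an entry along these lines (a sketch):
#   envs_dirs:
#     - /sf/cristallina/applications/conda/envs
# so that the shared environments can be activated by name, e.g.
# "conda activate analysis_edge", without giving the full prefix path.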
# delete backup if identical to updated original
if [ -f $condarc_bak ]; then
cmp --silent $condarc $condarc_bak
if [ $? -eq 0 ]; then
echo
echo ".condarc unchanged."
rm $condarc_bak
else
echo
echo "Created backup: $condarc_bak"
echo
echo "Your old .condarc:"
echo "=================="
cat $condarc_bak
fi
fi

cluster_tools/start_jupyter.sh (executable file, 60 additions)

@@ -0,0 +1,60 @@
#!/bin/bash
# move old log files out of the way
mkdir -p ~/old_logs
mv jupyterlab*.log ~/old_logs 2>/dev/null
# shortened user name, as displayed (truncated) in the squeue output
SHORT=$(echo $USER | cut -c1-7)
# JupyterLab does not cope well with several instances running as the same
# user on an NFS filesystem, as this corrupts its internal databases.
# So we abort here if an instance is already running.
if [ "$(squeue | grep $SHORT | awk '{print $3}')" == "analysis" ]; then
echo "Jupyter instance already running. Aborting."
exit 1
fi
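# Note: an alternative to parsing the full queue output is to query directly
# for the job name (a sketch using standard squeue options):
#   squeue --noheader --user="$USER" --name=analysis
# which prints a line only if such a job exists for the current user.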
# submits the batch job to the RA cluster
sbatch jupyter_on_ra.sh
# prepare spinner
i=1
sp="/-\|"
printf 'Waiting for Jupyterlab to start. '
# wait until the Jupyterlab instance shows up
while [ "$(squeue | grep $SHORT | wc -w )" -lt 2 ]
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
printf '\nWaiting for Jupyterlab logfile. '
# and wait till there is a logfile (parsing ls, I know...)
while [ "$(ls jupyterlab*.log 2>/dev/null | xargs cat | wc -w )" -lt 50 ]
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
# wait a bit till the startup of the jupyterlab server is complete
sleep 2
printf '\nScanning for corresponding node in logfile. '
LOGFILE=$(find . -maxdepth 1 -name "jupyterlab*" -printf "%T@ %Tc %p\n" | sort -n | tail -n 1 | awk '{print $NF}')
until grep -q -E "Jupyter Server.*is running at" "$LOGFILE"
do
sleep 0.25
printf "\b${sp:i++%${#sp}:1}"
done
# and output the corresponding entry; the token masked as "..." in the log is
# replaced with the actual token set in the submission script
OUTPUT=$(grep -o "http://r.*" "$LOGFILE")
printf "\nJupyter instances running at: \n${OUTPUT/token=.../token=cristallina}\n"
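# This wrapper presumably lives next to jupyter_on_ra.sh (it submits the job
# by relative path), so it could be invoked from the console as, e.g.:
#   ssh your_username@ra.psi.ch "cd /sf/cristallina/applications/conda && ./start_jupyter.sh"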