Adding cluster tools to this repo
cluster_tools/jupyter_on_ra.sh (executable file, 55 lines added)
@@ -0,0 +1,55 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the RA cluster for
# data analysis at Cristallina @ SwissFEL.

# Requirements: user account on RA and access to /sf/cristallina

# To execute from the cristallina console use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_ra.sh"
# or, when more computing power is needed, submit a batch job that takes the #SBATCH options below into account:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_ra.sh"

# Alternatively, this can also be run on the SwissFEL computing nodes, e.g. sf-cn-1.
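
# How to reach the running instance is outside the scope of this script; a common
# approach is an SSH tunnel from the local machine (8888 is Jupyter's default port,
# ra-c-042 stands for whatever compute node the job's logfile reports):
#   ssh -L 8888:ra-c-042:8888 your_username@ra.psi.ch
# and then opening http://localhost:8888/lab?token=cristallina in a browser.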

#SBATCH --job-name=analysis           # Job name
#SBATCH --partition=week              # alternatives: shared, hour, day-rhel8
#SBATCH --nodes=1                     # Run all processes on a single node
#SBATCH --ntasks=1                    # Run a single task
#SBATCH --cpus-per-task=14            # Number of CPU cores per task
#SBATCH --mem-per-cpu=16G             # Memory per CPU core
#SBATCH --time=96:00:00               # Time limit hrs:min:sec
#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log
#SBATCH --exclude=ra-c-[085-100]      # older AMD Epyc nodes which are problematic with bitshuffle
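
# The #SBATCH lines above only set defaults; options given on the sbatch command line
# take precedence, so a smaller session could be requested with, e.g. (illustrative values):
#   sbatch --time=24:00:00 --cpus-per-task=8 /sf/cristallina/applications/conda/jupyter_on_ra.sh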

echo "Starting JupyterLab..."

echo "Date = $(date)"
echo "Hostname = $(hostname -s)"
echo "Working Directory = $(pwd)"
echo ""
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
echo "Number of Tasks Allocated = $SLURM_NTASKS"
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"

# load our minimal base conda environment
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh

# and activate the actual analysis environment
# (a more conservative alternative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge)
conda activate /sf/cristallina/applications/conda/envs/analysis_edge

# password equivalent
export JUPYTER_TOKEN=cristallina

# single user:
# jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.disable_rtc=True

# experimental: use the collaboration environment:
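# (presumably: shared documents are persisted through the custom MySQLiteYStore,
#  written back after a save delay of 10 s and kept for a TTL of 1800 s = 30 min)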
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore --YDocExtension.document_save_delay=10 --MySQLiteYStore.document_ttl=1800

echo "JupyterLab finished."

# cleanup
echo "Cleaning up temporary database files"
find "/tmp" -type f -name "ystore_[0-9]*.db" -exec rm {} \;
cluster_tools/jupyter_on_sf.sh (executable file, 51 lines added)
@@ -0,0 +1,51 @@
#!/bin/bash
# Slurm submission script to start a JupyterLab instance on the SF cluster for
# data analysis at Cristallina @ SwissFEL.

# Requirements: user account on SF and access to /sf/cristallina

# To execute from the cristallina console use:
# ssh your_username@ra.psi.ch "srun /sf/cristallina/applications/conda/jupyter_on_sf.sh"
# or, when more computing power is needed, submit a batch job that takes the #SBATCH options below into account:
# ssh your_username@ra.psi.ch "sbatch /sf/cristallina/applications/conda/jupyter_on_sf.sh"

# Alternatively, this can also be run on the SwissFEL computing nodes, e.g. sf-cn-1.

#SBATCH --job-name=analysis           # Job name
#SBATCH --partition=prod-aramis       # or week, shared, hour, day-rhel8
#SBATCH --nodes=1                     # Run all processes on a single node
#SBATCH --ntasks=1                    # Run a single task

#SBATCH --output=jupyterlab_%j_%N.log # Standard output and error log

echo "Starting Jupyterlab..."
|
||||
|
||||
echo "Date = $(date)"
|
||||
echo "Hostname = $(hostname -s)"
|
||||
echo "Working Directory = $(pwd)"
|
||||
echo ""
|
||||
echo "Number of Nodes Allocated = $SLURM_JOB_NUM_NODES"
|
||||
echo "Number of Tasks Allocated = $SLURM_NTASKS"
|
||||
echo "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK"
|
||||
|
||||
# loading our minimal conda environment
|
||||
source /sf/cristallina/applications/conda/envs/miniconda/etc/profile.d/conda.sh
|
||||
|
||||
# and activating the actual analysis environment
|
||||
# a bit more conservative: conda activate /sf/cristallina/applications/conda/envs/analysis_forge
|
||||
conda activate /sf/cristallina/applications/conda/envs/analysis_edge
|
||||
|
||||
# password equivalent
|
||||
export JUPYTER_TOKEN=cristallina
|
||||
|
||||
# single user: jupyter lab --no-browser --ip 0.0.0.0
|
||||
# experimental: use collaboration environment
|
||||
|
||||
jupyter lab --no-browser --ip 0.0.0.0 --YDocExtension.ystore_class=cristallina.jupyter_helper.MySQLiteYStore
|
||||
|
||||
echo "Jupyterlab finished."
|
||||
|
||||
|
||||
cluster_tools/ra_cluster_cristallina.sh (normal file, 113 lines added)
@@ -0,0 +1,113 @@
#!/bin/bash

function find_my_pgroups(){
    echo "You seem to have access to these pgroups:"
    for d in /sf/*/data/p*/raw; do
        ls $d >/dev/null 2>/dev/null && echo $d
    done | cut -d\/ -f5
}
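
# Example output of find_my_pgroups (hypothetical pgroup names):
#   You seem to have access to these pgroups:
#   p17123
#   p19456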

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <pgroup> <human-readable comment>"
    echo "Example: $0 p17123 commissioning_spectrometer"
    echo
    find_my_pgroups
    exit 1
fi

# Change the permissions of newly created files so that the whole group
# (e.g. the users in the pgroup of an experiment) can write to them:
# add the umask line to ~/.bashrc if it is not there already and source the file again.
grep -qxF 'umask 0002' ~/.bashrc || echo 'umask 0002' >> ~/.bashrc
source ~/.bashrc

pgroup=$1
comment=$2
shortcut_folder_path="./pgroup_shortcuts"
#ppath=${pgroup::3}
timestamp=$(date "+%Y%m%d-%H%M%S")

source=/sf/cristallina/data/$pgroup
target=$shortcut_folder_path/$pgroup-$comment

# If the folder pgroup_shortcuts does not exist, create it
mkdir -p $shortcut_folder_path

if [ -d $target ]; then
    echo "pgroup symlink (\"$target\") exists already."
    echo "Won't create symlink."
elif [ ! -d $source ]; then
    echo "pgroup folder (\"$source\") does not exist."
    echo "Won't create symlink."
    find_my_pgroups
else
    echo "Creating symlink to pgroup $pgroup at $target..."
    ln -s $source $target
fi

#echo
#pgroup_conda=$source/anaconda
#if [ -d $pgroup_conda ]; then
#    echo "conda folder in pgroup $pgroup exists already."
#else
#    echo "No conda installation in pgroup $pgroup yet."
#    echo "Conda install may take some time..."
#    set -e
#
#    source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
#    conda activate alvra-analysis
#    conda env export --prefix /sf/alvra/anaconda/alvra-analysis --file packages-$timestamp.yaml
#    time conda env create --prefix $pgroup_conda/alvra-analysis-$pgroup --file packages-$timestamp.yaml
#    rm packages-$timestamp.yaml
#    conda activate $pgroup_conda/alvra-analysis-$pgroup
#    conda install --yes --channel paulscherrerinstitute alvra_tools
#    conda env export --prefix $pgroup_conda/alvra-analysis-$pgroup --file $pgroup_conda/packages-$timestamp.yaml
#    conda deactivate
#
#    set +e
#fi

condarc=~/.condarc
condarc_bak=$condarc-$timestamp

if [ -f $condarc ]; then
    cp $condarc $condarc_bak
fi

echo
echo "Updating .condarc:"
# entries added later have higher priority
#source /opt/psi/Programming/anaconda/2019.07/conda/etc/profile.d/conda.sh
#conda config --add envs_dirs /sf/alvra/anaconda
#conda config --add envs_dirs $pgroup_conda
#conda config --add channels conda-forge
#conda config --add channels paulscherrerinstitute
miniconda=/sf/cristallina/applications/conda/envs/miniconda
source ${miniconda}/etc/profile.d/conda.sh
conda config --add envs_dirs /sf/cristallina/applications/conda/envs
conda activate analysis_edge
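
# After this step, ~/.condarc should (alongside whatever it already contained) include roughly:
#   envs_dirs:
#     - /sf/cristallina/applications/conda/envs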

# delete backup if identical to updated original
if [ -f $condarc_bak ]; then
    cmp --silent $condarc $condarc_bak
    if [ $? -eq 0 ]; then
        echo
        echo ".condarc unchanged."
        rm $condarc_bak
    else
        echo
        echo "Created backup: $condarc_bak"
        echo
        echo "Your old .condarc:"
        echo "=================="
        cat $condarc_bak
    fi
fi
cluster_tools/start_jupyter.sh (executable file, 60 lines added)
@@ -0,0 +1,60 @@
#!/bin/bash

# clean up old log files
mv jupyterlab*.log ~/old_logs 2>/dev/null

# short user name as it appears in the Slurm queue (squeue's default output truncates long user names)
SHORT=$(echo $USER | cut -c1-7)

# JupyterLab does not cope well with several instances running as the same user
# on an NFS file system, as this leads to broken databases.
# So we abort here in that case.
if [ "$(squeue | grep $SHORT | awk '{print $3}')" == "analysis" ]; then
    echo "Jupyter instance already running. Aborting."
    exit 1
fi
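
# For reference, a matching squeue line looks roughly like this (job id, elapsed time
# and node are made up); column 3 is the job name tested above:
#   1234567      week analysis  username  R       0:42      1 ra-c-012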

# submit the batch job to the RA cluster
sbatch jupyter_on_ra.sh

# prepare spinner
i=1
sp="/-\|"
printf 'Waiting for JupyterLab to start. '

# wait until the JupyterLab job shows up in the queue
while [ "$(squeue | grep $SHORT | wc -w )" -lt 2 ]
do
    sleep 0.25
    printf "\b${sp:i++%${#sp}:1}"
done

printf '\nWaiting for JupyterLab logfile. '

# and wait until there is a non-trivial logfile (parsing ls, I know...)
while [ "$(ls jupyterlab*.log 2>/dev/null | xargs cat | wc -w )" -lt 50 ]
do
    sleep 0.25
    printf "\b${sp:i++%${#sp}:1}"
done

# wait a bit until the startup of the JupyterLab server is complete
sleep 2
printf '\nScanning for corresponding node in logfile. '

# pick the most recently modified logfile
LOGFILE=$(find . -maxdepth 1 -name "jupyterlab*" -printf "%T@ %Tc %p\n" | sort -n | awk '{print $NF}' | tail -n 1)

until grep -q -E "Jupyter Server.*is running at" $LOGFILE
do
    sleep 0.25
    printf "\b${sp:i++%${#sp}:1}"
done

# and output the corresponding entry
OUTPUT=$(grep -o "http://r.*" $LOGFILE )
printf "\nJupyter instances running at: \n${OUTPUT/token=.../token=cristallina}\n"