diff --git a/autodeploy/README.md b/autodeploy/README.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/autodeploy/README.md
@@ -0,0 +1 @@
+
diff --git a/autodeploy/TODO.md b/autodeploy/TODO.md
new file mode 100644
index 0000000..0712c09
--- /dev/null
+++ b/autodeploy/TODO.md
@@ -0,0 +1,86 @@
+# TODO
+
+## Make envs available in jupytera
+
+There's [pixi-kernel](https://github.com/renan-r-santos/pixi-kernel) but it is specifically "per directory", which does not fit our use case.
+
+However, Pixi envs are compatible with [nb_conda_kernels](https://github.com/anaconda/nb_conda_kernels), which Leo installs on jupytera already.
+
+The only "problem" is in how Jupyter shows the env names:
+- `prefix/envs/envname` => `prefix-envname`
+
+... which means that all pixi envs would show as `.pixi-envname`, which is weird. This can be solved by adding a symlink:
+- `/sf/alvra/envs` -> `/sf/alvra/config/python/.pixi/envs`
+
+... so we'd get `alvra-envname`. As a bonus, users can now add very intuitive paths to their `.condarc`.
+
+Note: if the `envs` folder is named differently, no prefix is shown at all... This might be an alternative idea.
+
+## Shared features
+
+Pixi supports "features" (i.e., sets of packages that allow the user to do XYZ). This would be very useful for, e.g., a feature like "be able to plot on jupytera", which gives the correct versions of ipympl, matplotlib, etc.
+
+We would need this to be a central file that is somehow made available and/or merged into everyone's individual files.
+
+Pixi does not seem to have this option (yet? A [ticket](https://github.com/prefix-dev/pixi/issues/3524) has been opened). TOML does not allow including further files out of the box. However, there is [tomlincl](https://github.com/qdongxu/tomlincl).
+
+## Running the setup script
+
+Currently, a deploy script runs as part of the deployment. We do NOT want to replace that script, but to run ours after it.
+
+Can this be changed so that the deploy script runs our script at the end? Where would our script live?
+
+## Timeout
+
+Currently the timeout of the autodeployer is rather short as the expectation is that there are only "filesystem copy tasks" performed.
+
+Conda env creation can take a few minutes even with Pixi.
+
+Can the timeout be made that long without negative implications? Can this, for instance, block other tasks?
+
+## Trigger only on change of specific file
+
+In case of changes to the env, the lock file (`pixi.lock`) should be committed at the end of the setup script to have a history and reproducibility.
+
+Currently, this will trigger an additional autodeployer run which does nothing. This is a bit ugly.
+
+Can we instead trigger the autodeployer only if `pixi.toml` changes?
+
+## Mounts
+
+### Problem
+
+At creation time, conda envs hard-code their location into several files.
+
+### From Simon:
+
+> The autodeployer runs in a podman container which maps:
+> - `/root/target` (in container world)
+>
+> to:
+> - `/net` (in autodeployer machine host world)
+>
+> Then, the autodeployer machine host has a diskmount that maps:
+> - `/net`
+>
+> to:
+> - `/gfa05`
+>
+> This means that references in the autodeployer config file such as the following:
+> - `gfa05/export/sf/alvra/config/python`
+>
+> result in writes to the combined path:
+> - `/net/gfa05/export/sf/alvra/config/python`
+
+### Possible solution
+
+Temporarily bind the autodeployer path to the console path:
+
+```bash
+sudo mkdir -p /sf/alvra/config/python
+sudo mount --bind /root/target/gfa05/export/sf/alvra/config/python /sf/alvra/config/python
+# install conda envs into /sf/alvra/config/python
+sudo umount /sf/alvra/config/python
+sudo rmdir -p /sf/alvra/config/python
+```
+
diff --git a/autodeploy/download.sh b/autodeploy/download.sh
new file mode 100755
index 0000000..4aa94bd
--- /dev/null
+++ b/autodeploy/download.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+#
+# Download an archive from a URL and unpack it, unless the wanted binary is
+# already present. An existing local archive is only re-downloaded when its
+# size differs from the size reported by the server.
+
+set -euo pipefail
+
+
+# Print the usage text to stdout.
+print_help() {
+cat << EOF
+Usage: $(basename "$0") [OPTIONS] URL
+
+Downloads ARCHIVE from URL and extracts BINARY from it,
+but only if the remote file differs from the local file.
+
+Positional arguments:
+  URL                    The url to download ARCHIVE from
+
+Optional arguments:
+  -a, --archive ARCHIVE  The name of the downloaded archive (default: inferred from URL)
+  -b, --binary BINARY    The binary to extract from ARCHIVE (default: inferred from ARCHIVE)
+  -h, --help             Show this help message and exit
+EOF
+}
+
+# Print an error message followed by the usage text to stderr and exit 1.
+usage_error() {
+    echo "Error: $1" >&2
+    echo >&2
+    print_help >&2
+    exit 1
+}
+
+ARCHIVE=""
+BINARY=""
+POSITIONAL_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -a|--archive)
+            # Without this check a missing value aborts with a cryptic
+            # "unbound variable" error because of "set -u".
+            [[ $# -ge 2 ]] || usage_error "Option $1 requires an argument"
+            ARCHIVE="$2"
+            shift 2
+            ;;
+        -b|--binary)
+            [[ $# -ge 2 ]] || usage_error "Option $1 requires an argument"
+            BINARY="$2"
+            shift 2
+            ;;
+        -h|--help)
+            print_help
+            exit 0
+            ;;
+        -*)
+            usage_error "Unknown option: $1"
+            ;;
+        *)
+            POSITIONAL_ARGS+=("$1")
+            shift
+            ;;
+    esac
+done
+
+if [[ ${#POSITIONAL_ARGS[@]} -ne 1 ]]; then
+    usage_error "Expected exactly one positional argument (URL), got ${#POSITIONAL_ARGS[@]}"
+fi
+
+URL="${POSITIONAL_ARGS[0]}"
+if [[ -z "$ARCHIVE" ]]; then
+    ARCHIVE=$(basename "$URL")
+fi
+if [[ -z "$BINARY" ]]; then
+    # "pixi-x86_64-unknown-linux-musl.tar.gz" -> "pixi": keep the text before
+    # the first "." and then before the first "-".
+    BINARY=${ARCHIVE%%.*}
+    BINARY=${BINARY%%-*}
+fi
+
+
+# Make sure BINARY exists, downloading and unpacking ARCHIVE if it does not.
+# Arguments: $1 - url, $2 - archive file name, $3 - binary file name
+ensure_binary() {
+    local url=$1
+    local archive=$2
+    local binary=$3
+
+    if [[ -f "$binary" ]]; then
+        echo "File $binary exists already"
+        return
+    fi
+
+    update "$url" "$archive"
+    tar xzf "$archive"
+
+    # BINARY may have been inferred from the archive name; point out a wrong
+    # guess instead of silently re-extracting the archive on every run.
+    if [[ ! -f "$binary" ]]; then
+        echo "Warning: $binary not found after extracting $archive" >&2
+    fi
+}
+
+# Download ARCHIVE from URL unless the local copy already has the remote size.
+# Arguments: $1 - url, $2 - archive file name
+update() {
+    local url=$1
+    local archive=$2
+    # --fail: do not store an HTTP error page as the archive.
+    local curl_args=(--fail --location --remote-time --output "$archive")
+
+    if check_file_sizes "$url" "$archive"; then
+        echo "Skipped download $url"
+        return
+    fi
+
+    # Only compare timestamps against a file that is actually there.
+    if [[ -f "$archive" ]]; then
+        curl_args+=(--time-cond "$archive")
+    fi
+
+    echo "Downloading $url (if remote is newer) ..."
+    curl "${curl_args[@]}" "$url"
+}
+
+# Compare the size of the local file with the size reported by the server.
+# Arguments: $1 - url, $2 - local file name
+# Returns: 0 if both sizes are known and equal, 1 otherwise
+check_file_sizes() {
+    local url=$1
+    local fn=$2
+    local local_size remote_size
+
+    if [[ ! -f "$fn" ]]; then
+        echo "File $fn not found"
+        return 1
+    fi
+
+    local_size=$(get_local_file_size "$fn")
+    remote_size=$(get_remote_file_size "$url") || remote_size=""
+
+    # A missing or malformed Content-Length must not count as "identical".
+    if [[ ! "$remote_size" =~ ^[0-9]+$ ]]; then
+        echo "Could not determine remote size of file $fn"
+        return 1
+    fi
+
+    if [[ "$remote_size" != "$local_size" ]]; then
+        echo "Remote and local size of file $fn differs (remote: $remote_size, local: $local_size)"
+        return 1
+    fi
+
+    echo "Remote and local size of file $fn is identical"
+    return 0
+}
+
+# Print the size of a local file in bytes.
+get_local_file_size() {
+    local fn=$1
+    stat --format="%s" "$fn"
+}
+
+# Print the Content-Length reported for URL (the last one if redirects are
+# followed); prints nothing if the server does not send the header.
+get_remote_file_size() {
+    local url=$1
+    curl --location --head --silent "$url" \
+        | tr -d '\r' \
+        | awk 'tolower($1) == "content-length:" { size = $2 } END { print size }'
+}
+
+
+ensure_binary "$URL" "$ARCHIVE" "$BINARY"
diff --git a/autodeploy/setup.sh b/autodeploy/setup.sh
new file mode 100755
index 0000000..049bbd3
--- /dev/null
+++ b/autodeploy/setup.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Fetch pixi, create all pixi environments, and commit the resulting
+# manifest and lock file so that environment changes are tracked in git.
+
+set -euo pipefail
+
+
+./download.sh "https://github.com/prefix-dev/pixi/releases/latest/download/pixi-x86_64-unknown-linux-musl.tar.gz"
+
+# Only initialise when there is no manifest yet, so that a real "pixi init"
+# failure is no longer hidden (the exit status used to be ignored entirely).
+if [[ ! -f pixi.toml ]]; then
+    ./pixi init
+fi
+./pixi install --all
+
+git add .gitattributes .gitignore pixi.lock pixi.toml
+# "git commit" exits non-zero when nothing is staged, which would abort the
+# script under "set -e" on every run that did not change the environment.
+if git diff --cached --quiet; then
+    echo "Nothing to commit"
+else
+    git commit -m"automatic commit after install"
+fi
+git push