From ff62c5924b10b83ce8ce2e4ae7e91127d5f32e94 Mon Sep 17 00:00:00 2001 From: Derek Feichtinger Date: Fri, 26 Jul 2019 15:13:36 +0200 Subject: [PATCH] new anaconda build and modulefile + conda repo structure --- .../datascience_36/datascience_36.yml | 20 ++++++ .../2019.07/conda-env-defs/talos_36/README.md | 31 +++++++++ .../conda-env-defs/talos_36/talos_36.yml | 19 +++++ Programming/anaconda/2019.07/config.sh | 6 ++ Programming/anaconda/README.md | 69 +++++++++++++++++++ Programming/anaconda/build | 31 +++++++-- .../files/admintools/export-all-envs.sh | 33 +++++++++ Programming/anaconda/files/variants | 1 + Programming/anaconda/modulefile | 39 ++++++++--- 9 files changed, 235 insertions(+), 14 deletions(-) create mode 100644 Programming/anaconda/2019.07/conda-env-defs/datascience_36/datascience_36.yml create mode 100644 Programming/anaconda/2019.07/conda-env-defs/talos_36/README.md create mode 100644 Programming/anaconda/2019.07/conda-env-defs/talos_36/talos_36.yml create mode 100644 Programming/anaconda/2019.07/config.sh create mode 100644 Programming/anaconda/README.md create mode 100755 Programming/anaconda/files/admintools/export-all-envs.sh diff --git a/Programming/anaconda/2019.07/conda-env-defs/datascience_36/datascience_36.yml b/Programming/anaconda/2019.07/conda-env-defs/datascience_36/datascience_36.yml new file mode 100644 index 0000000..fcb0fa7 --- /dev/null +++ b/Programming/anaconda/2019.07/conda-env-defs/datascience_36/datascience_36.yml @@ -0,0 +1,20 @@ +# Clean environment based on pure conda-forge packages +name: datascience_py36 +channels: + - conda-forge +dependencies: + - python=3.6 + - pandas + - numpy + - scipy + - scikit-learn + - matplotlib + - seaborn + - tensorflow=1.13.1 + - pytables + - ipython + - keras=2.1.6 + - dask + - deap + - nb_conda_kernels + - ipywidgets diff --git a/Programming/anaconda/2019.07/conda-env-defs/talos_36/README.md b/Programming/anaconda/2019.07/conda-env-defs/talos_36/README.md new file mode 100644 index 
0000000..a17d735 --- /dev/null +++ b/Programming/anaconda/2019.07/conda-env-defs/talos_36/README.md @@ -0,0 +1,31 @@ +This environment adds some dependencies to talos_36.yml via pip install. + +####### +$ conda env create -f talos_36.yml + +$ pip install talos +###### +This brings in talos-0.4.9 at the time of this writing. + +The talos install will produce some warnings, but will still install. + +################# Part of output +... + Stored in directory: /afs/psi.ch/user/f/feichtinger-adm/.cache/pip/wheels/a3/85/27/2179469128bd1f663d3563a8e2166223b2c0fc409c91f83b48 +Successfully built talos astetik sklearn chances kerasplotlib wrangle +ERROR: wrangle 0.6.5 has requirement scipy==1.2, but you'll have scipy 1.3.0 which is incompatible. +Installing collected packages: sklearn, wrangle, geonamescache, astetik, tqdm, matplotlib, chardet, urllib3, idna, requests, chances, kerasplotlib, talos + Found existing installation: matplotlib 3.1.1 + Uninstalling matplotlib-3.1.1: + Successfully uninstalled matplotlib-3.1.1 +Successfully installed astetik-1.9.8 chances-0.1.6 chardet-3.0.4 geonamescache-1.0.2 idna-2.8 kerasplotlib-0.1.4 matplotlib-2.2.3 requests-2.22.0 sklearn-0.0 talos-0.4.9 tqdm-4.32.2 urllib3-1.25.3 wrangle-0.6.5 +################### + +The tests by users (A. Adelmann) succeeded. I did not replace +scipy. Talos will replace a number of dependencies, matplotlib among +them. After the pip installs the environment should be considered +tainted. Further changes of the environment by conda may reflect this +inconsistent state, so it might be better to regenerate the whole +environment using first conda, then pip. 
+ +2019-07-16 Derek diff --git a/Programming/anaconda/2019.07/conda-env-defs/talos_36/talos_36.yml b/Programming/anaconda/2019.07/conda-env-defs/talos_36/talos_36.yml new file mode 100644 index 0000000..dfe2a39 --- /dev/null +++ b/Programming/anaconda/2019.07/conda-env-defs/talos_36/talos_36.yml @@ -0,0 +1,19 @@ +name: talos_py36 +channels: + - conda-forge +dependencies: + - python=3.6 + - pandas + - numpy + - scipy + - scikit-learn + - matplotlib + - seaborn + - tensorflow=1.13.1 + - pytables + - ipython + - keras=2.1.6 + - dask + - deap + - nb_conda_kernels + - ipywidgets diff --git a/Programming/anaconda/2019.07/config.sh b/Programming/anaconda/2019.07/config.sh new file mode 100644 index 0000000..8ff6238 --- /dev/null +++ b/Programming/anaconda/2019.07/config.sh @@ -0,0 +1,6 @@ +# version-specific settings + +# Miniconda version. Defaults to "latest" +# Should match the version distributed with anaconda +CONDA_VERSION=4.6.14 + diff --git a/Programming/anaconda/README.md b/Programming/anaconda/README.md new file mode 100644 index 0000000..9d086a9 --- /dev/null +++ b/Programming/anaconda/README.md @@ -0,0 +1,69 @@ +# Building of an Anaconda release using Pmodules + +## Concepts + + * The anaconda module just provides the **conda** package management tool together with its directory infrastructure which contains *conda environments* and a cache of downloaded packages + * Python and user software is provided in **conda environments**. These environments are located within the directory tree belonging to the anaconda module, e.g. 
`/afs/psi.ch/sys/psi.merlin/Programming/anaconda/2019.03/conda/envs/` + * The software in these environments can be accessed by users through + * loading the anaconda module and then using `conda activate somemodule_py36` + * a separate pmodule that transparently switches to that installed environment by just setting the correct PATH + * jupyter installations running from one of the environments which discover the other environments if they contain the correct packages (**nb_conda_kernels**) + * The `conda` tool has frequent updates, and our experience shows that they should be installed. However, it would be wasteful to produce a new module every time, because each new module would also bring a new area for environments. So we prefer to update conda in place, and only make a new anaconda module if there are special incentives + * Environments are self-sufficient and do not depend on the conda tool at all. All libraries they depend on are installed by conda. Conda makes consistent use of **rpath** definitions for executables and libraries, i.e. there is no reason to set `LD_LIBRARY_PATH` at all. + +## Building a central conda environment + + * **Always work on the host pmod6**: conda is trying to use hardlinks where it can. There is an issue that can appear if you install from a machine that uses Auristor (which provides hardlinks). This causes whole environments to become corrupt, so that only a PSI AFS admin can fix the problem. Therefore we only install from pmod6 which runs openAFS. + +### installation of a pure conda environment + +In the simplest case, the environment can be created by conda alone. First load the anaconda module to get access to the conda package installer and the install environment. 
+ +``` +module load anaconda/2019.03 +``` + +Define your installation in a `conda YAML` file and place it inside the buildblock +tree + +``` +cd buildblocks/Programming/anaconda/2019.03/conda-env-defs +mkdir datascience_36 +vim datascience_36/datascience_36.yml +``` + +Create the environment +``` +conda env create -f datascience_36/datascience_36.yml +``` + +### installation of a conda environment and adding pip packages + +Frequently there are packages that are not available as conda packages, even though they may exist as PyPI packages. You have two options: + 1. install the dependencies using `pip` + 1. create a conda package based on the PyPI package + +In most cases you will want to go ahead with `pip` installs. However, after running `pip` inside of a conda environment, the environment is tainted and conda may warn you that it is inconsistent. Therefore conda packages should always be installed first. + +Proceed as above by defining a YAML file and use conda to first install all the conda-based packages. + +Even though the YAML file also allows for the specification of pip packages, I advise doing this step separately. The pip steps can fail for various reasons, and it is better to do them interactively. Describe what you have to do in a README.md inside of the `conda-env-defs/${myenv}` folder. + + +### installation of a conda environment and adding source compiled packages + +**DRAFT!!!** + +This works if the python package has a correct setup.py build + + * If you need to apply changes to the source + * Clone the relevant git repos on github/gitlab + * implement your changes in a branch + * document it in `conda-env-defs/${myenv}/README.md` + * download and store the sources in the install area under + `/opt/psi/Programming/anaconda/2019.07/xxxx/mypackage` + * Use pip to install them into the environment (requires the setup.py) + ``` + cd /opt/psi/Programming/anaconda/2019.07/xxxx/mypackage + pip install . 
+ ``` diff --git a/Programming/anaconda/build b/Programming/anaconda/build index 91c547e..36832c4 100755 --- a/Programming/anaconda/build +++ b/Programming/anaconda/build @@ -2,11 +2,27 @@ pbuild::add_to_group 'Programming' -source "${BUILDBLOCK_DIR}/files/config-${V_PKG}.sh" + +source "${BUILDBLOCK_DIR}/${V_PKG}/config.sh" +if [[ x"$CONDA_VERSION" == x ]]; then + echo "ERROR: CONDA_VERSION for this release has not been defined" >&2 + exit 1 +fi + +MINICONDA_EXE="Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh" +DOWNLOAD_URL="https://repo.anaconda.com/miniconda/$MINICONDA_EXE" +INSTALLER="$PMODULES_DISTFILESDIR/$MINICONDA_EXE" pbuild::pre_prep() { - curl -fsSLo "$PMODULES_DISTFILESDIR/Miniconda2-${CONDA_VERSION}-Linux-x86_64.sh" \ - "https://repo.anaconda.com/miniconda/Miniconda2-${CONDA_VERSION}-Linux-x86_64.sh" + # Need to implement own downloader since pbuild::set_download_url and pbuild::prep + # currently only support tar files + std::info "DEBUG: in function %s...\n" "$FUNCNAME: FUNCNAME=${FUNCNAME[*]}" + if [[ -r "$INSTALLER" ]]; then + std::info "%s using previously downloaded %s\n" \ + "${module_name}/${module_version}:" "$INSTALLER" + else + curl -fsSLo "$INSTALLER" "$DOWNLOAD_URL" + fi } pbuild::configure() { @@ -17,8 +33,11 @@ pbuild::compile() { : } -pbuild::install() { +pbuild::install() { # Install conda - bash "$PMODULES_DISTFILESDIR/Miniconda2-${CONDA_VERSION}-Linux-x86_64.sh" \ - -b -p "$PREFIX/conda" + bash "$INSTALLER" -b -p "$PREFIX/conda" + + #echo "Installing PSI admintools for maintaining conda" + std::info "%s Installing admintools ...\n" "${module_name}/${module_version}:" + cp -r "${BUILDBLOCK_DIR}/files/admintools" "$PREFIX/admintools" } diff --git a/Programming/anaconda/files/admintools/export-all-envs.sh b/Programming/anaconda/files/admintools/export-all-envs.sh new file mode 100755 index 0000000..8551e2e --- /dev/null +++ b/Programming/anaconda/files/admintools/export-all-envs.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Dump the package 
descriptions of all conda environments into a timestamped directory below <installdir>/envlogs +DATE=$(date +%Y%m%d-%H%M%S) +MYDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +INSTALLDIR=$(dirname "$MYDIR") +TOPLOGDIR="$INSTALLDIR/envlogs" + +# Fail early when no conda command is available (anaconda module not loaded) +if ! command -v conda &>/dev/null; then + echo "conda command is not defined. Load the module" >&2 + exit 1 +fi + +CONDAPREFIX=$(expr "$(conda config --show root_prefix)" : '.*: *\(.*\)') + +if [[ "$INSTALLDIR" != "$(dirname "$CONDAPREFIX")" ]]; then + printf "WARNING: you may be running the wrong conda command!!!\n" >&2 + printf " PREFIX from conda root_prefix : %s\n" "$(dirname "$CONDAPREFIX")" >&2 + printf " this script's associated prefix : %s\n" "$INSTALLDIR" >&2 + exit 1 +fi + +mkdir -p "$TOPLOGDIR" || exit 1 +LOGDIR="$TOPLOGDIR/envdef-$DATE" +mkdir "$LOGDIR" || exit 1 +while IFS= read -r n; do + env=$(basename "$n") + echo ==== "$LOGDIR/${env}.txt ..." ===== + conda list -n "$env" -e > "$LOGDIR/${env}.txt" +done < <(conda info --json | jq -r '.envs[]' | grep envs) + + diff --git a/Programming/anaconda/files/variants b/Programming/anaconda/files/variants index 7310e93..aeb4876 100644 --- a/Programming/anaconda/files/variants +++ b/Programming/anaconda/files/variants @@ -1,2 +1,3 @@ anaconda/2018.12 unstable +anaconda/2019.07 unstable diff --git a/Programming/anaconda/modulefile b/Programming/anaconda/modulefile index 6085ca9..251c30c 100644 --- a/Programming/anaconda/modulefile +++ b/Programming/anaconda/modulefile @@ -3,7 +3,7 @@ module-whatis "Conda package manager and Anaconda software distribution" module-url "https://www.anaconda.com/" module-license "3-clause BSD License and 3rd party software licenses. 
https://docs.continuum.io/anaconda/eula/" -module-maintainer "Spencer Bliven " +module-maintainer "Spencer Bliven / Derek Feichtinger " module-help " Anaconda is a package manager, an environment manager, a Python/R data science @@ -27,7 +27,18 @@ This package provides the conda command (miniconda), as well as releases of the Anaconda distribution, built against several python versions. " +# this package has conflicts with all older psi python installations which +# integrated conda into the psi-python +conflict psi-python27 +conflict psi-python34 +conflict psi-python35 +conflict psi-python36 +conflict psi-python37 + + +# These paths are managed by the sourced conda file. Pmodules should not touch them; the list is re-set below with both names because a plain `set` replaces the previous value set dont-setenv { LD_LIBRARY_PATH } +set dont-setenv { LD_LIBRARY_PATH PATH } # Check for supported shell types set shelltype [module-info shelltype] @@ -42,20 +53,32 @@ switch -- $shelltype { switch [module-info mode] { "load" { #TODO check whether another conda version is present (conflicts) + # pmodules itself seems to prevent loading the same module of a different version + # but we need to prevent conflicts with a conda from a different source. - puts stderr "Using conda from $P/$V\n" + # puts stderr "DEBUG: Using conda from $P/$V\n" puts stdout "source \"$PREFIX/conda/etc/profile.d/conda.sh\";\n" - # Activate base + # Activate base? 
No: anaconda should just provide the conda tool + # puts stdout "conda activate;\n" } - "unload" - + "unload" - "remove" { - # Deactivate + # Deactivate all layers of active conda environments puts stdout {while [[ "${CONDA_SHLVL:-0}" -gt 0 ]]; do conda deactivate; done;} - # Unload conda - puts stdout {unset $(set|sed -rn 's/^(_?conda[a-z_]*).*$/\1/pI');} + remove-path PATH $PREFIX/conda/condabin + # remove the conda function definitions + unsetenv __conda_activate + unsetenv __conda_hashr + unsetenv __conda_reactivate + unsetenv conda + + unsetenv CONDA_EXE + unsetenv CONDA_PYTHON_EXE + unsetenv CONDA_SHLVL + unsetenv _CE_CONDA + } }