public release 4.2.0 - see README.md and CHANGES.md for details
51
.gitea/workflows/build-test.yaml
Normal file
@@ -0,0 +1,51 @@
name: build and test the package

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: check runner environment
        run: |
          uname -a
          lsb_release -a
          echo "Runner home: $HOME"

      - name: check out
        uses: actions/checkout@v5

      - name: set up compilers
        run: |
          sudo apt-get update
          sudo apt-get -y install binutils build-essential g++ gcc gfortran libblas-dev liblapack-dev openmpi-bin openmpi-common sqlite3

      - name: set up python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: install uv
        uses: astral-sh/setup-uv@v7
        with:
          version: "0.9.18"
          enable-cache: true

      - name: lint with ruff
        # configuration is in pyproject.toml
        run: |
          uvx ruff check --extend-exclude=.venv,build pmsco

      - name: install dependencies
        run: uv sync --locked --all-extras --dev

      - name: tests
        run: |
          uv run nosetests
45
.gitea/workflows/deploy-pages.yaml
Normal file
@@ -0,0 +1,45 @@
name: build and deploy documentation

on:
  push:
    branches:
      - master

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest
    container:
      image: gitea.psi.ch/pearl/docs
      credentials:
        username: ${{ gitea.actor }}
        password: ${{ secrets.package_token }}

    steps:
      - name: checkout
        working-directory: /app
        run: |
          git clone --branch master --single-branch https://${{ secrets.REPO_TOKEN }}@gitea.psi.ch/${{ github.repository }}.git

      - name: build
        working-directory: /app/pmsco/docs
        run: |
          export REVISION=$(git describe --always --tags --dirty --long || echo "unknown, "`date +"%F %T %z"`)
          export OUTDIR=/app/build
          doxygen config.dox

      - name: configure git
        working-directory: /app/pmsco
        run: |
          git config --global user.name "Gitea Actions"
          git config --global user.email "actions@gitea.local"

      - name: push to gitea-pages
        working-directory: /app/pmsco
        run: |
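          # start an orphan branch so the published pages carry no previous history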
          git checkout --orphan gitea-pages
          git reset --hard
          cp -r /app/build/html/* .
          git add .
          git commit -m "Deploy documentation to gitea"
          git push -f https://${{ secrets.REPO_TOKEN }}@gitea.psi.ch/${{ github.repository }}.git gitea-pages

12
.githooks/install-hooks.sh
Executable file
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

echo "Setting up Git hooks..."
cd "$(dirname "$0")"
cd ..

# Create symlinks
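# (link targets are resolved relative to .git/hooks, so ../../ points at the repository root)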
ln -sf ../../.githooks/pre-commit .git/hooks/pre-commit
ln -sf ../../.githooks/pre-push .git/hooks/pre-push

chmod +x .git/hooks/*
echo "Git hooks installed successfully!"
34
.githooks/pre-commit
Executable file
@@ -0,0 +1,34 @@
#!/bin/bash
# .git/hooks/pre-commit
# requires uv

# Track overall status
PASS=true

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${YELLOW}Running pre-commit checks...${NC}"

PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '\.py$')
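# note: the lint step below runs only when Python files are staged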

# Python checks
if [ -n "$PY_FILES" ]; then
    echo -e "${YELLOW}Checking Python files...${NC}"

    if ! uvx ruff check --extend-exclude=.*,build*; then
        PASS=false
    fi
fi

# Final status
if [ "$PASS" = true ]; then
    echo -e "${GREEN}All checks passed!${NC}"
    exit 0
else
    echo -e "${RED}Some checks failed. Please fix issues before committing.${NC}"
    exit 1
fi
43
.githooks/pre-push
Executable file
@@ -0,0 +1,43 @@
#!/bin/bash
# .git/hooks/pre-push

# Track overall status
PASS=true

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${YELLOW}Running pre-push checks...${NC}"

PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '\.py$')
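# note: PY_FILES is currently informational only; the checks below run on the whole tree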

# Python checks
echo -e "${YELLOW}Checking Python files...${NC}"

# Lint
if ! uvx ruff check --extend-exclude=.*,build*; then
    PASS=false
fi

# Sync the environment (builds the package and its extension modules)
uv sync

# Run the Python test suite
echo -e "${YELLOW}Running Python tests...${NC}"

if ! uv run nosetests; then
    echo -e "Tests failed. Push aborted."
    PASS=false
fi

# Final status
if [ "$PASS" = true ]; then
    echo -e "${GREEN}All checks passed!${NC}"
    exit 0
else
    echo -e "${RED}Some checks failed. Please fix issues before pushing.${NC}"
    exit 1
fi
3
.gitignore
vendored
@@ -2,6 +2,8 @@ work/*
debug/*
lib/*
dev/*
build/*
__pycache__/*
*.pyc
*.o
*.so
@@ -15,3 +17,4 @@ dev/*
.ropeproject/*
.fuse*
.trash
.wraplock

@@ -1,14 +0,0 @@
pages:
  stage: deploy
  script:
    - ~/miniconda3/bin/activate pmsco
    - make docs
    - mv docs/html/ public/
  artifacts:
    paths:
      - public
  only:
    - master
  tags:
    - doxygen

115
CHANGES.md
@@ -1,59 +1,72 @@
Release 4.2.0 (2026-01-01)
==========================

- Switch to Astral-UV package manager
- CI lint, build, test workflow in gitea
- Automated documentation workflow in gitea
- Recommended Python version 3.12 (compatibility 3.10-3.13)
- Multipole expansion
- Table optimization mode
- Integrate phagen scattering amplitude calculator
- Select modulation and R-factor functions in runfile
- Parametric holo scan generator
- Namespace package installation, support for editable installation
- Simplified command line
- Meson build system
- Differential cross section in periodic table
- Configurable reports
- Path resolution
- Database interface for reports
- Runfile based job scheduling


Release 3.0.0 (2021-02-01)
==========================

| Hash | Date | Description |
| ---- | ---- | ----------- |
| 72a9f38 | 2021-02-06 | introduce run file based job scheduling |
| 42e12d8 | 2021-02-05 | compatibility with recent conda and singularity versions |
| caf9f43 | 2021-02-03 | installation: include plantuml.jar |
| 574c88a | 2021-02-01 | docs: replace doxypy by doxypypy |
| a5cb831 | 2021-02-05 | redefine output_file property |
| 49dbb89 | 2021-01-27 | documentation of run file interface |
| 940d9ae | 2021-01-07 | introduce run file interface |
| 6950f98 | 2021-02-05 | set legacy fortran for compatibility with recent compiler |
| 28d8bc9 | 2021-01-27 | graphics: fixed color range for modulation functions |
| 1382508 | 2021-01-16 | cluster: build_element accepts symbol or number |
| 53508b7 | 2021-01-06 | graphics: swarm plot |
| 4a24163 | 2021-01-05 | graphics: genetic chart |
| 99e9782 | 2020-12-23 | periodic table: use common binding energies in condensed matter XPS |
| fdfcf90 | 2020-12-23 | periodic table: reformat bindingenergy.json, add more import/export functions |
| 13cf90f | 2020-12-21 | hbnni: parameters for xpd demo with two domains |
| 680edb4 | 2020-12-21 | documentation: update documentation of optimizers |
| d909469 | 2020-12-18 | doc: update top components diagram (pmsco module is entry point) |
| 574993e | 2020-12-09 | spectrum: add plot cross section function |
- Compatibility with recent conda and singularity versions
- Installation: include plantuml.jar
- Documentation: replace doxypy by doxypypy
- Redefine output_file property
- Documentation of run file interface
- Introduce runfile interface
- Set legacy Fortran for compatibility with recent compiler
- Graphics: fixed color range for modulation functions
- Cluster: build_element accepts symbol or number
- Graphics: swarm plot
- Graphics: genetic chart
- Periodic table: use common binding energies in condensed matter XPS
- Periodic table: reformat bindingenergy.json, add more import/export functions
- Spectrum: add plot cross section function


Release 2.2.0 (2020-09-04)
==========================

| Hash | Date | Description |
| ---- | ---- | ----------- |
| 4bb2331 | 2020-07-30 | demo project for arbitrary molecule (cluster file) |
| f984f64 | 2020-09-03 | bugfix: DATA CORRUPTION in phagen translator (emitter mix-up) |
| 11fb849 | 2020-09-02 | bugfix: load native cluster file: wrong column order |
| d071c97 | 2020-09-01 | bugfix: initial-state command line option not respected |
| 9705eed | 2020-07-28 | photoionization cross sections and spectrum simulator |
| 98312f0 | 2020-06-12 | database: use local lock objects |
| c8fb974 | 2020-04-30 | database: create view on results and models |
| 2cfebcb | 2020-05-14 | REFACTORING: Domain -> ModelSpace, Params -> CalculatorParams |
| d5516ae | 2020-05-14 | REFACTORING: symmetry -> domain |
| b2dd21b | 2020-05-13 | possible conda/mpi4py conflict - changed installation procedure |
| cf5c7fd | 2020-05-12 | cluster: new calc_scattering_angles function |
| 20df82d | 2020-05-07 | include a periodic table of binding energies of the elements |
| 5d560bf | 2020-04-24 | clean up files in the main loop and in the end |
| 6e0ade5 | 2020-04-24 | bugfix: database ingestion overwrites results from previous jobs |
| 263b220 | 2020-04-24 | time out at least 10 minutes before the hard time limit given on the command line |
| 4ec526d | 2020-04-09 | cluster: new get_center function |
| fcdef4f | 2020-04-09 | bugfix: type error in grid optimizer |
| a4d1cf7 | 2020-03-05 | bugfix: file extension in phagen/makefile |
| 9461e46 | 2019-09-11 | dispatch: new algo to distribute processing slots to task levels |
| 30851ea | 2020-03-04 | bugfix: load single-line data files correctly! |
| 71fe0c6 | 2019-10-04 | cluster generator for zincblende crystal |
| 23965e3 | 2020-02-26 | phagen translator: fix phase convention (MAJOR), fix single-energy |
| cf1814f | 2019-09-11 | dispatch: give more priority to mid-level tasks in single mode |
| 58c778d | 2019-09-05 | improve performance of cluster add_bulk, add_layer and rotate |
| 20ef1af | 2019-09-05 | unit test for Cluster.translate, bugfix in translate and relax |
| 0b80850 | 2019-07-17 | fix compatibility with numpy >= 1.14, require numpy >= 1.13 |
| 1d0a542 | 2019-07-16 | database: introduce job-tags |
| 8461d81 | 2019-07-05 | qpmsco: delete code after execution |

- Demo project for arbitrary molecule (cluster file)
- Bugfix: DATA CORRUPTION in phagen translator (emitter mix-up)
- Bugfix: load native cluster file: wrong column order
- Bugfix: initial-state command line option not respected
- Photoionization cross sections and spectrum simulator
- Database: use local lock objects
- Database: create view on results and models
- REFACTORING: Domain -> ModelSpace, Params -> CalculatorParams
- REFACTORING: symmetry -> domain
- Possible conda/mpi4py conflict - changed installation procedure
- Cluster: new calc_scattering_angles function
- Include a periodic table of binding energies of the elements
- Clean up files in the main loop and in the end
- Bugfix: database ingestion overwrites results from previous jobs
- Time out at least 10 minutes before the hard time limit given on the command line
- Cluster: new get_center function
- Bugfix: type error in grid optimizer
- Bugfix: file extension in phagen/makefile
- Dispatch: new algo to distribute processing slots to task levels
- Bugfix: load single-line data files correctly!
- Cluster generator for zincblende crystal
- Phagen translator: fix phase convention (MAJOR), fix single-energy
- Dispatch: give more priority to mid-level tasks in single mode
- Improve performance of cluster add_bulk, add_layer and rotate
- Unit test for Cluster.translate, bugfix in translate and relax
- Fix compatibility with numpy >= 1.14, require numpy >= 1.13
- Database: introduce job-tags
- qpmsco: delete code after execution

@@ -5,10 +5,10 @@ List of Contributors
Original Author
---------------

Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>


Contributors
------------


- Frederik Schirdewahn, <mailto:frederik.schirdewahn@psi.ch>

71
README.md
@@ -1,9 +1,11 @@
Introduction
============

PMSCO stands for PEARL multiple-scattering cluster calculations and structural optimization.
It is a collection of computer programs to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data.
PMSCO (PSI multiple-scattering cluster calculations and structural optimization)
is a Python-based workflow engine to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data using machine learning techniques.
PMSCO was developed at the [Paul Scherrer Institut (PSI)](https://www.psi.ch/)
by the team of the [PEARL beamline](https://www.psi.ch/en/sls/pearl).

The actual scattering calculation is done by code developed by other parties.
PMSCO wraps around those programs and facilitates parameter handling, cluster building, structural optimization and parallel processing.
@@ -31,46 +33,40 @@ Highlights
Installation
============

PMSCO is written in Python 3.6.
The code will run in any recent Linux environment on a workstation or virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a cluster with 20-50 available processor cores is recommended.
PMSCO is written in Python. The recommended Python version is 3.12.
Further requirements are the GNU compiler collection, BLAS/LAPACK libraries, OpenMPI and a package manager such as uv, pip or conda.
For optimization jobs, a cluster machine with 20-50 available processor cores is recommended.
Smaller jobs run on any recent Linux workstation.
The code requires about 2 GB of RAM per process.

Detailed installation instructions and dependencies can be found in the documentation
(docs/src/installation.dox).
A [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) compiler with Doxypypy is required to generate the documentation in HTML format.

The easiest way to set up an environment with all dependencies and without side-effects on other installed software is to use a [Singularity](https://www.sylabs.io/guides/3.7/user-guide/index.html) container.
A Singularity recipe file is part of the distribution; see the PMSCO documentation for details. Singularity must be installed separately.
Installation in a [virtual box](https://www.virtualbox.org/) on Windows or Mac is straightforward using pre-compiled images with [Vagrant](https://www.vagrantup.com/).
A Vagrant definition file is included in the distribution.

The public distribution of PMSCO does not contain the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code.
Please obtain the EDAC source code from the original author, copy it to the pmsco/edac directory, and apply the edac_all.patch file.


License
=======

The source code of PMSCO is licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
Please read and respect the license agreement.
This _does not include_ the calculation packages contained in the subprojects folder which are licensed separately.

Please share your extensions of the code with the original author.
The gitlab facility can be used to create forks and to submit pull requests.
Attribution notices for your contributions shall be added to the NOTICE.md file.
- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please consider sharing your developments with the original author.

Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.


Author
------
Authors
-------

Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Frederik Schirdewahn, <mailto:frederik.schirdewahn@psi.ch>

Copyright
---------

Copyright 2015-2021 by [Paul Scherrer Institut](http://www.psi.ch)
Copyright 2015-2025 by [Paul Scherrer Institut](http://www.psi.ch)


Release Notes
@@ -78,6 +74,31 @@ Release Notes

For a detailed list of changes, see the CHANGES.md file.

4.2.0 (2026-01-01)
------------------

- Recommended Python version 3.12 (compatibility 3.10-3.13)
- Build system and package environment
  - Switch to Astral-UV package manager
  - Meson build system for Fortran, C and C++ extension modules
  - Namespace package installation, support for editable installation
  - CI lint, build, test workflow in gitea
  - Automated documentation workflow in gitea
- User interface
  - Simplified command line, all configuration via runfile and/or project class
  - Select modulation and R-factor functions in runfile
  - Parametric holo scan generator
  - Configurable reports
  - Path resolution in runfile
  - Database interface for reports
  - Runfile based job scheduling
- Calculation features
  - Multipole expansion
  - Table optimization mode
  - Integrate phagen scattering amplitude calculator
  - Differential cross section in periodic table


3.0.0 (2021-02-08)
------------------


619
docs/config.dox
File diff suppressed because it is too large
@@ -10,26 +10,25 @@ SHELL=/bin/sh

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .html
.PHONY: all docs clean
.PHONY: all docs html clean

DOX=doxygen
DOXOPTS=
LATEX_DIR=latex

REVISION=$(shell git describe --always --tags --dirty --long || echo "unknown, "`date +"%F %T %z"`)
export REVISION
OUTDIR=
export OUTDIR

all: docs
all: html

docs: doxygen pdf
docs: html

doxygen:
	$(DOX) $(DOXOPTS) config.dox

pdf: doxygen
	-$(MAKE) -C $(LATEX_DIR)
html: doxygen

clean:
	-rm -r latex/*
	-rm -r html/*

28
docs/readme.md
Normal file
@@ -0,0 +1,28 @@
To compile the source code documentation in HTML format on Ubuntu, follow the instructions below.

~~~~~~{.sh}
apt-get update
apt-get install -y --no-install-recommends \
    default-jre \
    doxygen \
    gawk \
    git \
    graphviz \
    pandoc \
    wget

pip install --no-cache-dir \
    doxypypy \
    meson \
    meson-python \
    ninja \
    pynose

wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
export PLANTUML_JAR_PATH=/app/plantuml.jar

cd pmsco/docs
doxygen config.dox
~~~~~~

Open `pmsco/docs/html/index.html` in your browser.
@@ -1,17 +0,0 @@
To compile the source code documentation in HTML format,
you need the following packages.
They are available from Linux distributions unless noted otherwise.

GNU make
doxygen
python
doxypypy (pip)
graphviz
java JRE
plantuml (download from plantuml.com)

export the location of plantuml.jar in the PLANTUML_JAR_PATH environment variable.

go to the `docs` directory and execute `make html`.

open `docs/html/index.html` in your browser.
@@ -1,135 +1,48 @@
/*! @page pag_command Command Line
\section sec_command Command Line

This section describes the command line arguments for a direct call of PMSCO from the shell.
For batch job submission to Slurm see @ref sec_slurm.
Assuming that PMSCO has been installed in the active Python environment (@ref pag_install),
the basic command line of PMSCO is as follows:

Since PMSCO is started indirectly by a call of the specific project module,
the syntax of the command line arguments is defined by the project module.
However, to reduce the amount of custom code and documentation and to avoid confusion
it is recommended to adhere to the standard syntax described below.
~~~~~~{.sh}
[mpiexec -np NPROCESSES] python -m pmsco [options]
~~~~~~

The basic command line is as follows:
@code{.sh}
[mpiexec -np NPROCESSES] python path/to/pmsco path/to/project.py [common args] [project args]
@endcode
The first portion between square brackets is necessary for parallel execution using MPI.
Replace `NPROCESSES` by the number of processes.

Include the first portion between square brackets if you want to run parallel processes.
Specify the number of processes as the @c -np option.
@c path/to/pmsco is the directory where <code>__main__.py</code> is located.
Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name of your project module.
Common args and project args are described below.
The PMSCO main program has a limited number of `common arguments` that are described below.
Usually, all parameters should be declared in a @ref pag_runfile so that they can be archived with the results.
However, in some cases it may be necessary to override some common parameters, e.g. the job name, on the command line.


\subsection sec_command_common Common Arguments

All common arguments are optional and default to more or less reasonable values if omitted.
They can be added to the command line in arbitrary order.
All common arguments can also be set in the project code or the run-file (recommended).
In that case, only the run-file is specified on the command line.
However, there are a number of options that override settings from the run-file.

The arguments can appear in arbitrary order.
The following table is ordered by importance.


| Option | Values | Description |
| --- | --- | --- |
| -h, --help | | Display a command line summary and exit. |
| -m, --mode | single (default), grid, swarm, genetic | Operation mode. |
| -d, --data-dir | file system path | Directory path for experimental data files (if required by project). Default: current working directory. |
| -o, --output-file | file system path | Base path and/or name for intermediate and output files. Default: pmsco0 |
| -t, --time-limit | decimal number | Wall time limit in hours. The optimizers try to finish before the limit. Default: 24.0. |
| -k, --keep-files | list of file categories | Output file categories to keep after the calculation. Multiple values can be specified and must be separated by spaces. By default, cluster and model (simulated data) of a limited number of best models are kept. See @ref sec_file_categories below. |
| --log-level | DEBUG, INFO, WARNING (default), ERROR, CRITICAL | Minimum level of messages that should be added to the log. |
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
| --log-disable | | Disable logging. By default, logging is on. |
| --pop-size | integer | Population size (number of particles) in swarm and genetic optimization mode. The default value is the greater of 4 or the number of parallel calculation processes. |
| --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
| --table-file | file system path | Name of the model table file in table scan mode. |
| Option | Values | Description | Run File |
| --- | --- | --- | --- |
| -r, --run-file | file path | JSON-formatted configuration file that defines run-time parameters. The format and content of a run file is described in a section @ref pag_runfile. | no |
| -o, --output-dir | file path | Base path and/or name for intermediate and output files. | see note below |
| -j, --job-name | string | Job name | job-name |
| -m, --module | file path | Project module | __module__ |
| -c, --project-class | string | Project class | __class__ |
| -h, --help | | Display a command line summary and exit. | no |

The job name is used as a prefix of output file names.
It is also registered in the `jobs` table of the results database (if used),
and it is used to identify the job with a job scheduling system.

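For example, a minimal sketch (run file name and job name hypothetical) that runs a prepared
run file but overrides the job name on the command line:

@code{.sh}
python -m pmsco -r my_runfile.json -j trial_0042
@endcode
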
\subsubsection sec_command_files File Categories

The following category names can be used with the `--keep-files` option.
Multiple names can be specified and must be separated by spaces.

| Category | Description | Default Action |
| --- | --- | --- |
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| atomic | atomic scattering and emission files in portable format | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep |
| domain | output files in ETPAI format: domain (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
| report | final report of results | keep always |
| population | final state of particle population | keep |
| rfac | files related to models which give bad r-factors, see warning below | delete |

\note
The `report` category is always kept and cannot be turned off.
The `model` category is always kept in single calculation mode.

\warning
If you want to specify `rfac` with the `--keep-files` option,
you have to add the file categories that you want to keep, e.g.,
`--keep-files rfac cluster model scan population`
(to return the default categories for all calculated models).
Do not specify `rfac` alone as this will effectively not return any file.


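As a sketch (run file name hypothetical), keeping the bad-model files in addition to the
default categories:

@code{.sh}
python -m pmsco -r my_runfile.json --keep-files rfac cluster model scan population
@endcode
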
\subsection sec_command_project_args Project Arguments

The following table lists a few recommended options that are handled by the project code.
Project options that are not listed here should use the long form to avoid conflicts in future versions.


| Option | Values | Description |
| --- | --- | --- |
| -s, --scans | project-dependent | Nick names of scans to use in calculation. The nick name selects the experimental data file and the initial state of the photoelectron. Multiple values can be specified and must be separated by spaces. |


\subsection sec_command_scanfile Experimental Scan Files

The recommended way of specifying experimental scan files is using nick names (dictionary keys) and the @c --scans option.
A dictionary in the module code defines the corresponding file name, chemical species of the emitter and initial state of the photoelectron.
The location of the files is selected using the common @c --data-dir option.
This way, the file names and photoelectron parameters are versioned with the code,
whereas command line arguments may easily get forgotten in the records.


\subsection sec_command_example Argument Handling

To handle command line arguments in a project module,
the module must define a <code>parse_project_args</code> and a <code>set_project_args</code> function.
An example can be found in the twoatom.py demo project.


\section sec_slurm Slurm Job Submission

The command line of the Slurm job submission script for the Ra cluster at PSI is as follows.
This script is specific to the configuration of the Ra cluster but may be adapted to other Slurm-based queues.

@code{.sh}
qpmsco.sh [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
@endcode

Here, the first few arguments are positional and their order must be strictly adhered to.
After the positional arguments, optional arguments of the PMSCO project command line can be added in arbitrary order.
If you execute the script without arguments, it displays a short summary.
The job script is written to @c $DESTDIR/$JOBNAME which is also the destination of calculation output.

| Argument | Values | Description |
| --- | --- | --- |
| NOSUB (optional) | NOSUB or omitted | If NOSUB is present as the first argument, create the job script but do not submit it to the queue. Otherwise, submit the job script. |
| DESTDIR | file system path | Destination directory; must exist. A sub-directory $JOBNAME is created. |
| JOBNAME | text | Name of job. Use only alphanumeric characters, no spaces. |
| NODES | integer | Number of computing nodes. (1 node = 24 or 32 processors). Do not specify more than 2. |
| TASKS_PER_NODE | 1...24, or 32 | Number of processes per node. 24 or 32 for full-node allocation. 1...23 for shared node allocation. |
| WALLTIME:HOURS | integer | Requested wall time. 1...24 for day partition, 24...192 for week partition, 1...192 for shared partition. This value is also passed on to PMSCO as the @c --time-limit argument. |
| PROJECT | file system path | Python module (file path) that declares the project and starts the calculation. |
| MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
\note It is important that the job name be unique within a project.
Specifically, you need to *provide a new job name each time you start pmsco*, otherwise the job may fail.
It may be more natural to specify the job name on the command line using the `-j` argument
than to change the run file every time.
Unfortunately, PMSCO cannot auto-generate, auto-increment or verify the job name.

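For illustration, a hypothetical submission (all values illustrative) of the two-atom demo
in single mode on one full 24-core node with a 12-hour wall time:

@code{.sh}
qpmsco.sh ~/jobs twoatom0001 1 24 12 projects/twoatom/twoatom.py single
@endcode
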
*/

@@ -3,36 +3,35 @@

To run PMSCO you need the PMSCO code and its dependencies (cf. @ref pag_install),
a customized code module that contains the project-specific code,
and one or several files containing the scan parameters and experimental data.
Please check the <code>projects</code> folder for examples of project modules.
one or several files containing the scan parameters and experimental data,
and a run-file specifying the calculation parameters.

The run-time arguments can either be passed on the command line
(@ref pag_command - the older and less flexible way)
or in a JSON-formatted run-file
(@ref pag_runfile - the recommended new and flexible way).
For beginners, it's also possible to hard-code all project parameters in the custom project module.
Please check the <code>projects</code> folder for examples of project modules.


\subsection sec_run_single Single Process

Run PMSCO from the command prompt:
The following instructions assume that PMSCO was installed as a Python site-package according to @ref pag_install.

To run PMSCO from the command prompt:

@code{.sh}
cd work-dir
python pmsco-dir -r run-file
python -m pmsco -j job-name -r run-file
@endcode

where <code>work-dir</code> is the destination directory for output files,
<code>pmsco-dir</code> is the directory containing the <code>__main__.py</code> file,
<code>run-file</code> is a json-formatted configuration file that defines run-time parameters.
The format and content of the run-file is described in a separate section.
where:

| `work-dir` | Destination directory for output files |
| `run-file` | JSON-formatted configuration file that defines run-time parameters. The format and content of a run file is described in a section @ref pag_runfile. |
| `job-name` | (optional) The job name appears mainly as the prefix of all output files but is also used in the database and other places. The job name can also be declared in the run file. |

In this form, PMSCO is run in one process which handles all calculations sequentially.

Example command line for a single EDAC calculation of the two-atom project:
@code{.sh}
cd work/twoatom
python ../../pmsco -r twoatom-hemi.json
python -m pmsco -j job0001 -r twoatom-hemi.json
@endcode

This command line executes the main pmsco module <code>pmsco.py</code>.
@@ -54,46 +53,104 @@ The slave processes will run the scattering calculations, while the master coord
and optimizes the model parameters (depending on the operation mode).

For optimum performance, the number of processes should not exceed the number of available processors.
To start an optimization job with multiple processes on an quad-core workstation with hyperthreading:
To start an optimization job with multiple processes on a quad-core workstation with hyperthreading:
@code{.sh}
cd work/my_project
mpiexec -np 8 --use-hwthread-cpus python pmsco-dir -r run-file
mpiexec -np 8 --use-hwthread-cpus python -m pmsco -j my_job002 -r my_project.json
@endcode

The `--use-hwthread` option may be necessary on certain hyperthreading architectures.
The `--use-hwthread` option is necessary on certain hyperthreading architectures.
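As a sketch (job and run file names hypothetical), the process count can be matched to the
number of hardware threads reported by the operating system:

@code{.sh}
nproc   # number of hardware threads available on this machine
mpiexec -np $(nproc) --use-hwthread-cpus python -m pmsco -j job003 -r run-file.json
@endcode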


\subsection sec_run_hpc High-Performance Cluster

PMSCO is ready to run with resource managers on cluster machines.
Code for submitting jobs to the slurm queue of the Ra cluster at PSI is included in the pmsco.schedule module
(see also the PEARL wiki pages in the PSI intranet).
The job parameters are entered in a separate section of the run file, cf. @pag_runfile for details.
Code for submitting jobs to Slurm queues is included and can be customized for many machines.
For example, code for the slurm queue of the Ra cluster at PSI is included in the pmsco.schedule module.
Other machines can be supported by sub-classing pmsco.schedule.JobSchedule or pmsco.schedule.SlurmSchedule.

If a schedule section is present and enabled in the run file,
the following command will submit a job to the cluster machine
rather than starting a calculation directly:
To have PMSCO submit a job, the arguments for the queue are entered in the schedule section of the run file,
cf. @ref pag_runfile.
Then, the same command as for starting a calculation directly will instead submit a job to the queue:

@code{.sh}
cd ~/pmsco
python pmsco -r run-file.json
python -m pmsco -j job-name -r run-file.json
@endcode

The command will copy the pmsco and project source trees as well as the run file and job script to a job directory
under the output directory specified in the project section of the run file.
The full path of the job directory is _output-dir/job-name.
The directory must be empty or not existing when you run the above command.
The command creates a separate work directory with copies of the project source, the run-file and the job script.
This job directory will also receive the calculation results.
The full path of the job directory is _output-dir/job-name_.
The directory must not exist when you run the above command to prevent overwriting of previous data.
The job name can be declared in the run file or on the command line.

Be careful to specify correct project file paths.
The output and data directories should be specified as absolute paths.
The command above also loads the project module and scan files.
Many parameter errors are caught this way and can be fixed before the job is submitted to the queue.

The scheduling command will also load the project and scan files.
Many parameter errors can, thus, be caught and fixed before the job is submitted to the queue.
The run file also offers an option to stop just before submitting the job
The run file offers an option to prepare a script file and not to submit the job immediately
so that you can inspect the job files and submit the job manually.

Be sure to consider the resource allocation policy of the cluster
before you decide on the number of processes.
Requesting fewer resources will prolong the run time but might increase the scheduling priority.

\subsection sec_run_dirs Directories

Code and data files are typically located in different, possibly machine-specific locations.
This can make it difficult to port a project to another machine and to repeat calculations.
Ideally, a calculation job should be repeatable on different machines
with a minimum of changes to code, input data and parameter files.
Project code (which is under version control)
should never need modifications for porting to another machine.
Run-files (which are considered part of the data) can follow a project-specific or machine-specific directory structure.

PMSCO provides directory resolution at run-time to facilitate writing of portable code.
This is done by a number of directory aliases that can be included as shell-like placeholders, e.g. `${project}`, in file paths.
Some aliases are preset to system-based defaults,
further aliases can be added by the project code or declared in the run file.
Directory aliases can be used in Project.directories
as well as in other Project attributes that hold a file name.

The table below shows the aliases defined and/or required by PMSCO.
The paths are stored in Project.directories.
The aliases are resolved before the actual calculations start (in the Project.validate() method).
The resolved paths are printed to the log at warning level.

| Key | Description | Source | Use |
| --- | --- | --- | --- |
| work | Working directory at program start | PMSCO | |
| home | User's home directory | PMSCO | |
| project | Location of the project module. | PMSCO | Can be used to find auxiliary files that are part of the repository. |
| output | Intermediate and output files. | Must be set by the project or run file | The `output_file` property which serves as the basis of all output files is a concatenation of the `output` directory and `job_name`. |
| report | Directory for graphical output (reports) | Default: `${output}/report` | |
| data (optional) | Location of data (scan) files. | Project or run file | Usage is up to the project. |
| temp | Temporary files | | Reserved. Currently not supported |
| (job tag) | Any job_tags key that maps to a legal directory name can be included in a path | run file | project or run file |
| mode, job_name, project_name | These project attributes can be included in a path if they contain a valid directory name | | |

\subsection sec_run_stop Stopping a PMSCO job

A PMSCO optimization job stops on any one of the following events.

- The model handler is done.
  Depending on the run mode, this happens when the optimization has converged or
  the planned number of iterations or calculations has been reached.
- The number of calculation tasks exceeds the limit configured in `dispatch.MscoMaster.max_calculations`.
  This is meant to prevent excessive and runaway jobs.
  The default value is 1000000. It can be adjusted by the project code if necessary.
- The master process receives a SIGTERM, SIGUSR1 or SIGUSR2 from the operating system.
  The signal can be sent, e.g., by the `kill` command on Linux.
  This doesn't work on all platforms.
- The time limit configured in `Project.timedelta_limit` is reached.
  This is a soft limit and should be set shorter than the job reservation with the resource manager.
- A file named `finish_pmsco` is present in the output directory.
  This is an easy way for a user to stop a running optimization.
  The file doesn't need any content.
  It can be created by the `touch` command, as sketched below.

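A minimal sketch of the two manual stop methods (process ID and output directory hypothetical):

@code{.sh}
kill -USR1 12345                    # signal the master process, or
touch my-output-dir/finish_pmsco    # drop the marker file into the output directory
@endcode
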
All these stop conditions cause graceful stops.
Running calculation tasks are waited for, but some results on the model level may not be complete.
Final reports of complete models are produced and the output folder is cleaned up.

Stops caused by resource managers such as Slurm are typically not graceful.
The results are in an undefined state, reports are not generated, and temporary files may be left over.
*/

@@ -3,69 +3,66 @@

\subsection sec_general General Remarks

The PMSCO code is maintained under [Git](https://git-scm.com/).
The central repository for PSI-internal projects is at https://git.psi.ch/pearl/pmsco,
the public repository at https://gitlab.psi.ch/pearl/pmsco.
The central repository for development and PSI-internal projects is at https://gitea.psi.ch/pearl/pmsco,
the public repository at https://gitea.psi.ch/pearl-public/pmsco.
For their own developments, users should clone the repository.
Changes to common code should be submitted via pull requests.
Scientific projects should be maintained in a separate directory tree, cf. @ref sec_project.

The program code of PMSCO and its external programs is written in Python 3.6, C++ and Fortran.
The program code of PMSCO and its external programs is written in Python, C++ and Fortran.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a workstation with at least 4 processor cores
For optimization jobs with parallel execution, a workstation with at least 4 processor cores
or cluster with 20-50 available processor cores is recommended.
The program requires about 2 GB of RAM per process.

The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm).
[Spyder](https://docs.spyder-ide.org/index.html) is a good alternative with a better focus on scientific data.
The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code.
The Doxygen compiler can generate separate documentation in HTML or LaTeX.
[Spyder](https://docs.spyder-ide.org/index.html) is a good alternative with a focus on scientific data.
The documentation in [Doxygen](https://www.doxygen.nl/index.html) format is part of the source code.
The Doxygen compiler can generate documentation in HTML.

@attention Due to rapidly evolving computing environments
some of the installation instructions on this page may be outdated or incompatible with certain environments.


\subsection sec_requirements Requirements

Please note that in some environments (particularly shared high-performance machines)
it may be important to choose specific compiler and library versions.
In order to maintain backward compatibility with some of these older machines,
it may be important to choose specific compiler and library versions that are tailored to the hardware platform.
In order to maintain backward compatibility with older installations,
code that requires new versions of compilers and libraries should be introduced carefully.

The code depends on the following libraries:
The following basic tools and libraries are required:

- GCC >= 4.8
- OpenMPI >= 1.10
- F2PY
- F2C
- SWIG
- GCC (C, C++, Fortran) >= 4.8
- BLAS
- LAPACK
- Python 3.6
- Numpy >= 1.13
- Python packages listed in the requirements.txt file
- OpenMPI >= 1.10
- Git

Most of these requirements are available from the Linux distribution.
For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
The Python environment distributed with the OS often contains outdated packages,
and it's difficult to switch between different Python versions.
For the Python environment,
the [uv](https://docs.astral.sh/uv/) package and environment manager is recommended.
It can be installed by non-privileged users.
Other package managers like pip and conda may work as well but are not described here.

On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
Details are explained in the PEARL Wiki.

The following tools are required to compile the documentation:
The following tools are required to compile the documentation.
They are not needed in calculations.

- doxygen
- doxypypy
- graphviz
- Java
- Java runtime environment (JRE)
- [plantUML](https://plantuml.com)
- LaTeX (optional, generally not recommended)

\subsection sec_install_instructions Instructions

Installation instructions are given for Ubuntu 24.04.
On managed HPC clusters use the compilers and libraries recommended by the administrator
(often provided by a module system).

\subsubsection sec_install_ubuntu Installation on Ubuntu

The following instructions install the necessary dependencies on Ubuntu, Debian or related distributions.
The Python environment is provided by [Miniconda](https://conda.io/miniconda.html).

@code{.sh}
sudo apt update
@@ -74,7 +71,6 @@ sudo apt install \
binutils \
build-essential \
doxygen \
f2c \
g++ \
gcc \
gfortran \
@@ -83,57 +79,184 @@ graphviz \
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
python3 \
python3-venv \
sqlite3 \
wget
@endcode

On systems where the link to libblas is missing (see @ref sec_compile below),
the following lines are necessary.
In addition, download and install [uv](https://docs.astral.sh/uv/).
PSI users should configure uv to use PSI's PyPI package cache (cf. documentation on the intranet).


\subsubsection sec_install_extra Additional Applications

For working with the code and data, some other applications are recommended.
The PyCharm IDE can be installed from the Ubuntu software center.
The following commands install other useful helper applications:

@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
sudo apt install \
avogadro \
gitg \
meld
@endcode

Download and install [Miniconda](https://conda.io/),
then configure the Python environment:
To compile the documentation install the following tools.
The basic documentation is in HTML format and can be opened in any internet browser.

@code{.sh}
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
bash ~/miniconda.sh
sudo apt install \
doxygen \
graphviz \
default-jre

conda create -q --yes -n pmsco python=3.6
conda activate pmsco
conda install -q --yes -n pmsco \
pip \
"numpy>=1.13" \
scipy \
ipython \
matplotlib \
nose \
mock \
future \
statsmodels \
swig \
gitpython
pip install periodictable attrdict commentjson fasteners mpi4py doxypypy
wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
sudo mkdir /opt/plantuml/
sudo mv plantuml.jar /opt/plantuml/
echo "export PLANTUML_JAR_PATH=/opt/plantuml/plantuml.jar" | sudo tee /etc/profile.d/pmsco-env.sh
@endcode

@note `mpi4py` should be installed via pip, _not_ conda.
conda might install its own MPI libraries, which can cause a conflict with system libraries.
(cf. [mpi4py forum](https://groups.google.com/forum/#!topic/mpi4py/xpPKcOO-H4k))

\subsubsection sec_install_singularity Installation in Singularity container
\subsection sec_distro Download PMSCO Source Code

A [Singularity](https://sylabs.io/singularity/) container
contains all OS and Python dependencies for running PMSCO.
Besides the Singularity executable, nothing else needs to be installed in the host system.
This may be the fastest way to get PMSCO running.
Clone or download the code from one of these repository addresses:

| Repository | Access |
| --- | --- |
| https://gitea.psi.ch/pearl/pmsco | PSI internal |
| https://gitea.psi.ch/pearl-public/pmsco-public | Public |

@code{.sh}
cd ~
git clone {repo-address see above} pmsco
cd pmsco
git checkout master
@endcode

These instructions download the base package of PMSCO.
The public repository does not contain external programs (EDAC, PHAGEN, LOESS).
You need to obtain the source code for these programs from their respective owners,
copy them to the respective subprojects directories and
apply the patches included in the PMSCO distribution.
Please respect the respective license terms and acknowledge the use of the codes.


\subsection sec_install_environment Set up the Python Environment

The following instructions are for the [uv](https://docs.astral.sh/uv/) package manager.
For other package managers, the pyproject.toml and requirements.txt files list the necessary dependencies.

\subsubsection sec_install_uv Virtual Environment with uv

By default, uv creates the virtual environment automatically in a `.venv` folder inside the source directory tree.
In this case, no explicit setup is necessary, and pmsco can be called by:

~~~~~~{.sh}
uv run pmsco -h
~~~~~~

On some platforms, however, it may be necessary to separate the environment from the code,
e.g. because of limited storage space or quota in the home directory.
In this case, create the environment as follows:

~~~~~~{.sh}
cd ~
mkdir envs
cd envs
uv venv --clear my_pmsco_env
~~~~~~

The `--clear` option resets an existing environment to empty.
To activate this environment, call this command once in every terminal:

~~~~~~{.sh}
source ~/envs/my_pmsco_env/bin/activate
~~~~~~


\subsubsection sec_normal_install Installing PMSCO

To install PMSCO and all dependencies into the active environment,
run the following commands in the top-level PMSCO directory (where `pyproject.toml` is located).
The commands compile the Fortran and C++ code of the calculation programs using the
[Meson build system](https://mesonbuild.com/meson-python/index.html)
and install the binaries and Python code in the site-packages folder of the active Python environment.

~~~~~~{.sh}
|
||||
uv sync --active
|
||||
~~~~~~
|
||||
|
||||
To use the default `.venv` environment, omit the `--active` option (also in the uv commands shown further below).
|
||||
Now, run the unit tests to check the installation:
|
||||
|
||||
~~~~~~{.sh}
|
||||
uv run --active nosetests
|
||||
~~~~~~
|
||||
|
||||
And check the help page:
|
||||
|
||||
~~~~~~{.sh}
|
||||
uv run --active pmsco -h
|
||||
~~~~~~
|
||||
|
||||
In the explicit environment, these commands can alternatively be called directly:
|
||||
|
||||
~~~~~~{.sh}
|
||||
nosetests
|
||||
pmsco -h
|
||||
~~~~~~
|
||||
|
||||
The PMSCO packages are now accessible in Python import statements.
|
||||
Verify it by opening a Python shell and entering:
|
||||
|
||||
~~~~~~{.py}
|
||||
import pmsco.project
|
||||
dir(pmsco.project)
|
||||
~~~~~~
|
||||
|
||||
Note: By default, uv installs the Python code in editable mode.
|
||||
Changes in the PMSCO source tree are visible as soon as you start a new Python interpreter.
|
||||
This does not apply to the subpackages, however.
|
||||
After modifying the subpackages, you need to clear and re-sync the environment.

\subsection sec_test Test project

Run the twoatom project to check that everything is installed correctly:

~~~~~~{.sh}
cd ~
mkdir -p work/twoatom
cd work/twoatom
nice python -m pmsco -r {path-to-pmsco}/projects/twoatom/twoatom-hemi.json
~~~~~~

You should get a number of result files whose names start with `twoatom0001` in `~/work/twoatom/`,
including a hologram plot of the modulation function.

To learn more about running PMSCO, see @ref pag_run.

\subsection sec_install_projects Installing Namespace Packages

Instructions on how to set up your own projects as namespace packages are given in section \ref sec_project.
To install them into the pmsco namespace, call uv with the `--inexact` option.
Without `--inexact`, uv would remove the previously installed packages (including PMSCO).

~~~~~~{.sh}
uv sync --active --inexact
~~~~~~

\subsection sec_install_singularity Installation in a Singularity container

Singularity containers are currently unmaintained.

The PMSCO source includes an install script for the [Singularity](https://sylabs.io/singularity/) container system
under `extras/singularity`.
To get started with Singularity,
download it from [sylabs.io](https://www.sylabs.io/singularity/) and install it according to their instructions.
On Windows, Singularity can be installed in a virtual machine using the [Vagrant](https://www.vagrantup.com/) system.

Check out PMSCO as explained in the @ref sec_compile section:

@code{.sh}
cd ~
mkdir containers
cd containers
git clone git@gitea.psi.ch:pearl-public/pmsco-public.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode

Then, either copy a pre-built container into `~/containers`,
or build one from the definition file included under extras/singularity.
You may need to customize the definition file to match the host OS
or to install compatible OpenMPI libraries,
cf. [Singularity user guide](https://sylabs.io/guides/3.7/user-guide/mpi.html).

@code{.sh}
cd ~/containers
# ... (build or copy pmsco.sif here, cf. extras/singularity) ...
singularity shell pmsco.sif
. /opt/miniconda/etc/profile.d/conda.sh
conda activate pmsco
cd ~/containers/pmsco
meson setup build
meson compile -C build
meson install -C build
meson test -C build
@endcode

Or call PMSCO from outside:

@code{.sh}
cd ~/containers
mkdir output
cd output
singularity run -e ../pmsco.sif python -m pmsco -r path/to/your-runfile
@endcode

For parallel processing, prepend `mpirun -np X` to the singularity command as needed.
Note that this requires compatible OpenMPI versions on the host and container to avoid runtime errors.
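For example, to run on four processes (adjust the number to your allocation):

@code{.sh}
mpirun -np 4 singularity run -e ../pmsco.sif python -m pmsco -r path/to/your-runfile
@endcode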

\subsubsection sec_install_extra Additional Applications

For working with the code and data, some other applications are recommended.
The PyCharm IDE (community edition) can be installed from the Ubuntu software center.
The following commands install other useful helper applications:

@code{.sh}
sudo apt install \
    avogadro \
    gitg \
    meld
@endcode

To compile the documentation, install the following tools.
The basic documentation is in HTML format and can be opened in any internet browser.
If you have a working LaTeX installation, a PDF document can be produced as well.
It is not recommended to install LaTeX just for this documentation, however.

@code{.sh}
sudo apt install \
    doxygen \
    graphviz \
    default-jre

conda activate pmsco
conda install -q --yes -n pmsco doxypypy

wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
sudo mkdir /opt/plantuml/
sudo mv plantuml.jar /opt/plantuml/
echo "export PLANTUML_JAR_PATH=/opt/plantuml/plantuml.jar" | sudo tee /etc/profile.d/pmsco-env.sh
@endcode

\subsection sec_compile Compilation

Make sure you have access to the PMSCO Git repository and set up your Git environment.
Depending on your setup, location and permissions, one of the following addresses may work.
Private key authentication is usually recommended except on shared computers.

| Repository | Access |
| --- | --- |
| `git@git.psi.ch:pearl/pmsco.git` | PSI intranet, SSH private key authentication |
| `https://git.psi.ch/pearl/pmsco.git` | PSI intranet, password prompt |
| `git@gitlab.psi.ch:pearl/pmsco.git` | Public repository, SSH private key authentication |
| `https://gitlab.psi.ch/pearl/pmsco.git` | Public repository, password prompt |

Clone the code repository using one of these repository addresses and switch to the desired branch:

@code{.sh}
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode

Compile the code and run the unit tests to check that it worked.

@code{.sh}
make all
nosetests -w tests/
@endcode

If the compilation of _loess.so fails due to a missing BLAS library,
try to set a link to the BLAS library as follows (the actual file names may vary depending on the distribution or version):

@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode

\subsection sec_test Tests

Run the unit tests.
They should pass successfully.
Re-check from time to time.

@code{.sh}
cd ~/pmsco
nosetests -w tests/
@endcode

Run the twoatom project to check the compilation of the calculation programs.

@code{.sh}
cd ~/pmsco
mkdir work
cd work
mkdir twoatom
cd twoatom/
nice python ~/pmsco/pmsco -r ~/pmsco/projects/twoatom/twoatom-energy.json
@endcode

Runtime warnings may appear because the twoatom project does not contain experimental data.

To learn more about running PMSCO, see @ref pag_run.
*/

/*! @mainpage Introduction
\section sec_intro Introduction

PMSCO (PSI multiple-scattering cluster calculations and structural optimization)
is a Python-based workflow engine to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data using machine learning techniques.
PMSCO was developed at the [Paul Scherrer Institut (PSI)](https://www.psi.ch/)
by the team of the [PEARL beamline](https://www.psi.ch/en/sls/pearl).

The actual scattering calculation is done by code developed by other parties.
While the scattering program typically calculates a diffraction pattern based on a set of static parameters and a specific coordinate file in a single process,
PMSCO wraps around that program to facilitate parameter handling, cluster building and structural optimization.
In the current version, PMSCO can make use of the following programs.
Other programs may be integrated as well.

- [EDAC](https://garciadeabajos-group.icfo.es/widgets/edac/)
  by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley,
  [Phys. Rev. B 63 (2001) 075404](http://dx.doi.org/10.1103/PhysRevB.63.075404)
- PHAGEN from the [MsSpec package](https://ipr.univ-rennes1.fr/msspec)

- structural optimization algorithms: genetic, particle swarm, grid search.
- calculation of the modulation function.
- calculation of the weighted R-factor.
- integrated and extensible reporting, database storage of results.
- automatic parallel processing using OpenMPI and job submission to scheduling systems.

\section sec_intro_project Optimization Projects

To set up a new optimization project, you need to:

- create a new directory under projects.
- create a new Python module in this directory, e.g., my_project.py.
- implement a sub-class of pmsco.project.Project in my_project.py.
- override the create_cluster, create_params, and create_model_space methods.
- optionally, override the combine_domains and combine_scans methods.
- add a global function create_project to my_project.py or create a @ref pag_runfile.
- prepare experimental data files (intensity or modulation function).

For details, see @ref pag_project, the documentation of the pmsco.project.Project class and the example projects.

\section sec_license License Information

The source code of PMSCO is licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
This _does not include_ the calculation packages contained in the subprojects folder which are licensed separately.

- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please consider sharing your developments with the original author.

Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
These programs may not be used without an explicit agreement with the respective original authors.

\author Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
\version This documentation is compiled from version $(REVISION).
\copyright 2015-2025 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
*/

/*! @page pag_project Setting up a new project
\section sec_project Setting up a new project

This topic guides you through the setup of a new project.
Be sure to check out the examples in the projects folder
and the code documentation as well.

The basic steps are:

1. Create a new package folder under `pmsco/projects`.
   To keep your code and PMSCO separate, it is recommended to start your own pmsco/projects tree
   in a convenient location separate from the PMSCO source code.
2. Add the parent directory of your pmsco/projects tree to the Python path.
3. In the new folder, create a Python module for the project (subsequently called _the project module_).
4. In the project module, define a cluster generator class which inherits from @ref pmsco.cluster.ClusterGenerator.
5. In the project module, define a project class which inherits from @ref pmsco.project.Project.
6. Create one or more run files.

The basic steps listed above are recommended and explained in the following.
In previous versions, other mechanisms of project invocation were available.
They are now obsolete.

\subsection sec_packages Namespace packages

[Python namespace packages](https://realpython.com/python-namespace-package/) provide an easy way
to inject project modules into the PMSCO namespace
while their source files are kept separate from the core PMSCO packages.
This way, PMSCO and the project modules can be under separate version control.

Namespace packages work by extending the Python module search path.
The module loader looks for packages in every entry of the search path
and does not stop at the first match as it would do for a regular package.

The recommended folder structure is:

~~~~~~
pmsco-projects/
+-- pyproject.toml
+-- pmsco/
    +-- projects/
        +-- project1/
        |   +-- project1.py
        |   +-- run1.json
        |   +-- ...
        +-- project2/
            +-- ...
~~~~~~

In place of `pmsco-projects`, `project1`, `project2`, `run1`, you should use distinct names.
The two levels `pmsco` and `projects` should be left as is.
If you now include `pmsco-projects` in the Python path,
all of your projects become available within the `pmsco` namespace, i.e.,
you can `import pmsco.projects.project1.project1` in Python.
Furthermore, you can call the module in a run-file without specifying a file path.
You may install multiple project packages if needed.

The recommended way to add `pmsco-projects` to the Python path is by an editable installation.
This will allow you to keep editing your project sources in place.

1. Place your project files in a directory tree similar to `pmsco-projects/pmsco/projects/project1/`.
   The `pmsco/projects` level is mandatory as a part of the path.
   Replace `pmsco-projects` and `project1` by your own choice.
2. Be sure not to create any `__init__.py` files in this directory tree.
3. Copy the `pyproject.toml` file from the PMSCO source into your `pmsco-projects` and adjust its contents
   (see the sketch after this list).
   At least give the package a distinct name.
4. Select another build backend if necessary.
   The default [uv_build](https://docs.astral.sh/uv/concepts/build-backend/) is recommended for pure Python projects.
5. 'Install' the project locally.
   With uv, call `uv sync --active --inexact`
   while you are in the directory that contains the `pyproject.toml` file.
   In plain pip, the corresponding command would be
   `pip install --editable .`.
6. Check that you can `import pmsco.projects.project1.project1` (or whatever your project is called) in a Python shell.
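As an illustration only - the package name is a placeholder and the build-backend keys depend on the uv version (consult the uv_build documentation) - a minimal `pyproject.toml` for the tree above might look like:

~~~~~~
[project]
name = "my-pmsco-projects"
version = "0.1.0"

[build-system]
requires = ["uv_build"]
build-backend = "uv_build"

[tool.uv.build-backend]
# the namespace modules live directly under pmsco/projects in this tree
module-name = "pmsco.projects"
module-root = ""
namespace = true
~~~~~~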

If you encounter problems importing the pmsco modules, check the Python path in a Python shell.
It must contain the `site-packages` directory of your Python environment.
Make sure it does not contain any pmsco or project source directory explicitly.
Also make sure that you don't have any `__init__.py` files in your project tree,
and do not use explicit paths to pmsco or your project anywhere in your source code or shell configuration files.
Be careful not to install packages multiple times in different locations.
In case of trouble, set up a fresh environment.

\subsection sec_project_module Project module

A skeleton of the project module file (with some common imports) may look like this:
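The following is a minimal illustrative sketch, not the skeleton shipped with PMSCO; the class names are placeholders, and the demo projects show complete working examples:

~~~~~~{.py}
import pmsco.cluster
import pmsco.project


class MyClusterGenerator(pmsco.cluster.ClusterGenerator):
    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # build the cluster from the model parameters here
        return clu


class MyProject(pmsco.project.Project):
    def __init__(self):
        super().__init__()
        self.cluster_generator = MyClusterGenerator(self)

    def create_model_space(self):
        spa = pmsco.project.ModelSpace()
        # declare the model parameters here
        return spa

    def create_params(self, model, index):
        params = pmsco.project.CalculatorParams()
        # fill in the non-structural calculation parameters here
        return params
~~~~~~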

For the project to be useful, some of the methods in the skeleton above need to be overridden.
The individual methods are discussed in the following.
Further descriptions can be found in the documentation of the code.

\subsection sec_project_cluster Cluster generator

The cluster generator is a project-specific Python object that produces a cluster, i.e., a list of atomic coordinates,
based on a small number of model parameters whenever PMSCO requires it.
The most important method of a cluster generator is `create_cluster`.
At least this method must be implemented for a functional cluster generator.

A generic `count_emitters` method is implemented in the base class.
It needs to be overridden if inequivalent emitters should be calculated in parallel.

\subsubsection sec_project_cluster_create Cluster definition

The `create_cluster` method takes the model parameters (a dictionary)
and the task index (a pmsco.dispatch.CalcID, cf. @ref pag_concepts_tasks) as arguments.
Given these arguments, it creates and fills a @ref pmsco.cluster.Cluster object.
See @ref pmsco.cluster.ClusterGenerator.create_cluster for details on the method contract.

As an example, have a look at the following simplified excerpt from the `twoatom` demo project.

~~~~~~{.py}
class TwoatomCluster(ClusterGenerator):
    # ...

    def create_cluster(self, model, index):
        # access model parameters
        # dAB - distance between atoms in Angstroms
        # ... (the remainder of the excerpt is omitted here)
~~~~~~

In this example, two atoms are added to the cluster.
The @ref pmsco.cluster.Cluster class provides several methods to simplify the task,
such as adding layers or bulk regions, rotation, translation, trim, emitter selection, etc.
Please refer to the documentation of its code for details.
It may also be instructive to have a look at the demo projects.

For each atom, the following properties are stored:

- atom type (chemical element number)
- chemical element symbol from periodic table
- x coordinate of the atom position
- y coordinate of the atom position
- z coordinate of the atom position
- emitter flag (0 = scatterer, 1 = emitter, default 0)
- charge/ionicity (units of elementary charge, default 0)
- scatterer class (default 0)

All of these properties except the scatterer class can be set by the `add_xxxx` methods of the cluster.
The scatterer class is used internally by the atomic scattering factor calculators.
Whether the charge/ionicity is used depends on the particular calculator; EDAC, for instance, does not use it.

\note You do not need to take care how many emitters a calculator allows,
or whether the emitter needs to be at the origin or the first place of the array.
These technical aspects are handled by PMSCO code transparently.

Domains refer to regions of inequivalent structure in the probing region.
This may include regions of different orientation, different lattice constant, or even different structure.
The cluster methods read the requested domain from the `index.domain` argument.
This is an index into the @ref pmsco.project.Project.domains list where each item is a dictionary
that holds additional, invariable structural parameters.

A common case is rotational domains,
where the `create_cluster` method would include additional code to rotate the cluster:

~~~~~~{.py}
        # ... (the body of the example is omitted here)
        return clu
~~~~~~

Depending on the complexity of the system, it is advisable to split the code into a separate method for each domain.

The @ref pmsco.project.Project class includes generic code to add intensities of domains incoherently
(cf. @ref pmsco.project.Project.combine_domains).
In this case, the model space should contain parameters 'wdom0', 'wdom1', etc.,
that define the weights of domain 0, 1, etc.

To avoid correlations between parameters, one domain must have a fixed weight:
typically, 'wdom0' is left undefined and defaults to 1.

\subsubsection sec_project_cluster_emitters Emitter configurations

If a project uses a large cluster and/or many emitters,
it may be more efficient to generate emitter-specific cluster configurations,
for instance to leverage process parallelization,
or to produce small, local clusters around the emitter site.
This concept is called _emitter configurations_ and is explained in detail in @ref pag_concepts_emitter.

To implement emitter configurations, override the `count_emitters` method to return the number of emitter configurations.
In the simplest case, this is the number of inequivalent emitters:

~~~~~~{.py}
    def count_emitters(self, model, index):
        # ... (the body of the example is omitted here)
~~~~~~

Next, modify the `create_cluster` method to check the emitter index (`index.emit`).
If it is -1, the method must return the full cluster with all inequivalent emitters marked.
If it is positive, only the corresponding emitter configuration must be marked.
For example, if each emitting atom represents a separate emitter configuration:

~~~~~~{.py}
    def create_cluster(self, model, index):
        # ... (cluster construction omitted here)

        # select all possible emitters (atoms of a specific element) in a cylindrical volume
        # idx_emit is an array of atom numbers (0-based atom index)
        idx_emit = clu.find_index_cylinder(origin, r_xy, r_z, self.project.scans[index.scan].emitter)

        # if PMSCO asks for a specific emitter, restrict the array index:
        if index.emit >= 0:
            idx_emit = idx_emit[index.emit]

        # mark the selected emitters
        # if index.emit was < 0, all emitters are marked
        clu.data['e'][idx_emit] = 1

        return clu
~~~~~~

Now, the individual emitter configurations are calculated in separate tasks
which can run in parallel in a multi-process environment.
Note that the processing time of EDAC scales linearly with the number of emitters.
Thus, parallel execution is beneficial.

Advanced programmers may exploit more of the flexibility of emitter configurations, cf. @ref pag_concepts_emitter.

\subsection sec_project_project Project class

Most commonly, a project class overrides the `__init__`, `create_model_space` and `create_params` methods.
Most other inherited methods can be overridden optionally,
for instance `validate`, `setup`, `calc_modulation`, `rfactor`,
as well as the combine methods `combine_rfactors`, `combine_domains`, `combine_emitters`, etc.
This introduction shall focus on the three most important methods.

\subsubsection sec_project_project_init Initialization and defaults

The `__init__` method defines and initializes project properties with default values.
It may also redefine properties of the base class.
The following code is just an example to give some ideas.

~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def __init__(self):
        # ... (part of the example is omitted here)
        self.domains = [{"zrot": 0.}]

    def build_scan_dict(self):
        self.scan_dict["empty"] = {"filename": "${pmsco}/projects/common/empty-hemiscan.etpi",
                                   "emitter": "Si", "initial_state": "2p3/2"}
        self.scan_dict["Si2p"] = {"filename": "${data}/xpd-Si2p.etpis",
                                  "emitter": "Si", "initial_state": "2p3/2"}
~~~~~~

A scan dictionary is one way to specify locations and metadata of experimental files centrally in the project code.
The scan can then be selected by the dictionary key rather than copying file locations.

Note that all public attributes can be assigned from a run file.
This happens after the `__init__` method.
The values set by `__init__` serve as default values.

\subsubsection sec_project_project_space Model space

The model space defines the keys and value ranges of the model parameters.
There are three ways to declare the model space in order of priority:

1. Declare the model space in the run-file.
2. Assign a ModelSpace to the self.model_space property directly in the `__init__` method.
3. Implement the `create_model_space` method.

The third way may look like this:

~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def create_model_space(self):
        # create an empty model space
        spa = pmsco.project.ModelSpace()

        # add parameters
        spa.add_param('dAB', 2.05, width=0.25, step=0.05)
        spa.add_param('th', 15.00, 0.00, 30.00, 1.00)
        spa.add_param('ph', 90.00)
        spa.add_param('V0', 21.96, width=10.0, step=1.0)
        spa.add_param('Zsurf', 1.50)
        spa.add_param('wdom1', 0.5, 0.10, 10.00, 0.10)

        # return the model space
        return spa
~~~~~~

This code declares six model parameters: `dAB`, `th`, `ph`, `V0`, `Zsurf` and `wdom1`.
Three of them are structural parameters (used by the cluster generator above),
two are used by the `create_params` method (see below),
and `wdom1` is used in @ref pmsco.project.Project.combine_domains
while summing up contributions from different domains.

The values in the arguments list correspond to the start value (initial guess),
the lower and upper boundaries of the value range,
and the step size for optimizers that require it.
If just one value is given, the parameter is held constant during the optimization.
The range can, alternatively, be specified by the `width` argument.

A similar declaration in a run-file could look as follows (some parameters omitted for brevity).
Parameter values can be numeric constants,
or simple Python math expressions in double quotes.

~~~~~~{.py}
{
    "project": {
        // ...
        "model_space": {
            "dAB": {
                "start": "2.0 / math.cos(math.radians(15.0))",
                "width": 0.25,
                "step": 0.05
            },
            "th": {
                // ...
                "max": 30.0,
                "step": 1.0
            },
            "Zsurf": {
                "start": 1.50
            }
            // ...
        }
    }
}
~~~~~~

\subsubsection sec_project_project_params Calculation parameters

Non-structural parameters that are needed for the input files of the calculators are passed
in a @ref pmsco.project.CalculatorParams object.
This object is created and filled in the `create_params` method of the project class.

The following example is from the `twoatom` demo project:

~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def create_params(self, model, index):
        params = pmsco.project.CalculatorParams()

        # ... (the remainder of the example is omitted here)
~~~~~~

Most of the code is generic and can be copied to other projects.
Only the experimental and material parameters need to be adjusted.
Other properties can be changed as needed, see @ref pmsco.project.CalculatorParams.

\subsection sec_project_args Passing run-time parameters

The recommended way of passing calculation parameters is via @ref pag_runfile.
Run-files allow for a complete separation of code and data in a generic and flexible way.
Program code can be managed by a version control system,
and run-files can be stored along with the results.
This simplifies the reproduction of previous calculations and documentation of the workflow.

For testing and simple projects, it is possible to hard-code all parameters in the project class.

Using the command line is no longer recommended and may become deprecated in a future version.

\subsubsection sec_project_args_runfile Setting up a run-file

The usage and format of run-files is described in detail under @ref pag_runfile.

\subsubsection sec_project_args_code Hard-coded arguments

Though it's normally recommended to declare all parameters in the run-file,
parameter values can also be hard-coded in the constructor and/or the validate method of the project class.
Which method to use depends on the processing stage.

The constructor can set default values for rarely changing parameters.
The declarations in the run-file override the defaults from the constructor.
If some parameters need adjusting _after_ the run-file has been loaded,
this can be done in the `validate` method.
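For example, a minimal sketch of a `validate` override; the adjusted attribute is hypothetical:

~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def validate(self):
        # let the base class check and fix the common attributes first
        super().validate()
        # adjust parameters that depend on values loaded from the run-file
        if not self.output_dir:
            self.output_dir = "."
~~~~~~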

The call sequence of the methods is as follows.

1. `Project.__init__`:
   The constructor is usually overridden by the project.
   The constructor must call the superclass before applying its values.
2. `Project.set_properties`:
   Sets the parameters from the run-file and resolves class names.
   This method can be overridden if additional classes need resolving after loading the run-file.
   It must call the superclass.
3. `Project.validate`: Parameters are validated, i.e., checked and made consistent.
   Handler classes are resolved.
   The `validate` method or its sub-methods can be overridden by the project.
   The inherited method should be called.

*/

135
docs/src/reports.dox
Normal file

/*! @page pag_reports Reports
\section sec_reports Reports

The main output of PMSCO is the mapping of model parameters to R-factors.
By default, it is produced in the form of a text file (.dat) as well as an sqlite3 database file (.db).
Graphical representations of the result data, called _reports_ in PMSCO, can be produced automatically at run-time or
manually after the calculation has ended.

PMSCO provides a number of pre-defined reports as well as an interface for custom reports.
Essentially, a report is defined by a Python class which derives from `pmsco.reports.base.ProjectReport`.
Instances of reports are added to the project's `reports` list during initialization of the calculation job.
They are called by the calculation handlers whenever a new model-level result is available in the database.
While reports typically produce graphics files for diagnostics,
report classes can, in principle, produce any derived data, including data files in different formats.

By default, no report is produced during a project run.
There are several ways to generate reports:

- Add instances of reports to the `reports` list of the project object.
  This can be done in the project code or in the @ref pag_runfile (as sketched below).
  One or multiple reports (of different classes) can be added and configured.
- Some report modules have their own command line interface.
  This allows you to produce a report at any time during or after the project run.
- Lastly, all reports are Python classes and can be instantiated and executed in a Python shell.
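As an illustration - the class name and keys are taken from the tables below, the file name pattern is hypothetical - a report entry in a run-file might look like:

~~~~~~{.py}
"reports": [
    {
        "__class_name__": "pmsco.reports.population.ConvergencePlot",
        "filename_format": "${base}.convergence",
        "title_format": "${job_name} convergence"
    }
]
~~~~~~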

The remainder of this page describes some of the pre-defined reports and their configuration parameters (attributes).

@note Reporting is still under development.
The configuration parameters and behaviour are subject to change, and the documentation may be partially outdated.
Be sure to check the in-line documentation as well as the source code for the latest information.

\subsection sec_reports_common Common Parameters

The reports share some common parameters which may, however, be used differently or ignored by some reports.

| Key | Values | Description |
| --- | --- | --- |
| `filename_format` | template string using `${key}`-type placeholders | Template string for file names of reports. Possible placeholders are listed below. |
| `title_format` | template string using `${key}`-type placeholders | Template string for graph titles. Possible placeholders are listed below. |
| `canvas` | string, default: `matplotlib.backends.backend_agg.FigureCanvasAgg` (PNG) | A matplotlib figure canvas such as FigureCanvasAgg, FigureCanvasPdf or FigureCanvasSVG. |

The `filename_format` and `title_format` attributes are template strings which can contain `${key}`-type placeholders.
Placeholders are replaced according to the following table.
Some of these values may not be available if you call the reports outside of an optimization run
(e.g., from the command line of a report module).

| Key | Description |
| --- | --- |
| `base` | Base file name. Default: job name |
| `mode` | optimization mode |
| `job_name` | job name |
| `project_name` | project name |
| any directories key | corresponding directories value |
| any job_tags key | corresponding job_tags value |

\subsection sec_reports_convergence Convergence Plot

The convergence plot is a violin plot where each violin represents the R-factor distribution of one generation.
The minimum, maximum and mean values are marked, and the distribution is indicated by the body.
Convergence plots are suitable for genetic or swarm optimizations.

| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.ConvergencePlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |

\subsection sec_reports_genetic Genetic Chart

A genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
The chart shows the amount of diversity in the population
and - by comparing charts of different generations - the changes due to mutation.
The axes are the model parameters (x) and particle number (y).
The colour is mapped from the relative parameter value within the parameter range.
Genetic charts are suitable for genetic or swarm optimizations.

| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.GeneticPlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |
| cmap | string: 'viridis', 'plasma' (default), 'inferno', 'magma', 'cividis' | Name of colour map supported by matplotlib. |
| params | list of model parameter names | |

In addition to the common template substitutions,
the genetic chart report replaces the following placeholders
of the `filename_format` and `title_format` template strings.

| Key | Description |
| --- | --- |
| `gen` | Generation index (population reports only) |

\subsection sec_reports_swarm Particle Swarm Plot

The particle swarm plot shows the current positions and velocities of particles projected onto two dimensions.
The plot contains three elements:

- a pseudo-colour scatter plot of all R-factors in the background,
- a scatter plot of particle positions,
- a quiver plot indicating the velocities of the particles.

Particle swarm plots are suitable in particle swarm optimization mode only.

| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.SwarmPlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |
| cmap | string: 'viridis', 'plasma' (default), 'inferno', 'magma', 'cividis' | Name of colour map supported by matplotlib. |
| params | nested list of pairs of model parameter names | |

In addition to the common template substitutions,
the particle swarm plot report replaces the following placeholders
of the `filename_format` and `title_format` template strings.

| Key | Description |
| --- | --- |
| `gen` | Generation index (population reports only) |
| `param0` | Parameter name 0 (population reports only) |
| `param1` | Parameter name 1 (population reports only) |
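
For illustration - the class name and keys are taken from the tables above, the parameter names and file name pattern are hypothetical - a swarm plot could be configured in a run-file as:

~~~~~~{.py}
{
    "__class_name__": "pmsco.reports.population.SwarmPlot",
    "filename_format": "${base}-${gen}-${param0}-${param1}.swarm",
    "cmap": "viridis",
    "params": [["dAB", "th"], ["V0", "Zsurf"]]
}
~~~~~~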

\subsection sec_reports_misc Miscellaneous

To make a video from swarm or genetic plots, you may use ffmpeg on Linux:

~~~~~~{.sh}
ffmpeg -framerate 5 -i basename-%00d.geneticplot.png -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p basename.geneticplot.mp4
~~~~~~

*/

\section sec_runfile Run File

This section describes the format of a run-file.
Run-files are a flexible way of passing arguments to a PMSCO process.
The benefits are:

- contain all essential parameters to repeat a calculation - no need to remember or record the command line
- avoid cluttering up the command line or frequent changes of source code
- can be versioned or stored separately from the code, maintain a single file or multiple files - up to the user
- any property and sub-property of the project object can be assigned in a generic way - even custom properties that are unknown to PMSCO
- no necessity for the project code to parse the command line
- schema validation can help to find syntax errors while editing

\subsection sec_runfile_how How It Works

Run-files are text files in machine and human readable [JSON](https://en.wikipedia.org/wiki/JSON) format.
In PMSCO, run-files contain a dictionary of parameters for the project object
which is the main container for calculation parameters, model objects and links to data files.

Upon launching PMSCO, a generic parser reads the run-file,
constructs the project object from the specified custom project class
and assigns the attributes defined in the run-file.
Run-files are a sort of script that assigns data to the project.
The parser does not expect specific data types or classes.
It merely copies data items to the project attributes of the same name.
The validation and interpretation of the data is up to the project object.

The parser handles the following situations:

- Strings, numbers as well as dictionaries and lists of simple objects are assigned directly to project attributes.
  If the project class declares a setter method for the attribute, the setter is called.
  Else, the existing attribute is overwritten.
  Setters can execute custom code to validate the data value.
- If specified in the run-file, the parser creates objects from classes in the namespace of the project module
  and recursively assigns their properties (see the sketch below).
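
For instance, the following hypothetical run-file fragment, using the cluster generator class of the twoatom demo project, would make the parser instantiate the named class and assign any remaining items to its attributes:

~~~~~~{.py}
"cluster_generator": {
    "__class__": "TwoatomCluster"
    // further items would be assigned as attributes of the new object
}
~~~~~~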

\note There are no implicit checks of correctness of the assigned data objects!
The author of the run-file must make sure that the run-file is compatible with the project class,
else the calculation process might fail.

There are three ways to check assigned attributes before the calculations are started.
All have to be implemented explicitly by the project maintainer:

1. The run-file can be validated against a JSON schema before launching PMSCO (see below).
   Schema validation may catch some obvious mistakes
   but is not complete in the sense that it cannot guarantee error-free execution of the project code.
2. The classes used with run-files define property setters (see the sketch after this list).
   The setters can raise an exception or post an error in the log.
   (The latter won't stop the calculation process.)
3. The project class implements a validation method to check and fix important or error-prone attributes.
   It can write warnings and errors to the log, or raise an exception if the process should be aborted.
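
A minimal sketch of a validating property setter; the attribute name and value range are hypothetical:

~~~~~~{.py}
import pmsco.project


class MyProject(pmsco.project.Project):
    @property
    def polar_angle(self):
        return self._polar_angle

    @polar_angle.setter
    def polar_angle(self, value):
        # raise an exception to stop the process on invalid input
        value = float(value)
        if not 0.0 <= value <= 90.0:
            raise ValueError("polar_angle must be between 0 and 90 degrees")
        self._polar_angle = value
~~~~~~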

\subsection sec_runfile_general General File Format

Run-files must adhere to the [JSON](https://en.wikipedia.org/wiki/JSON) format.
Specifically, a JSON file can declare dictionaries, lists and simple objects
such as strings, numbers and `null`.
As one extension to plain JSON, PMSCO ignores line comments starting with a hash `#` or double-slash `//`.
This can be used to temporarily hide a parameter from the parser.
The syntax of these basic elements is similar to Python source code (there are some differences, though).

For example run-files, have a look at the twoatom demo project.
At the top level, a PMSCO run-file contains a dictionary with up to two items:

1. The _project_ item is the most important; it is described in the following under @ref sec_runfile_project.
2. The _schedule_ item is an optional section for passing the parameters to a job queue of a computing cluster.
   See @ref sec_runfile_schedule.

\subsection sec_runfile_schema Schema

The structure of a JSON file can be described in a _schema_ file that can be used to check the syntax and structure programmatically.
The `schema/runfile.schema.json` file of the PMSCO distribution describes the structure of a run-file as well as common properties of the project.
The schema is, however, rather basic and does not cover all parameters, conditional cases or custom project properties.

A run-file can be easily validated against the schema while editing in the PyCharm IDE.
Alternatively, the jsonschema validator from the Python distribution can be used on the command line.
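For example - assuming the jsonschema package is installed in the environment; the exact options may depend on its version:

~~~~~~{.sh}
jsonschema -i my-runfile.json schema/runfile.schema.json
~~~~~~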

\subsection sec_runfile_project Project Specification

The following minimum run-file from the twoatom project demonstrates how to specify the project:

~~~~~~{.py}
{
    "project": {
        "__module__": "twoatom",
        "__class__": "TwoatomProject",
        "mode": "single",
        "job_name": "twoatom0001"
    }
}
~~~~~~

Further dictionary items correspond to attributes of the project class.

The module name is the same as would be used in a Python import statement.
It must be findable on the Python path.
Alternatively, a file path may be specified.
PMSCO ensures that the directory containing the `pmsco` and `projects` sub-directories is on the Python path.
The class name must be in the namespace of the loaded module.

As PMSCO starts, it imports the specified module,
constructs an object of the specified project class,
and assigns any further items to project attributes.
In the example above, it creates an object of type `TwoatomProject` from the `twoatom` module
and assigns `single` to the `mode` property and `twoatom0001` to the `job_name` property.

Any attributes not specified in the run-file remain at their default values
that were set by the `__init__` constructor of the project class.

Note that parameter names must start with an alphabetic character, else they are ignored
(useful for comments as JSON does not have a syntax for comments).
Also note that PMSCO does not spell-check parameter names.
The parameter values are just written to the corresponding object attribute.
If a name is misspelled, the value will be written to the wrong attribute.

PMSCO carries out only the most important checks on the given parameter values.
Incorrect values may lead to improper operation or exceptions later in the calculations.
The project class can explicitly check and fix important or error-prone attributes, or report errors.

The following sub-sections describe the most common properties of the project class.

\subsection sec_runfile_common Common Arguments
|
||||
\subsubsection sec_runfile_common Common Arguments
|
||||
|
||||
The following table lists some important parameters controlling the calculations.
|
||||
They are declared in the pmsco.projects.Project class.
|
||||
@@ -96,10 +134,10 @@ They are declared in the pmsco.projects.Project class.

| Key | Values | Description |
| --- | --- | --- |
| mode | `single` (default), `grid`, `swarm`, `genetic`, `table`, `test`, `validate` | Operation mode. `validate` can be used to check the syntax of the run-file; the process exits before starting calculations. |
| directories | dictionary | This dictionary lists common file paths used in the project. It contains keys such as `home`, `project`, `output` (see documentation of Project class in pmsco.project). |
| output_dir | path | Shortcut for directories["output"] |
| data_dir | path | Shortcut for directories["data"] |
| job_name | string, must be a valid and unique file name (see note below) | Base name for all produced output files. It is recommended to set a unique name for each calculation run. Do not include a path. The path can be set in _output_dir_. |
| cluster_generator | dictionary | Class name and attributes of the cluster generator. See below. |
| atomic_scattering_factory | string<br>Default: InternalAtomicCalculator from pmsco.calculators.calculator | Class name of the atomic scattering calculator. This name must be in the namespace of the project module. |
| multiple_scattering_factory | string<br>Default: EdacCalculator from pmsco.calculators.edac | Class name of the multiple scattering calculator. This name must be in the namespace of the project module. |
@@ -108,27 +146,71 @@ They are declared in the pmsco.projects.Project class.
| scans | list of dictionaries | See @ref sec_runfile_scans below. |
| optimizer_params | dictionary | See @ref sec_runfile_optimizer below. |

\note The *job name* parameter appears most visibly as the prefix of output file names.
It is also registered in the `jobs` table of the results database (if used),
and it is used to identify the job with a job scheduling system.
For these reasons, it is important that the job name be unique within the respective subsystem.
Specifically, you need to *provide a new job name each time you start pmsco*, otherwise the job may fail.
It may be more natural to specify the job name on the command line using the `-o` argument
rather than changing the run file every time.
Unfortunately, PMSCO cannot auto-generate, auto-increment or verify the job name.
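
A simple user-side workaround is to derive a fresh job name from the current time in the calling script;
a minimal sketch (the naming scheme is just an example):

~~~~~~{.py}
from datetime import datetime

# e.g. twoatom_20240131-142500
job_name = "twoatom_" + datetime.now().strftime("%Y%m%d-%H%M%S")
~~~~~~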

File names specified in a runfile can include an explicit path or a placeholder.
Placeholders have the format `${key}` where `key` must be one of the keys of the `directories` dictionary.
The placeholder will then be replaced by the corresponding value before the calculation starts
(as a part of the pmsco.project.Project.validate method).
The `directories` dictionary can be filled by the project class or in the runfile.
In addition, a number of keys are defined by PMSCO and can be used as placeholders in other directories and file paths.

| Key | Type | Description |
| --- | --- | --- |
| data | absolute | Directory with experimental data. Must be set by user if needed. |
| home | absolute | Home directory of the current user. |
| pmsco | absolute | Directory that contains the loaded pmsco.py module. Note: This may be in a site packages directory. |
| output | absolute | Output directory. Must be set by the user. |
| project | absolute | Directory where the project module is located. |
| project_name | relative | Name of the project. By default, the name of the project class. |
| job_name | relative | Name of the calculation job. |
| mode | relative | Calculation mode. |
| report | absolute | Report directory. Defaults to `${output}/report`. |
| run | absolute | Directory where the runfile is located (if used). |
| temp | absolute | Directory for temporary files. Currently not used. |
| work | absolute | Current working directory. |

Placeholders of absolute paths must be used at the beginning of a path.
Relative paths can be used at any position in a file path.
Some of the keys may have empty values if PMSCO was loaded in a non-standard way.
For verification of the path resolution, all directories are printed to the log file at WARNING level (default).
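
Internally, each path component is resolved with Python's `string.Template` substitution
(cf. the resolve_path function in pmsco.config); a minimal sketch:

~~~~~~{.py}
from pathlib import Path
from string import Template

def resolve_path(path, dirs):
    # substitute ${key} placeholders in every component of the path
    return Path(*(Template(p).substitute(dirs) for p in Path(path).parts))

resolve_path("${home}/pmsco/twoatom0001", {"home": "/home/user"})
# -> PosixPath('/home/user/pmsco/twoatom0001')
~~~~~~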

The following table lists some common control parameters and metadata
that affect the behaviour of the program but do not affect the calculation results.
The job metadata is used to identify and describe a job in the results database if requested.

| Key | Values | Description |
| --- | --- | --- |
| db_file | new or existing file path or `:memory:` | SQLite3 database file to receive the optimization results. If the database exists, results are inserted under the given job name. If it doesn't exist, a new file is created. If the attribute is `:memory:`, an in-memory database is used internally and flushed at the end of processing. |
| job_tags | dictionary of strings | User-specified job tags in key-value format (metadata). |
| description | string | Description of the calculation job (metadata). |
| time_limit | decimal number<br>Default: 24. | Wall time limit in hours. The optimizers try to finish before the limit. This cannot be guaranteed, however. |
| keep_files | list of file categories | Output file categories to keep after the calculation. Multiple values can be specified and must be separated by spaces. By default, cluster and model (simulated data) of a limited number of best models are kept. See @ref sec_runfile_files below. |
| keep_best | integer number<br>Default: 10 | Number of best models for which result files should be kept. |
| keep_levels | integer number<br>Default: 1 | Numeric task level down to which files are kept. 1 = scan, 2 = domain, 3 = emitter. |
| log_level | DEBUG, INFO, WARNING, ERROR, CRITICAL | Minimum level of messages that should be added to the log. Empty string turns off logging. |
| log_file | file system path<br>Default: job_name + ".log" | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. The log file is created in the working directory. |
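
For example, the metadata items may be set in the run-file as follows (all values are illustrative):

~~~~~~{.py}
{
    "project": {
        // ...
        "db_file": "${output}/results.db",
        "job_tags": {"sample": "twoatom test", "batch": "2024-01"},
        "description": "test calculation with default settings",
        "time_limit": 12.0,
        "keep_best": 10
    }
}
~~~~~~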


\subsubsection sec_runfile_space Model Space

The `model_space` parameter is a dictionary of model parameters.
The key is the name of the parameter as used by the cluster and input-formatting code,
the value is a dictionary holding the `start`, `min`, `max`, `step` values to be used by the optimizer.
Instead of `min` and `max` you may declare the `width`, which will center the space on the start value
(i.e., `min = start - width/2` and `max = start + width/2`).

All parameter values can be declared as numbers or as simple Python expressions in double quotes.
Expressions are evaluated by the Python `eval` function;
all functions in the namespace of the project module are available.
Note that you have to import the `math` or `numpy` modules in your project module
if you want to use their functions.

~~~~~~{.py}
{
    // ...
    "model_space": {
        "dAB": {
            "start": 2.109,
            "min": "2.109 - 0.1",
            "max": "2.109 + 0.1",
            "step": 0.05
        },
        "pAB": {
            "start": "4 * 3.56 / math.sqrt(3.0)",
            "width": 4.0,
            "step": 0.5
        },
        // ...
    }
    // ...
}
~~~~~~

Alternatively, the `model_space` can be declared as a `ModelSpace` object.
However, this is not described in detail here.


\subsubsection sec_runfile_domains Domains

Domains is a list of dictionaries.
Each dictionary holds keys describing the domain to the cluster and input-formatting code.
The meaning of these keys is up to the project.

An example:

~~~~~~{.py}
{
    "project": {
        // ... (continuation elided in this diff)
~~~~~~
@@ -175,10 +261,10 @@ The meaning of these keys is up to the project.

\subsection sec_runfile_scans Experimental Scan Files

The pmsco.scan.Scan objects used in the calculation cannot be instantiated from the run-file directly.
Instead, the scans object of the run-file is a list of scan creators/loaders which specify how to create a Scan object.
The pmsco.scan module defines four scan creators: `ScanLoader`, `ScanCreator`, `HoloScanCreator` and `ScanKey`.
The following code block shows examples:

~~~~~~{.py}
{
    // ...
    "scans": [
        {
            "__class__": "ScanCreator",
            "filename": "twoatom_energy_alpha.etpai",
            "emitter": "N",
            "initial_state": "1s",
            "positions": {
                // ... (elided in this diff)
            }
        },
        {
            "__class__": "ScanLoader",
            "filename": "${project}/twoatom_hemi_250e.etpi",
            "emitter": "N",
            "initial_state": "1s",
            "is_modf": false
        },
        {
            "__class__": "HoloScanCreator",
            "filename": "${project}/twoatom_scan3.etpi",
            "emitter": "N",
            "initial_state": "1s",
            "generator": "pmsco.data.holo_grid",
            "generator_args": {
                "theta_start": 90,
                "theta_step": 1,
                "theta_range": 90,
                "phi_start": 0,
                "phi_range": 360,
                "phi_refinement": 1
            },
            "other_positions": {"e": 250, "a": 0}
        },
        {
            "__class__": "HoloScanCreator",
            "filename": "${project}/twoatom_scan4.etpi",
            "emitter": "N",
            "initial_state": "1s",
            "other_positions": {"e": 250, "a": 0}
        }
    ]
}
~~~~~~

The class name must be specified as it would be called in the custom project module.
For the example shown above, the following import statements are necessary in the twoatom project module.
(Other forms of the import statement can be used accordingly.)

~~~~~~{.py}
import numpy as np
import pmsco.data
from pmsco.scan import ScanKey, ScanLoader, ScanCreator, HoloScanCreator
~~~~~~

The *ScanCreator* object creates a scan using Numpy array constructors in `positions`.
In the example above, a two-dimensional rectangular energy-alpha scan grid is created.
@@ -223,6 +335,19 @@ and must return a one-dimensional Numpy `ndarray`.

The `emitter` and `initial_state` keys define the probed core level.

The *HoloScanCreator* object creates a *holo scan*, i.e., an angle scan of the theta and phi axes.
The distribution of the grid points is defined by a separate generator function.
Usually, the default pmsco.data.holo_grid function is used,
which generates the well-known Osterwalder holo scan
with constant point density in solid angle and equidistant polar steps.

The `generator` and `generator_args` properties have default values.
The two example holo scans above are equivalent,
as the first one simply spells out the default values.
If you want to specify a generator function explicitly,
you must import it into the namespace of your project.
E.g. for `pmsco.data.holo_grid` you have to `import pmsco.data`.
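
The construction can be sketched as follows
(a simplified illustration, not the actual pmsco.data.holo_grid code;
the sweep direction and the exact point counts are assumptions):

~~~~~~{.py}
import numpy as np

def holo_grid_sketch(theta_start=90, theta_step=1, theta_range=90,
                     phi_start=0, phi_range=360, phi_refinement=1):
    # equidistant polar steps; the azimuthal step is widened by 1/sin(theta)
    # so that the point density per solid angle stays approximately constant
    thetas, phis = [], []
    for t in np.arange(theta_start, theta_start - theta_range - 1e-6, -theta_step):
        n_phi = max(1, int(round(phi_range / theta_step
                                 * np.sin(np.radians(t)) * phi_refinement)))
        for p in np.linspace(phi_start, phi_start + phi_range, n_phi, endpoint=False):
            thetas.append(t)
            phis.append(p)
    return np.array(thetas), np.array(phis)
~~~~~~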

The *ScanLoader* object loads a data file, specified under `filename`.
The filename can include a placeholder which is replaced by the corresponding item from Project.directories.
Note that some of the directories (including `project`) are pre-set by PMSCO.
@@ -232,13 +357,14 @@ The `is_modf` key indicates whether the file contains a modulation function (`tr
In the latter case, the modulation function is calculated after loading.

The *ScanKey* is the shortest scan specification in the run-file.
It should not be used in new projects as it uses hard-coded data links in program code.

ScanKey is a shortcut to a complete scan dictionary in the project object.
The `scan_dict` must be set up in the `__init__` method of the project class.
The `key` item specifies which key of `scan_dict` should be used to create the Scan object.

Each item of `scan_dict` holds a dictionary
that holds the attributes for either a `ScanCreator`, `HoloScanCreator` or a `ScanLoader`.
If it contains a `positions` (`other_positions`) key, it represents a `ScanCreator` (`HoloScanCreator`), else a `ScanLoader`.
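
For illustration, a `scan_dict` entry in the project's `__init__` method may look like this
(a hypothetical sketch; the key and file name are examples only):

~~~~~~{.py}
# a ScanLoader-style entry: no "positions" or "other_positions" key
self.scan_dict["Ge3s113tp"] = {
    "filename": "${project}/ge3s113tp.etpi",
    "emitter": "Ge",
    "initial_state": "3s",
    "is_modf": False
}
~~~~~~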


\subsection sec_runfile_optimizer Optimizer Parameters

@@ -247,9 +373,11 @@ The `optimizer_params` is a dictionary holding one or more of the following item

| Key | Values | Description |
| --- | --- | --- |
| pop_size | integer<br>The default value is the greater of 4 or the number of parallel calculation processes. | Population size (number of particles) in swarm and genetic optimization mode. |
| seed_file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.optimizers.population.Population.seed_from_file. |
| seed_limit | integer | Number of seed models to import. |
| recalc_seed | true or false | If true, the seed models are calculated. Otherwise, the R-factor from the seed file is used as result. Use true if the seed file contains no or outdated R-factors. |
| table_source | file system path | Name of the model table file in table scan mode. |
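
For example (values are illustrative):

~~~~~~{.py}
{
    "project": {
        // ...
        "optimizer_params": {
            "pop_size": 20,
            "seed_file": "${output}/twoatom0000.dat",
            "seed_limit": 10,
            "recalc_seed": false
        }
    }
}
~~~~~~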


\subsubsection sec_runfile_files File Categories

@@ -286,6 +414,45 @@ you have to add the file categories that you want to keep, e.g.,
Do not specify `rfac` alone as this would effectively keep no files at all.


\subsection sec_runfile_reports Reports

Run-time graphical reports are configured in the `reports` section.
The section is organized as a list of dictionaries.
Each dictionary sets up a specific report.
For example:

~~~~~~{.py}
{
    "project": {
        // ...
        "reports": [
            {
                "__class__": "ConvergencePlot",
                "filename_format": "${base}.convergence",
                "title_format": "my_calc"
            },
            {
                "__class__": "SwarmPlot",
                "filename_format": "${base}-${param0}-${param1}-${gen}.swarmplot",
                "title_format": "my_calc ${param0}-${param1} gen ${gen}",
                "params": [["A", "B"], ["C", "D"]]
            }
        ]
    }
}
~~~~~~

The class name must be specified as it would be called in the custom project module.
For the example above, the import section of the project must include:

~~~~~~{.py}
from pmsco.reports.convergence import ConvergencePlot
from pmsco.reports.swarm import SwarmPlot
~~~~~~

For details on reports and their configuration, see @ref sec_reports.


\subsection sec_runfile_schedule Job Scheduling

To submit a job to a resource manager such as Slurm, add a `schedule` section to the run file
@@ -306,7 +473,7 @@ To submit a job to a resource manager such as Slurm, add a `schedule` section to
        "__module__": "projects.twoatom.twoatom",
        "__class__": "TwoatomProject",
        "mode": "single",
        "output_file": "${home}/pmsco/twoatom0001",
        ...
    }
}
@@ -314,10 +481,16 @@ To submit a job to a resource manager such as Slurm, add a `schedule` section to

In the same way as for the project, the `__module__` and `__class__` keys select the class that handles the job submission.
In this example, it is pmsco.schedule.PsiRaSchedule which is tied to the Ra cluster at PSI.
For other machines, you can sub-class one of the classes in the pmsco.schedule module and include it in your project package.
The derived job submission class must prepare the code, run file and job script, and submit the job to the queue.
It should copy the code to the calculation directory to avoid version conflicts if the user continues to edit the code.
Compilation of the code can be done before submission or as a part of the job script.

@note It is difficult to check the run file and code against errors that may abort job execution.
New code and run files should be tested with a modified, fast-running calculation.

The parameters of pmsco.schedule.PsiRaSchedule are as follows.
Some of them are also used in other schedule classes or may have different types or ranges.
Information about the computing nodes and partitions can be printed by the `sinfo -Nel` and `sinfo --long` commands.

| Key | Values | Description |
| --- | --- | --- |
@@ -325,9 +498,9 @@ Some of them are also used in other schedule classes or may have different types
| tasks_per_node | integer: 1..24, 32 | Number of tasks (CPU cores on Ra) per node. Jobs with less than 24 tasks are assigned to the shared partition. |
| wall_time | string: [days-]hours[:minutes[:seconds]] <br> dict: with any combination of days, hours, minutes, seconds | Maximum run time (wall time) of the job. |
| manual | bool | Manual submission (true) or automatic submission (false). Manual submission allows you to inspect the job files before submission. |
| enabled | bool | Enable scheduling (true). Otherwise, the calculation is started directly (false). |

@note The calculation job may run in a different working directory than the current one.
It is important to specify absolute data and output directories in the run file (project/directories section).

Placeholders like `${home}` can be used to make run files portable, cf. @ref sec_run_dirs.
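
For illustration, a complete schedule section using these parameters may look as follows
(a sketch; the values are examples only):

~~~~~~{.py}
{
    "schedule": {
        "__module__": "pmsco.schedule",
        "__class__": "PsiRaSchedule",
        "tasks_per_node": 24,
        "wall_time": {"hours": 12},
        "manual": true,
        "enabled": true
    },
    "project": {
        // ...
    }
}
~~~~~~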
*/

@@ -58,6 +58,8 @@ domain
emit
region
rfac
timestamp
secs
}

class Param << (T,orchid) >> {
@@ -74,6 +76,7 @@ param_id
model_id
..
value
delta
}

Project "1" *-- "*" Job

86
docs/src/uml/scan-classes.puml
Normal file
86
docs/src/uml/scan-classes.puml
Normal file
@@ -0,0 +1,86 @@

@startuml
'https://plantuml.com/class-diagram

class ConfigurableObject

class Scan {
    filename: str
    raw_data: numpy.ndarray
    dtype: numpy.dtype
    modulation: numpy.ndarray
    modulation_func: Callable
    modulation_args: Dict
    rfactor_func: Callable
    rfactor_args: Dict
    mode: List
    emitter: str
    initial_state: str
    positions: Dict

    __init__()
    copy()
    load()
    define_scan()
    import_scan_file()
    analyse_raw_data()
    generate_holo_scan()
}

class ScanSpec {
    filename: str
    emitter: str
    initial_state: str
    modulation_func: Callable
    modulation_args: Dict
    rfactor_func: Callable
    rfactor_args: Dict

    __init__()
    load()
}

class ScanKey {
    project: pmsco.project.Project
    key: str

    __init__()
    load()
}

class ScanLoader {
    is_modf: bool
    patch: Dict

    __init__()
    load()
}

class ScanCreator {
    positions: Dict

    __init__()
    load()
}

class HoloScanCreator {
    generator: Callable
    generator_args: Dict

    __init__()
    load()
    set_property()
}

ConfigurableObject <|-- ScanSpec
ConfigurableObject <|-- ScanKey
ScanSpec <|-- ScanCreator
ScanSpec <|-- ScanLoader
ScanSpec <|-- HoloScanCreator
ScanKey --> ScanCreator: creates
ScanKey --> HoloScanCreator: creates
ScanKey --> ScanLoader: creates
ScanLoader --> Scan: creates
ScanCreator --> Scan: creates
HoloScanCreator --> Scan: creates

@enduml

30
extras/docker-docs/Dockerfile
Normal file
30
extras/docker-docs/Dockerfile
Normal file
@@ -0,0 +1,30 @@

FROM python:3.12

# docker container to build PMSCO documentation

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
    default-jre \
    doxygen \
    gawk \
    git \
    graphviz \
    pandoc \
    wget \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir \
    doxypypy \
    meson \
    meson-python \
    ninja \
    pynose

RUN wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
ENV PLANTUML_JAR_PATH=/app/plantuml.jar

COPY . .

CMD ["sh"]

@@ -1,5 +1,5 @@

BootStrap: debootstrap
OSVersion: focal
MirrorURL: http://ch.archive.ubuntu.com/ubuntu/

%help
@@ -32,7 +32,7 @@ path/to/pmsco must point to the directory that contains the __main__.py file.
%labels
    Maintainer Matthias Muntwiler
    Maintainer_Email matthias.muntwiler@psi.ch
    Python_Version 3.8

%environment
    export LC_ALL=C
@@ -43,7 +43,7 @@ path/to/pmsco must point to the directory that contains the __main__.py file.

%post
    export LC_ALL=C
    export PYTHON_VERSION=3.8
    export CONDA_ROOT=/opt/miniconda
    export PLANTUML_ROOT=/opt/plantuml
@@ -63,33 +63,44 @@ path/to/pmsco must point to the directory that contains the __main__.py file.

        libblas-dev \
        liblapack-dev \
        libopenmpi-dev \
        make \
        nano \
        openmpi-bin \
        openmpi-common \
        sqlite3 \
        wget
    apt-get clean

    wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O ~/miniforge3.sh
    bash ~/miniforge3.sh -b -p ${CONDA_ROOT}

    . ${CONDA_ROOT}/etc/profile.d/conda.sh
    conda activate base
    conda create -q --yes -n pmsco python=${PYTHON_VERSION}
    conda activate pmsco
    conda install -q --yes -n pmsco -c conda-forge \
        commentjson \
        fasteners \
        ipython \
        ipykernel \
        jsonschema \
        h5py \
        matplotlib \
        meson \
        mock \
        pynose \
        "numpy>=1.13" \
        pandas \
        periodictable \
        pip \
        scikit-learn \
        scipy \
        seaborn \
        sqlalchemy \
        statsmodels \
        swig
    conda clean --all -y
    ${CONDA_ROOT}/envs/pmsco/bin/pip install meson-python mpi4py netgraph networkx doxypypy

    mkdir ${PLANTUML_ROOT}
    wget -O ${PLANTUML_ROOT}/plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
@@ -111,11 +122,16 @@ path/to/pmsco must point to the directory that contains the __main__.py file.

    git checkout master
    git checkout -b ${SINGULAR_BRANCH}

    meson setup build
    cd build
    meson compile
    meson install
    meson test

%apprun compile
    . ${CONDA_ROOT}/etc/profile.d/conda.sh
    conda activate pmsco
    cd build
    meson compile
    meson install
    meson test

55
makefile
55
makefile
@@ -1,55 +0,0 @@

SHELL=/bin/sh

# makefile for all programs, modules and documentation
#
# required libraries for LOESS module: libblas, liblapack, libf2c
# (you may have to set soft links so that linker finds them)
#
# on shared computing systems (high-performance clusters)
# you may have to switch the environment before running this script.
#
# note: the public distribution does not include third-party code
# (EDAC in particular) because of incompatible license terms.
# please obtain such code from the original authors
# and copy it to the proper directory before compilation.
#
# the MSC and MUFPOT programs are currently not used.
# they are not built by the top-level targets all and bin.
#
# the make system uses the compiler executables of the current environment.
# to override the executables, you may set the following variables.
# to switch between python versions, however, the developers recommend miniconda.
#
# PYTHON = python executable (default: python)
# PYTHONOPTS = python options (default: none)
# CC = C and Fortran compiler executable (default: gcc)
# CCOPTS = C compiler options (default: none)
# CXX = C++ compiler executable (default: g++)
# CXXOPTS = C++ compiler options (default: none)
#
# make all PYTHON=/usr/bin/python2.7
#
# or:
#
# export PYTHON=/usr/bin/python2.7
# make all
#

.PHONY: all bin docs clean edac loess msc mufpot phagen

PMSCO_DIR = pmsco
DOCS_DIR = docs

all: edac loess phagen docs

bin: edac loess phagen

edac loess msc mufpot phagen:
	$(MAKE) -C $(PMSCO_DIR)

docs:
	$(MAKE) -C $(DOCS_DIR)

clean:
	$(MAKE) -C $(PMSCO_DIR) clean
	$(MAKE) -C $(DOCS_DIR) clean
@@ -11,23 +11,23 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n

    http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import numpy as np
import os

import pmsco.calculators.calculator as calculator
import pmsco.data as md
import pmsco.cluster as mc
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

try:
    import edac
except (ImportError, ModuleNotFoundError) as e:
    edac = None
    logger.critical("Error importing the edac package.", exc_info=e)

class EdacCalculator(calculator.Calculator):

    def write_input_file(self, params, scan, filepath):
@@ -59,7 +59,7 @@ class EdacCalculator(calculator.Calculator):
        """
        files = {}

        with open(filepath, "wt", encoding="latin1") as f:
            f.write("verbose off\n")
            f.write("cluster input {0}\n".format(params.cluster_file))
            f.write("emitters {0:d} l(A)\n".format(len(params.emitters)))
@@ -219,8 +219,10 @@ class EdacCalculator(calculator.Calculator):

            dat_filename = out_filename
        if params.fixed_cluster:
            etpi_filename = base_filename + ".etpai"
            dtype = md.DTYPE_ETPAI
        else:
            etpi_filename = base_filename + ".etpi"
            dtype = md.DTYPE_ETPI

        # fix EDAC particularities
        params.cluster_file = clu_filename
@@ -246,13 +248,10 @@ class EdacCalculator(calculator.Calculator):
        result_etpi['e'] -= params.work_function

        if 't' in scan.mode and 'p' in scan.mode:
            dest_tpi = np.zeros(scan.raw_data.shape, dtype)
            dest_tpi['t'] = scan.thetas
            dest_tpi['p'] = scan.phis
            result_etpi = md.interpolate_hemi_scan(result_etpi, dest_tpi)

        if params.fixed_cluster:
            expected_shape = max(scan.energies.shape[0], 1) * max(scan.alphas.shape[0], 1)

@@ -18,7 +18,7 @@ from __future__ import division

from __future__ import print_function
import pmsco.calculators.calculator as calculator
import pmsco.data as md
import subprojects.msc.msc as msc
import logging

logger = logging.getLogger(__name__)

@@ -27,36 +27,36 @@ logger = logging.getLogger(__name__)
class MscCalculator(calculator.Calculator):
    def write_input_file(self, params, filepath):
        with open(filepath, "w") as f:
            f.write(" %s\n" % (params.title))
            f.write(" %s\n" % (params.comment))
            l_init = "spdf".index(params.initial_state[1])
            f.write(" %4u\n" % (l_init))
            f.write(" %4u\n" % (params.spherical_order))
            f.write(" %s\n" % (params.polarization))
            f.write(" %4u\n" % (params.scattering_level))
            f.write(" %7.2f%7.2f\n" % (params.fcut, params.cut))
            f.write(" %12.6f\n" % (params.angular_resolution))
            f.write(" %12.6f\n" % (params.lattice_constant))
            f.write(" %12.6f\n" % (params.z_surface))
            f.write(" %4u\n" % (params.atom_types))
            for iat in range(params.atom_types):
                f.write(" %4u %s\n" % (params.atomic_number[iat], params.phase_file[iat]))
                f.write(" %12.6f\n" % (params.msq_displacement[iat]))
            f.write(" %12.6f\n" % (params.planewave_attenuation))
            f.write(" %12.6f\n" % (params.inner_potential))
            f.write(" %12.6f\n" % (params.symmetry_range))
            f.write(" %12.6f\n" % (params.polar_incidence_angle))
            f.write(" %12.6f\n" % (params.azimuthal_incidence_angle))
            f.write(" %s\n" % (params.vibration_model))
            f.write(" %12.6f\n" % (params.substrate_atomic_mass))
            f.write(" %12.6f\n" % (params.experiment_temperature))
            f.write(" %12.6f\n" % (params.debye_temperature))
            f.write(" %12.6f\n" % (params.debye_wavevector))
            f.write(" %12.6f%7.3f\n" % (params.rme_minus_value, params.rme_minus_shift))
            f.write(" %12.6f%7.3f\n" % (params.rme_plus_value, params.rme_plus_shift))
            f.write(" %4u\n" % (1))
            f.write(" %4u %12.6f\n" % (1, 1.0))

    def run(self, params, cluster, scan, output_file):
        """

@@ -1,44 +0,0 @@

SHELL=/bin/sh

# makefile for PHAGEN program and module
#
# the PHAGEN source code is not included in the public distribution.
# please obtain the PHAGEN code from the original author,
# and copy it to this directory before compilation.
#
# see the top-level makefile for additional information.

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean phagen

FC?=gfortran
FCOPTS?=-std=legacy
F2PY?=f2py
F2PYOPTS?=--f77flags=-std=legacy --f90flags=-std=legacy
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHONINC?=
PYTHON_CONFIG = ${PYTHON}-config
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix)

all: phagen

phagen: phagen.exe phagen$(PYTHON_EXT_SUFFIX)

phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc
	$(FC) $(FCOPTS) -o phagen.exe phagen_scf.f

phagen.pyf: | phagen_scf.f
	$(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain

phagen$(PYTHON_EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc
	$(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f

clean:
	rm -f *.so *.o *.exe
@@ -1,102 +0,0 @@

--- phagen_scf.orig.f	2019-06-05 16:45:52.977855859 +0200
+++ phagen_scf.f	2019-05-09 16:32:35.790286429 +0200
@@ -174,6 +174,99 @@
 1100 format(//,1x,' ** phagen terminated normally ** ',//)
      end
 
+
+c-----------------------------------------------------------------------
+      subroutine libmain(infile,outfile,etcfile)
+c main calculation routine
+c entry point for external callers
+c
+c infile: name of parameter input file
+c
+c outfile: base name of output files
+c output files with endings .list, .clu, .pha, .tl, .rad
+c will be created
+c-----------------------------------------------------------------------
+      implicit real*8 (a-h,o-z)
+c
+      include 'msxas3.inc'
+      include 'msxasc3.inc'
+
+      character*60 infile,outfile,etcfile
+      character*70 listfile,clufile,tlfile,radfile,phafile
+
+c
+c.. constants
+      antoau = 0.52917715d0
+      pi = 3.141592653589793d0
+      ev = 13.6058d0
+      zero = 0.d0
+c.. threshold for linearity
+      thresh = 1.d-4
+c.. fortran io units
+      idat = 5
+      iwr = 6
+      iphas = 30
+      iedl0 = 31
+      iwf = 32
+      iof = 17
+
+      iii=LnBlnk(outfile)+1
+      listfile=outfile
+      listfile(iii:)='.list'
+      clufile=outfile
+      clufile(iii:)='.clu'
+      phafile=outfile
+      phafile(iii:)='.pha'
+      tlfile=outfile
+      tlfile(iii:)='.tl'
+      radfile=outfile
+      radfile(iii:)='.rad'
+
+      open(idat,file=infile,form='formatted',status='old')
+      open(iwr,file=listfile,form='formatted',status='unknown')
+      open(10,file=clufile,form='formatted',status='unknown')
+      open(35,file=tlfile,form='formatted',status='unknown')
+      open(55,file=radfile,form='formatted',status='unknown')
+      open(iphas,file=phafile,form='formatted',status='unknown')
+
+      open(iedl0,form='unformatted',status='scratch')
+      open(iof,form='unformatted',status='scratch')
+      open(unit=21,form='unformatted',status='scratch')
+      open(60,form='formatted',status='scratch')
+      open(50,form='formatted',status='scratch')
+      open(unit=13,form='formatted',status='scratch')
+      open(unit=14,form='formatted',status='scratch')
+      open(unit=11,status='scratch')
+      open(unit=iwf,status='scratch')
+      open(unit=33,status='scratch')
+      open(unit=66,status='scratch')
+
+      call inctrl
+      call intit(iof)
+      call incoor
+      call calphas
+
+      close(idat)
+      close(iwr)
+      close(10)
+      close(35)
+      close(55)
+      close(iphas)
+      close(iedl0)
+      close(iof)
+      close(60)
+      close(50)
+      close(13)
+      close(14)
+      close(11)
+      close(iwf)
+      close(33)
+      close(66)
+      close(21)
+
+      endsubroutine
+
+
      subroutine inctrl
      implicit real*8 (a-h,o-z)
      include 'msxas3.inc'

@@ -2,33 +2,41 @@

@package pmsco.calculators.phagen.runner
Natoli/Sebilleau PHAGEN interface

This module runs the PHAGEN program to calculate scattering factors and radial matrix elements.

Requires PHAGEN version 2.2 from https://git.ipr.univ-rennes.fr/epsi/msspec_python3.git (contained in subprojects).

@author Matthias Muntwiler

@copyright (c) 2015-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import os
import shutil
import tempfile
from pathlib import Path
import sys

from pmsco.calculators.calculator import AtomicCalculator
from pmsco.calculators.phagen.translator import Translator
import pmsco.cluster
from pmsco.helpers import stdout_redirected
import pmsco.project

logger = logging.getLogger(__name__)

try:
    import phagen
except (ImportError, ModuleNotFoundError) as e:
    phagen = None
    logger.critical("Error importing the phagen package.", exc_info=e)

class PhagenCalculator(AtomicCalculator):

    """
@@ -37,7 +45,11 @@ class PhagenCalculator(AtomicCalculator):
    this produces scatterer, radial matrix element and cluster files for EDAC.
    """

    def run(self,
            params: pmsco.project.CalculatorParams,
            cluster: pmsco.cluster.Cluster,
            scan: pmsco.project.Scan,
            output_file: str):
        """
        create the input file, run PHAGEN, and translate the output to EDAC format.

@@ -85,19 +97,16 @@ class PhagenCalculator(AtomicCalculator):

        phagen_cluster = pmsco.cluster.Cluster()

        files = {}
        prev_wd = Path.cwd()
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # prepare input for phagen
                temp_path = Path(temp_dir)
                in_path = temp_path / "input"
                in_path.mkdir(exist_ok=True)
                out_path = temp_path / "output"
                out_path.mkdir(exist_ok=True)

                infile = in_path / "input.ms"
                try:
                    transl.write_input(infile)
                    report_infile = os.path.join(prev_wd, output_file + ".phagen.in")
@@ -106,12 +115,22 @@ class PhagenCalculator(AtomicCalculator):

                except IOError:
                    logger.warning("error writing phagen input file {fi}.".format(fi=infile))

                report_listfile = os.path.join(prev_wd, output_file + ".phagen.list")
                files[report_listfile] = "log"

                # call phagen, redirect stdout (unit 6)
                os.chdir(out_path)
                with open(report_listfile, "wb") as f:
                    with stdout_redirected(f):
                        phagen.phagen()

                phafile = out_path / "div" / "phases.dat"
                radfile = out_path / "fort.55"
                # tlfile = out_path / "fort.35"
                clufile = out_path / "clus" / "clus.out"

                # collect results
                try:
                    transl.parse_phagen_phase(phafile)
                    report_phafile = os.path.join(prev_wd, output_file + ".phagen.pha")
                    shutil.copy(phafile, report_phafile)
@@ -120,7 +139,6 @@ class PhagenCalculator(AtomicCalculator):

                    logger.error("error loading phagen phase file {fi}".format(fi=phafile))

                try:
                    transl.parse_radial_file(radfile)
                    report_radfile = os.path.join(prev_wd, output_file + ".phagen.rad")
                    shutil.copy(radfile, report_radfile)
@@ -129,31 +147,23 @@ class PhagenCalculator(AtomicCalculator):

                    logger.error("error loading phagen radial file {fi}".format(fi=radfile))

                try:
                    phagen_cluster.load_from_file(clufile, pmsco.cluster.FMT_PHAGEN_OUT)
                except IOError:
                    logger.error("error loading phagen cluster file {fi}".format(fi=clufile))

        finally:
            os.chdir(prev_wd)

        # write edac files
        scatfile = output_file + "_{}.scat"
        scatfiles = transl.write_edac_scattering(scatfile)
        params.phase_files = scatfiles.copy()
        files.update({f: "atomic" for f in params.phase_files.values()})

        rmefile = output_file + "_{}.rme"
        rmefiles = transl.write_edac_emission(rmefile)
        params.rme_files = rmefiles.copy()
        files.update({f: "atomic" for f in params.rme_files.values()})

        cluster.update_atoms(phagen_cluster, {'c'})
        clufile = output_file + ".pmsco.clu"

@@ -13,14 +13,12 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n

    http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import numpy as np

from pmsco.cluster import Cluster

logger = logging.getLogger(__name__)

## rydberg energy in electron volts
ERYDBERG = 13.6056923
@@ -59,7 +57,7 @@ class TranslationParams(object):

        self.initial_state = "1s"
        self.binding_energy = 0.
        self.cluster = None
        self.kinetic_energies = np.empty(0, dtype=float)

    @property
    def l_init(self):
@@ -287,7 +285,7 @@ class Translator(object):

            self.write_cluster(f)
            self.write_ionicity(f)
        else:
            with open(f, "wt", encoding="latin1") as fi:
                self.write_input(fi)

    def parse_phagen_phase(self, f):
@@ -392,7 +390,7 @@ class Translator(object):

            f.write(" 0 0")
            f.write("\n")
        else:
            with open(f, "wt", encoding="latin1") as fi:
                self.write_edac_scattering_file(fi, scat)

    def write_edac_phase_file(self, f, scat):
@@ -426,26 +424,36 @@ class Translator(object):

            f.write(" 0")
            f.write("\n")
        else:
            with open(f, "wt", encoding="latin1") as fi:
                self.write_edac_phase_file(fi, scat)

    def parse_radial_file(self, f):
        """
        parse the radial matrix element output file from phagen version 2.2.

        the file contains 7 header lines and one data line per requested energy.
        the data line contains real and imaginary parts of the matrix elements.
        the first four columns contain the electric dipole transitions Rd(li --> li - 1) and Rd(li --> li + 1),
        followed by higher orders that we do not use here.

        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).

        @return: None

        @raise ValueError if the file is in a wrong format.
        """
        data = np.atleast_2d(np.genfromtxt(f, skip_header=7))
        if data.shape[0] != self.params.kinetic_energies.shape[0] or data.shape[1] < 4:
            raise ValueError(f"Unexpected array size of Phagen radial matrix elements output: "
                             f"expected ({self.params.kinetic_energies.shape[0]}, >= 4), received {data.shape}")

        self.emission = np.resize(self.emission, data.shape[0:1])
        emission = self.emission
        emission['e'] = self.params.kinetic_energies
        emission['dw'] = data[:, 0] + 1j * data[:, 1]
        emission['up'] = data[:, 2] + 1j * data[:, 3]

    def write_edac_emission_file(self, f):
        """
        write the radial photoemission matrix element in EDAC format.
@@ -472,5 +480,24 @@ class Translator(object):
            f.write(" {0:.6f} {1:.6f}".format(item['dw'].real, item['dw'].imag))
            f.write("\n")
        else:
            with open(f, "wt", encoding="latin1") as of:
                self.write_edac_emission_file(of)

    def write_edac_emission(self, filename_format):
        """
        write the radial photoemission matrix element in EDAC format.

        requires self.scattering, self.emission, self.params.kinetic_energies and self.params.initial_state.

        @param filename_format: file name including, optionally, a placeholder {} for the atom class.
        since phagen calculates only one emitter, the placeholder is not necessary.

        @return: dictionary that maps atom classes to file names.
        since phagen calculates only one emitter, this dictionary will contain just one entry.
        """
        scat = self.scattering
        atom = scat['a'][0]
        f = filename_format.format(atom)
        self.write_edac_emission_file(f)
        files = {atom: f}
        return files

@@ -71,14 +71,14 @@ FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]

FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC = [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']

## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ = [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
@@ -306,7 +306,7 @@ class Cluster(object):

        """
        add bulk atoms to the cluster.

        the lattice is expanded up to the limits given by
        self.rmax (maximum distance from the origin)
        and z_surf (position of the surface).
        all atoms are non-emitters.
@@ -359,7 +359,7 @@ class Cluster(object):

        @param tol: tolerance for checking uniqueness.
        positions of two atoms are considered equal if all coordinates lie within the tolerance interval.

        @return: None
        """
        assert isinstance(cluster, Cluster)
        data = self.data.copy()
@@ -498,7 +498,7 @@ class Cluster(object):

        @param matrix: transformation matrix

        @return: None
        """
        pos = np.empty((3, self.data.shape[0]), np.float32)
        pos[0, :] = self.data['x']
@@ -1030,7 +1030,7 @@ class Cluster(object):

        update the index column.

        if you have modified the order or number of elements in the self.data array directly,
        you may need to re-index the atoms if your code uses functions that rely on the index.

        @return None
        """

@@ -1,40 +0,0 @@

"""
@package pmsco.compat
compatibility code

code bits to provide compatibility for different python versions.
currently supported 2.7 and 3.6.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from io import open as io_open


def open(fname, mode='r', encoding='latin1'):
    """
    open a data file for read/write/append using the default str type

    this is a drop-in for io.open
    where data is exchanged via the built-in str type of python,
    whether this is a byte string (python 2) or unicode string (python 3).

    the file is assumed to be a latin-1 encoded binary file.

    @param fname: file name and path
    @param mode: 'r', 'w' or 'a'
    @param encoding: 'latin1' (default), 'ascii' or 'utf-8'
    @return file handle
    """
    if isinstance(b'b', str):
        # python 2
        mode += 'b'
        kwargs = {}
    else:
        # python 3
        mode += 't'
        kwargs = {'encoding': encoding}

    return io_open(fname, mode, **kwargs)
125
pmsco/config.py
125
pmsco/config.py
@@ -4,7 +4,7 @@ infrastructure for configurable objects

@author Matthias Muntwiler

@copyright (c) 2021-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -12,78 +12,114 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n

"""

import collections.abc
import functools
import inspect
import logging
import os
from pathlib import Path
from string import Template
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union

logger = logging.getLogger(__name__)

PathLike = Union[str, os.PathLike]
DataDict = Mapping[str, Union[str, int, float, Iterable, Mapping]]


def resolve_path(path: PathLike, dirs: Mapping[str, Any]):
    """
    Resolve a file path by replacing placeholders.

    Placeholders are enclosed in curly braces.
    Values for all possible placeholders are provided in a dictionary.

    @param path: str, Path or other path-like.
        Example: '${work}/test/testfile.dat'.
    @param dirs: Dictionary mapping placeholders to project paths.
        The paths can be str, Path or other path-like.
        Example: {'work': '/home/user/work'}
    @return: pathlib.Path object
    """
    return Path(*(Template(p).substitute(dirs) for p in Path(path).parts))


 class ConfigurableObject(object):
     """
-    Parent class for objects that can be configured by a run file
+    Parent class for objects that can be configured from a runfile

-    the run file is a JSON file that contains object data in a nested dictionary structure.
+    The runfile is a JSON file that contains object data in a nested dictionary structure.

-    in the dictionary structure the keys are property or attribute names of the object to be initialized.
-    keys starting with a non-alphabetic character (except for some special keys like __class__) are ignored.
-    these can be used as comments, or they protect private attributes.
+    In the dictionary structure the keys are property or attribute names of the object to be initialized.
+    Keys starting with a non-alphabetic character (except for some special keys like __class__) are ignored.
+    These can be used as comments, or they protect private attributes.

-    the values can be numeric values, strings, lists or dictionaries.
+    The values can be numeric values, strings, lists or dictionaries.

-    simple values are simply assigned using setattr.
-    this may call a property setter if defined.
+    Simple values are simply assigned using setattr.
+    This may call a property setter if defined.

-    lists are iterated. each item is appended to the attribute.
-    the attribute must implement an append method in this case.
+    Lists are iterated. Each item is appended to the attribute.
+    The attribute must implement an append method in this case.

-    if an item is a dictionary and contains the special key '__class__',
+    If an item is a dictionary and contains the special key '__class__',
     an object of that class is instantiated and recursively initialized with the dictionary elements.
-    this requires that the class can be found in the module scope passed to the parser methods,
+    This requires that the class can be found in the module scope passed to the parser methods,
     and that the class inherits from this class.

-    cases that can't be covered easily using this mechanism
+    Cases that can't be covered easily using this mechanism
     should be implemented in a property setter.
-    value-checking should also be done in a property setter (or the append method in sequence-like objects).
+    Value-checking should also be done in a property setter (or the append method in sequence-like objects).
+
+    Attributes
+    ----------
+
+    project_symbols: Dictionary of symbols that should be used to resolve class and function names.
+        This is usually the globals() dictionary of the project module.
     """

     def __init__(self):
-        pass
+        super().__init__()
+        self.project_symbols: Optional[Mapping[str, Any]] = None

-    def set_properties(self, module, data_dict, project):
+    def set_properties(self, symbols: Optional[Mapping[str, Any]],
+                       data_dict: DataDict,
+                       project: 'ConfigurableObject') -> None:
         """
-        set properties of this class.
+        Set properties from dictionary.

-        @param module: module reference that should be used to resolve class names.
-            this is usually the project module.
-        @param data_dict: dictionary of properties to set.
-            see the class description for details.
-        @param project: reference to the project object.
+        @param symbols: Dictionary of symbols that should be used to resolve class names.
+            This is usually the globals() dictionary of the project module.
+            Classes are resolved using the eval function.
+        @param data_dict: Dictionary of properties to set.
+            See the class description for details.
+        @param project: Reference to the project object.
         @return: None
         """
+        self.project_symbols = symbols
         for key in data_dict:
             if key[0].isalpha():
-                self.set_property(module, key, data_dict[key], project)
+                self.set_property(symbols, key, data_dict[key], project)

-    def set_property(self, module, key, value, project):
-        obj = self.parse_object(module, value, project)
+    def set_property(self, symbols: Optional[Mapping[str, Any]],
+                     key: str,
+                     value: DataDict,
+                     project: 'ConfigurableObject') -> None:
+        """
+        Set one property.
+
+        @param symbols: Dictionary of symbols that should be used to resolve class names.
+            This is usually the globals() dictionary of the project module.
+            Classes are resolved using the eval function.
+        @param key: Attribute name to set.
+        @param value: New value of the attribute.
+        @param project: Reference to the project object.
+        @return: None
+        """
+        obj = self.parse_object(symbols, value, project)
         if hasattr(self, key):
             if obj is not None:
                 if isinstance(obj, collections.abc.MutableSequence):
@@ -103,18 +139,25 @@ class ConfigurableObject(object):
             else:
                 logger.warning(f"class {self.__class__.__name__} does not have attribute {key}.")

-    def parse_object(self, module, value, project):
+    def parse_object(self, symbols: Optional[Mapping[str, Any]],
+                     value: DataDict,
+                     project: 'ConfigurableObject') -> object:
         if isinstance(value, collections.abc.MutableMapping) and "__class__" in value:
-            cn = value["__class__"].split('.')
-            c = functools.reduce(getattr, cn, module)
+            cn = value["__class__"]
+            try:
+                c = eval(cn, symbols)
+            except (AttributeError, KeyError, NameError, ValueError):
+                logger.critical(f"can't resolve class name {cn}")
+                raise
             s = inspect.signature(c)
             if 'project' in s.parameters:
                 o = c(project=project)
             else:
                 o = c()
-            o.set_properties(module, value, project)
+            o.set_properties(symbols, value, project)
         elif isinstance(value, collections.abc.MutableSequence):
-            o = [self.parse_object(module, i, project) for i in value]
+            o = [self.parse_object(symbols, i, project) for i in value]
         else:
             o = value
         return o

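To illustrate the mechanism, a minimal sketch of a runfile dictionary applied to a hypothetical project class (MyProject and its attributes are made-up names for illustration only):

~~~~~~{.py}
import json
from pmsco.config import ConfigurableObject

class MyProject(ConfigurableObject):
    def __init__(self):
        super().__init__()
        self.output_dir = ""
        self.smoothing = 0.0

# keys starting with a non-alphabetic character act as comments and are skipped.
run_dict = json.loads('{"output_dir": "/tmp/run1", "smoothing": 0.4, "_note": "ignored"}')
project = MyProject()
# globals() supplies the symbols used to resolve '__class__' entries via eval.
project.set_properties(globals(), run_dict, project)
~~~~~~
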
248
pmsco/data.py
@@ -1,32 +1,36 @@
 """
 @package pmsco.data
-import, export, evaluation of msc data.
+Import, export, evaluation of msc data.

-this module provides common functions for loading/saving and manipulating PED scan data sets.
+This module provides common functions for loading/saving and manipulating PED scan data sets.

 @author Matthias Muntwiler

-@copyright (c) 2015-17 by Paul Scherrer Institut @n
+@copyright (c) 2015-23 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import logging
 import math
 import numpy as np
+import numpy.typing as npt
 import os
 import scipy.special
 import scipy.optimize as so

 from pmsco.compat import open
-import pmsco.loess.loess as loess
+from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union
+import h5py

 logger = logging.getLogger(__name__)

+try:
+    import loess
+except (ModuleNotFoundError, ImportError) as e:
+    loess = None
+    logger.critical("Error importing the loess package.", exc_info=e)
+
 ## energy, intensity
 DTYPE_EI = [('e', 'f4'), ('i', 'f4')]
 ## energy, theta, phi, intensity
@@ -43,9 +47,11 @@ DTYPE_TP = [('t', 'f4'), ('p', 'f4')]
 DTYPE_TPI = [('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
 ## theta, phi, intensity, sigma (standard deviation)
 DTYPE_TPIS = [('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]
+## intensity, theta, phi
+DTYPE_ITP = [('i', 'f4'), ('t', 'f4'), ('p', 'f4')]

 DTYPES = {'EI': DTYPE_EI, 'ETPI': DTYPE_ETPI, 'ETPIS': DTYPE_ETPIS, 'ETPAI': DTYPE_ETPAI, 'ETPAIS': DTYPE_ETPAIS,
-          'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, }
+          'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, 'ITP': DTYPE_ITP, }
 DATATYPES = DTYPES.keys

 ## supported scan types
@@ -55,8 +61,11 @@ DATATYPES = DTYPES.keys
 # @arg @c 'TP' theta - phi (holo scan)
 SCANTYPES = ['E', 'EA', 'ET', 'TP']

+GenTextFileLike = Union[str, os.PathLike, Iterable[str], int]
+OSFileLike = Union[str, os.PathLike, int]
+

-def create_etpi(shape, sigma_column=True):
+def create_etpi(shape: Tuple[int], sigma_column: bool = True) -> np.ndarray:
     """
     create an ETPI array of a given size.

@@ -64,6 +73,7 @@ def create_etpi(shape, sigma_column=True):
     the array is initialized with zeroes.

     @param shape (tuple) shape of the array
+    @param sigma_column: whether the array should include a sigma field (ETPIS type instead of ETPI)
     """
     if sigma_column:
         data = np.zeros(shape, dtype=DTYPE_ETPIS)
@@ -72,7 +82,7 @@
     return data


-def create_data(shape, datatype='', dtype=None):
+def create_data(shape: Tuple[int], datatype: str = '', dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
     """
     create a data array of a given size and type.

@@ -90,7 +100,108 @@ def create_data(shape, datatype='', dtype=None):
     return data


-def load_plt(filename, int_column=-1):
+def holo_grid(theta_start: float = 90., theta_step: float = 1., theta_range: float = 90.,
+              phi_start: float = 0., phi_range: float = 360., phi_refinement: float = 1.):
+    """
+    Generator of a holo grid with constant point density in solid angle.
+
+    The generator yields the polar coordinates of a hologram scan in the traditional Osterwalder fashion,
+    where the grid points are distributed evenly on the hemisphere by varying the azimuthal step size,
+    while the polar step size is constant.
+
+    The generator yields tuples (theta, phi) in degrees.
+    Theta is the polar, phi the azimuthal coordinate.
+
+    @param theta_start    Maximum polar angle in degrees, 0..90. Defaults to 90 (grazing emission).
+    @param theta_step     Polar angle step in degrees, 1..90. Defaults to 1.
+    @param theta_range    Polar angle range in degrees, 1..theta_start. Defaults to 90.
+    @param phi_start      Azimuthal start angle in degrees. Defaults to 0.
+                          This azimuth is included at every polar step.
+    @param phi_range      Azimuthal range in degrees. Defaults to 360.
+    @param phi_refinement Azimuthal refinement/oversampling (scalar). Defaults to 1.
+                          A refinement of 2 yields a factor 2 more grid points in the azimuthal sub-scans.
+
+    @return yield tuples (theta, phi) in degrees
+    """
+    deg2rad = 0.01745329
+
+    def calc_phi_step(th):
+        if th < 0.5 or int(phi_range * math.sin(th * deg2rad) * phi_refinement / theta_step) == 0:
+            phi_st = 0.0
+        else:
+            phi_st = phi_range / int(th / theta_start * phi_range / theta_step)
+        if abs(phi_st) < 0.001:
+            phi_st = 360.
+        return phi_st
+
+    for theta in np.arange(theta_range, -theta_step, -theta_step):
+        phi_step = calc_phi_step(theta)
+        for phi in np.arange(phi_start, phi_range, phi_step):
+            yield theta, phi
+
+
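A small sketch of how the generator might be consumed directly (the parameter values are arbitrary):

~~~~~~{.py}
from pmsco.data import holo_grid

# count the grid points of a coarse hemispherical scan.
points = list(holo_grid(theta_step=5., phi_refinement=1.))
print(len(points), points[0])   # e.g. the first tuple is (90.0, 0.0)
~~~~~~
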
+def holo_array(generator: Callable[..., Iterable[Tuple[float, float]]],
+               generator_args: Dict,
+               datatype: str = 'TP',
+               dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
+    """
+    Create a hologram scan grid in a numpy array.
+
+    A holo data array is a numpy structured array containing at least
+    columns for theta (polar angle) and phi (azimuthal angle).
+    The theta and phi columns are filled with angles from the holo_grid (or custom generator) function.
+    The array can contain further columns for energy, intensity, etc. according to the data type specified.
+    These columns are initialized with zeroes.
+
+    @param generator      Generator that yields tuples (theta, phi) for each grid point,
+                          given the keyword arguments generator_args.
+                          Defaults to holo_grid, the traditional Osterwalder holo scan.
+    @param generator_args Keyword arguments to be passed to the generator.
+                          For arguments of the traditional holo scan, see the documentation of holo_grid.
+    @param datatype       See DATATYPES. Must contain 'T' and 'P' dimensions. Defaults to 'TP'.
+    @param dtype          See DTYPES. Must contain a 't' and 'p' column. Takes precedence over datatype.
+                          Defaults to None (not specified).
+    """
+    if not dtype:
+        dtype = DTYPES[datatype]
+
+    tp = np.fromiter(generator(**generator_args), dtype=DTYPES['TP'])
+
+    result = np.zeros(tp.shape, dtype=dtype)
+    result['t'] = tp['t']
+    result['p'] = tp['p']
+
+    return result
+
+
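A minimal usage sketch of holo_array with the default generator (values are arbitrary):

~~~~~~{.py}
from pmsco.data import holo_grid, holo_array

# build a TPI grid (theta, phi, intensity) on a 2-degree holo scan.
scan = holo_array(holo_grid, {'theta_step': 2.}, datatype='TPI')
assert scan.dtype.names == ('t', 'p', 'i')
assert (scan['i'] == 0).all()   # the intensity column is zero-initialized
~~~~~~
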
+def analyse_holoscan_steps(holoscan: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Find the polar and azimuthal steps in a holoscan.
+
+    @param holoscan: structured array with 't' and 'p' columns (TP-like).
+    @return: thetas: unique theta angles. sorted.
+        dtheta: theta steps for each theta
+        dphi: phi step for each theta
+    """
+    thetas, indices, counts = np.unique(holoscan['t'], return_index=True, return_counts=True)
+    dtheta = np.diff(thetas)
+    dtheta = np.append(dtheta, dtheta[-1])
+
+    adjusted_phis = np.append(holoscan['p'], holoscan['p'][-1])
+    phis0 = adjusted_phis[indices]
+    phis1 = adjusted_phis[indices+1]
+    dphi = phis1 - phis0
+    phi_range = counts[-1] * dphi[-1]
+    dphi[counts <= 1] = phi_range
+
+    return thetas, dtheta, dphi
+
+
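And a sketch of the step analysis applied to such a grid (same assumptions as above):

~~~~~~{.py}
from pmsco.data import holo_grid, holo_array, analyse_holoscan_steps

scan = holo_array(holo_grid, {'theta_step': 5.}, datatype='TP')
thetas, dtheta, dphi = analyse_holoscan_steps(scan)
# thetas is sorted ascending; dtheta is 5 everywhere on this regular grid,
# while dphi grows towards the pole (single-point rows get the full phi range).
~~~~~~
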
+def load_plt(filename: GenTextFileLike, int_column: int = -1) -> np.ndarray:
     """
     loads ETPI data from an MSC output (plt) file

@@ -122,7 +233,8 @@ def load_plt(filename, int_column=-1):
     return data


-def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_cluster=False):
+def load_edac_pd(filename: OSFileLike, int_column: int = -1,
+                 energy: float = 0.0, theta: float = 0.0, phi: float = 0.0, fixed_cluster: bool = False) -> np.ndarray:
     """
     load ETPI or ETPAI data from an EDAC PD output file.

@@ -157,7 +269,8 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
         data[i]['i'] = selected intensity column
     @endverbatim
     """
-    with open(filename, "r") as f:
+    with open(filename, "rt", encoding="latin1") as f:
         header1 = f.readline().strip()
         header2 = f.readline().strip()
         if not header1 == '--- scan PD':
@@ -218,7 +331,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
     return etpi


-def load_etpi(filename):
+def load_etpi(filename: GenTextFileLike) -> np.ndarray:
     """
     loads ETPI or ETPIS data from a text file

@@ -253,7 +366,7 @@ def load_etpi(filename):
     return data


-def load_data(filename, dtype=None):
+def load_data(filename: GenTextFileLike, dtype: Optional[npt.DTypeLike] = None):
     """
     load column data (ETPI, and the like) from a text file.

@@ -288,7 +401,7 @@ def load_data(filename, dtype=None):
     return data


-def format_extension(data):
+def format_extension(data: np.ndarray) -> str:
     """
     format the file extension based on the contents of an array.

@@ -299,7 +412,7 @@ def format_extension(data):
     return "." + "".join(data.dtype.names)


-def save_data(filename, data):
+def save_data(filename: OSFileLike, data: npt.ArrayLike) -> None:
     """
     save column data (ETPI, and the like) to a text file.

@@ -315,7 +428,7 @@ def save_data(filename, data):
     np.savetxt(filename, data, fmt='%g')


-def sort_data(data):
+def sort_data(data: np.ndarray) -> None:
     """
     sort scan data (ETPI and the like) in a consistent order.

@@ -338,7 +451,8 @@ def sort_data(data):
     data.sort(kind='mergesort', order=sort_key)


-def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
+def restructure_data(data: np.ndarray, dtype: Optional[npt.DTypeLike] = None,
+                     defaults: Optional[Mapping] = None) -> np.ndarray:
     """
     restructure the type of a data array by adding or removing columns.

@@ -361,6 +475,8 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
     @return: re-structured numpy array or
         @c data if the new and original data types are the same.
     """
+    if dtype is None:
+        dtype = DTYPE_ETPAIS
     if data.dtype == dtype:
         return data
     else:
@@ -378,7 +494,7 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
     return new_data


-def common_dtype(scans):
+def common_dtype(scans: Iterable[Union[npt.ArrayLike, npt.DTypeLike]]) -> npt.DTypeLike:
     """
     determine the common data type for a number of scans.

@@ -409,7 +525,7 @@ def common_dtype(scans):
     return dtype


-def detect_scan_mode(data):
+def detect_scan_mode(data: np.ndarray) -> Tuple[List[str], Dict[str, np.ndarray]]:
     """
     detect the scan mode and unique scan positions in a data array.

@@ -495,7 +611,7 @@ def detect_scan_mode(data):
     return scan_mode, scan_positions


-def filter_tp(data, filter):
+def filter_tp(data: np.ndarray, _filter: np.ndarray) -> np.ndarray:
     """
     select data points from an ETPI array that match theta and phi coordinates of another ETPI array.

@@ -503,7 +619,7 @@ def filter_tp(data, filter):

     @param data    ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).

-    @param filter  ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
+    @param _filter ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
         only 't' and 'p' columns are used.

     @return filtered data (numpy.ndarray)
@@ -512,18 +628,19 @@ def filter_tp(data, filter):
     """
     # copy theta,phi into separate structured arrays
     data_tp = np.zeros_like(data, dtype=[('t', '<i4'), ('p', '<i4')])
-    filter_tp = np.zeros_like(filter, dtype=[('t', '<i4'), ('p', '<i4')])
+    filt_tp = np.zeros_like(_filter, dtype=[('t', '<i4'), ('p', '<i4')])
     # multiply by 10, round to integer
     data_tp['t'] = np.around(data['t'] * 10.0)
     data_tp['p'] = np.around(data['p'] * 10.0)
-    filter_tp['t'] = np.around(filter['t'] * 10.0)
-    filter_tp['p'] = np.around(filter['p'] * 10.0)
+    filt_tp['t'] = np.around(_filter['t'] * 10.0)
+    filt_tp['p'] = np.around(_filter['p'] * 10.0)
     # calculate intersection
-    idx = np.in1d(data_tp, filter_tp)
+    idx = np.in1d(data_tp, filt_tp)
     result = data[idx]
     return result

-def interpolate_hemi_scan(rect_tpi, hemi_tpi):
+
+def interpolate_hemi_scan(rect_tpi: np.ndarray, hemi_tpi: np.ndarray) -> np.ndarray:
     """
     interpolate a hemispherical scan from a rectangular angle scan.

@@ -555,7 +672,9 @@ def interpolate_hemi_scan(rect_tpi, hemi_tpi):
     hemi_tpi['i'][sel_theta] = result
     return hemi_tpi

-def reshape_2d(flat_data, axis_columns, return_column='i'):
+
+def reshape_2d(flat_data: np.ndarray, axis_columns: Sequence[str], return_column: str = 'i') -> \
+        Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """
     reshape an ETPI-like array into a two-dimensional array according to the scan axes.

@@ -564,7 +683,9 @@ def reshape_2d(flat_data, axis_columns, return_column='i'):
     the array must be sorted in the order of axis_labels.

     @param axis_columns    list of column names that designate the axes

+    @param return_column: name of field to return in two dimensions
+
     @return the tuple (result_data, axis0, axis1), where
     @arg result_data (ndarray) new two-dimensional ndarray of the scan
     @arg axis0 (ndarray) scan positions along the first dimension
@@ -579,7 +700,7 @@ def reshape_2d(flat_data, axis_columns, return_column='i'):
     return data.copy(), axis0, axis1


-def calc_modfunc_mean(data):
+def calc_modfunc_mean(data: np.ndarray) -> np.ndarray:
     """
     calculates the modulation function using the mean value of data.
     this is a simplified calculation method
@@ -615,7 +736,7 @@ def calc_modfunc_mean(data):
     return modf


-def calc_modfunc_loess(data, smth=0.4):
+def calc_modfunc_loess(data: np.ndarray, smth: float = 0.4) -> np.ndarray:
     """
     calculate the modulation function using LOESS (locally weighted regression) smoothing.

@@ -669,20 +790,27 @@ def calc_modfunc_loess(data, smth=0.4):
     return modf


-def rfactor(experiment, theory):
+def square_diff_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
     """
-    calculate the R-factor of a calculated modulation function.
+    Calculate the R-factor from the normalized sum of squared differences.

-    if the sigma column is present in experiment and non-zero,
+    If the sigma column is present in experiment and non-zero,
     the R-factor terms are weighted by 1/sigma**2.

-    the input arrays must have the same shape and the coordinate columns must be identical (they are ignored).
-    the array elements are compared element-by-element.
-    terms having NaN intensity are ignored.
+    The input arrays must have the same shape and the coordinate columns must be identical.
+    The array elements are compared element-by-element.
+    The values of the coordinate arrays do not influence the result.
+    Terms having NaN intensity are ignored.

-    @param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
-
-    @param theory: ETPI or ETPAI array containing the calculated modulation functions.
+    This function can be specified in the Scan.rfactor_func parameter of the project.
+
+    @param experiment: (numpy structured array)
+        ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
+        If an `s` field is present and non-zero,
+        the R-factor terms are weighted by 1/sigma**2.
+
+    @param theory: (numpy structured array)
+        ETPI or ETPAI array containing the theoretical function.

     @return scalar R-factor in the range from 0.0 to 2.0.

@@ -702,7 +830,7 @@ def rfactor(experiment, theory):
     return sum1 / sum2


-def scaled_rfactor(scale, experiment, weights, theory):
+def scaled_rfactor_func(scale: float, experiment: np.ndarray, weights: np.ndarray, theory: np.ndarray) -> float:
     """
     calculate the R-factor of a modulation function against the measurement with scaled amplitude.

@@ -732,6 +860,7 @@ def scaled_rfactor(scale, experiment, weights, theory):

     @raise ValueError if all experiment and theory values, or all weights, are zero.
     """
+
     difs = weights * (scale * experiment - theory) ** 2
     sums = weights * (scale ** 2 * experiment ** 2 + theory ** 2)
     sum1 = difs.sum(dtype=np.float64)
@@ -739,7 +868,7 @@ def scaled_rfactor(scale, experiment, weights, theory):
     return sum1 / sum2
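For reference, the quantity evaluated by this function body, written out (a sketch consistent with the code above, with E = experiment, T = theory, w = weights, s = scale):

\[ R(s) = \frac{\sum_i w_i \, (s E_i - T_i)^2}{\sum_i w_i \, (s^2 E_i^2 + T_i^2)} \]

Identical functions (T = sE) give R = 0, perfect anticorrelation (T = -sE) gives R = 2, which matches the documented range of 0.0 to 2.0.
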

-def optimize_rfactor(experiment, theory):
+def optimize_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
     """
     calculate the R-factor of a calculated modulation function against the measurement, adjusting their amplitude.

@@ -750,13 +879,15 @@ def optimize_rfactor(experiment, theory):
     this is useful if the amplitudes of the two functions do not match due to systematic effects
     of the calculation or the measurement.

-    the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor() function.
+    the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor_func() function.
     the initial guess of the scaling factor is 0.7, the constraining boundaries are 1/10 and 10.

     the input arrays must have the same shape and the coordinate columns must be identical (they are ignored).
     the array elements are compared element-by-element.
     terms having NaN intensity are ignored.

+    This function can be specified in the Scan.rfactor_func parameter of the project.
+
     @param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.

     @param theory: ETPI or ETPAI array containing the calculated modulation functions.
@@ -773,13 +904,13 @@ def optimize_rfactor(experiment, theory):
     else:
         wgts = np.ones_like(experiment['i'])

-    result = so.least_squares(scaled_rfactor, 0.7, bounds=(0.1, 10.0), args=(experiment['i'], wgts, theory['i']))
-    result_r = scaled_rfactor(result.x, experiment['i'], wgts, theory['i'])
+    result = so.least_squares(scaled_rfactor_func, 0.7, bounds=(0.1, 10.0), args=(experiment['i'], wgts, theory['i']))
+    result_r = scaled_rfactor_func(result.x, experiment['i'], wgts, theory['i'])

     return result_r


-def alpha_average(data):
+def alpha_average(data: np.ndarray) -> np.ndarray:
     """
     average I(alpha, theta, phi) over alpha.

@@ -809,7 +940,7 @@ def alpha_average(data):
     return result


-def phi_average(data):
+def phi_average(data: np.ndarray) -> np.ndarray:
     """
     average I(theta, phi) over phi.

@@ -827,9 +958,9 @@ def phi_average(data):
     names = list(data.dtype.names)
     names.remove('p')
     dtype = [(name, data.dtype[name].str) for name in names]
-    result = create_data((nt), dtype=dtype)
+    result = create_data((nt,), dtype=dtype)

-    for i,t in enumerate(t_axis):
+    for i, t in enumerate(t_axis):
         sel = np.abs(scan_positions['t'] - t) < 0.01
         for name in names:
             result[name][i] = np.mean(data[name][sel], dtype=np.float64)
@@ -839,7 +970,7 @@ def phi_average(data):
     return result


-def alpha_mirror_average(data):
+def alpha_mirror_average(data: np.ndarray) -> np.ndarray:
     """
     calculate the average of I(alpha, theta, phi) and I(-alpha, theta, phi).

@@ -871,3 +1002,14 @@ def alpha_mirror_average(data):
         logger.warning('asymmetric alpha scan. skipping alpha mirror average.')

     return result1
+
+
+if loess is not None:
+    default_modfunc = calc_modfunc_loess
+    logger.info("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_loess")
+else:
+    default_modfunc = calc_modfunc_mean
+    logger.warning("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_mean")
+
+default_rfactor = square_diff_rfactor
+logger.info("pmsco.data.default_rfactor = pmsco.data.square_diff_rfactor")

1657
pmsco/database.py
File diff suppressed because it is too large
169
pmsco/database/access.py
Normal file
@@ -0,0 +1,169 @@
"""
@package pmsco.database.access
wrapper classes for access to a pmsco database

the most important class to be used is DatabaseAccess.

usage:
~~~~~~{.py}
db = DatabaseAccess()
db.connect("file.db")
with db.session() as session:
    # database access here
    # ...
    # commit transaction
    session.commit()
    # continue in new transaction

# at the end of the context
# the session is closed and orm objects are detached from the database.
~~~~~~

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import fasteners
import logging
from pathlib import Path
import pmsco.database.orm as orm

logger = logging.getLogger(__name__)


class _DummyLock(object):
    """
    dummy lock used for in-memory database.
    """
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


class LockedSession(object):
    """
    database session context manager

    this context manager (to be used in a with statement)
    acquires a lock on the database lock file
    and provides a database session (orm.Session()).

    the session is closed (and pending transactions committed) on exit.
    if an exception occurs, pending transactions are rolled back before the session is closed.

    @note the term _session_ refers to a session in sqlalchemy.
    """
    def __init__(self, lock_file=None):
        self.lock_file = lock_file
        self._session = None
        self._lock = None

    def __enter__(self):
        self._lock = self.lock()
        self._lock.__enter__()
        self._session = orm.Session()
        return self._session

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self._session.close()
        else:
            self._session.rollback()
            self._session.close()
        self._lock.__exit__(exc_type, exc_val, exc_tb)
        self._lock = None

    def lock(self):
        """
        create a file-lock context manager for the database.

        this is either a fasteners.InterProcessLock object on self.lock_file
        or a _DummyLock object if the database is in memory.
        InterProcessLock makes it possible to serialize access to the database by means of a lock file.
        this is necessary if multiple pmsco instances require access to the same database.
        _DummyLock is used with an in-memory database which does not require locking.

        the lock object can be used as context-manager in a with statement.
        """
        if self.lock_file:
            return fasteners.InterProcessLock(self.lock_file)
        else:
            return _DummyLock()


class DatabaseAccess(object):
    """
    basic database connection

    this class maintains a database connection and builds session objects.

    a _session_ corresponds to an sqlalchemy session, which defines the lifecycle of mapped objects.
    a session can open one or multiple (subsequent) transactions.

    usage:
    ~~~~~~{.py}
    db = DatabaseAccess()
    db.connect("file.db")
    with db.session() as session:
        # database access
        session.commit()
    ~~~~~~

    the session object is a context handler.
    it commits the transaction and closes the session at the end of the context.
    if an exception occurs, it rolls back the transaction and closes the session before passing the exception.
    """
    def __init__(self):
        self.db_file = ""
        self.lock_file = ""

    def connect(self, db_file, lock_file=""):
        """
        connect to a new or existing database file.

        if the file does not exist, or if it is empty, a new database schema is created.

        @param db_file: name of a file or ":memory:" for an in-memory database.

        @param lock_file: name of a file that is used to lock the database.
            by default, the db_file name with a suffix of ".lock" is used.
            for most uses, the default should be fine.
            the argument is provided mainly for testing the locking functionality.

            this must be a file that is not used for anything else.
            the file does not need to exist.
            it's best if the file is in the same directory as the database file.
            all clients of a database must use the same lock file.

        @return: None
        """
        self.db_file = db_file

        if lock_file:
            self.lock_file = lock_file
        elif db_file == ":memory:":
            self.lock_file = ""
        else:
            self.lock_file = Path(str(db_file) + ".lock")

        orm.connect(orm.sqlite_link(self.db_file))

    def session(self):
        """
        open a database session.

        this function returns a pmsco.database.access.LockedSession object
        which is a context handler that provides an sqlalchemy session
        that is locked against concurrent access from other DatabaseAccess instances.
        see the class description for an example usage pattern.

        @return: pmsco.database.access.LockedSession() object.
        """
        return LockedSession(self.lock_file)

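A slightly fuller sketch of the session lifecycle (the file name is hypothetical; orm.Project stands in for any mapped class):

~~~~~~{.py}
from pmsco.database.access import DatabaseAccess
import pmsco.database.orm as orm

db = DatabaseAccess()
db.connect("results.db")        # hypothetical file; creates the schema if empty

with db.session() as session:   # acquires results.db.lock for the duration
    session.add(orm.Project(name="demo", code="demo_project"))
    session.commit()
# on a raised exception, LockedSession rolls back before closing
~~~~~~
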
329
pmsco/database/common.py
Normal file
@@ -0,0 +1,329 @@
"""
@package pmsco.database.common
common database operations

this module gathers a number of common database operations.
all functions require an open session object from pmsco.database.access.DatabaseAccess.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import sqlalchemy
import pmsco.database.orm as orm

logger = logging.getLogger(__name__)


def filter_project(query, project_or_name_or_id):
    """
    filter a query by project

    @param query: sqlalchemy query object
    @param project_or_name_or_id: orm.Project object or project name or project id.
    @return: modified query
    """
    if isinstance(project_or_name_or_id, orm.Project):
        query = query.filter(orm.Project == project_or_name_or_id)
    elif isinstance(project_or_name_or_id, int):
        query = query.filter(orm.Project.id == project_or_name_or_id)
    else:
        query = query.filter(orm.Project.name == project_or_name_or_id)
    return query


def filter_job(query, job_or_name_or_id):
    """
    filter a query by job

    @param query: sqlalchemy query object
    @param job_or_name_or_id: orm.Job object or job name or job id.
    @return: modified query
    """
    if isinstance(job_or_name_or_id, orm.Job):
        query = query.filter(orm.Job == job_or_name_or_id)
    elif isinstance(job_or_name_or_id, int):
        query = query.filter(orm.Job.id == job_or_name_or_id)
    else:
        query = query.filter(orm.Job.name == job_or_name_or_id)
    return query
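A sketch of how these filters compose on a query (db as connected above; the join chain mirrors the query helpers below and assumes a Result-to-Model relationship in the orm module):

~~~~~~{.py}
import pmsco.database.orm as orm
from pmsco.database.common import filter_project, filter_job

# narrow a result query to one project (by name) and one job (by id);
# "demo" and 42 are arbitrary values.
with db.session() as session:
    query = session.query(orm.Result).join(orm.Model).join(orm.Job).join(orm.Project)
    query = filter_project(query, "demo")
    query = filter_job(query, 42)
    results = query.all()
~~~~~~
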

def query_params(session, project=None, job=None):
    """
    query parameter names and their associated objects from the database

    the result is a dictionary of orm.Param objects mapped to their respective keys.
    the parameters can be filtered by project and/or job.
    if no arguments are given, parameters from all projects are returned.

    @note make sure previous changes have been committed. else the query may not find all records.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param project: orm.Project object or project name or project id.
        default: don't filter projects.
    @param job: orm.Job object or job name or job id.
        default: don't filter jobs
    @return: dictionary of parameters
    """
    query = session.query(orm.Param).join(orm.ParamValue).join(orm.Model).join(orm.Job).join(orm.Project)
    if project is not None:
        query = filter_project(query, project)
    if job is not None:
        query = filter_job(query, job)
    params = query.all()
    params = {param.key: param for param in params}
    return params


def query_tags(session, project=None, job=None):
    """
    query tag names and their associated objects from the database

    the result is a dictionary of orm.Tag objects mapped to their respective keys.
    the tags can be filtered by project and/or job.
    if no arguments are given, tags from all projects are returned.

    @note the orm.Job.tags mapping is an alternative way to access job tags.

    @note make sure previous changes have been committed. else the query may not find all records.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param project: orm.Project object or project name or project id.
        default: don't filter projects.
    @param job: orm.Job object or job name or job id.
        default: don't filter jobs
    @return: dictionary of tags
    """
    query = session.query(orm.Tag).join(orm.JobTag).join(orm.Job).join(orm.Project)
    if project is not None:
        query = filter_project(query, project)
    if job is not None:
        query = filter_job(query, job)
    tags = query.all()
    tags = {tag.key: tag for tag in tags}
    return tags


def query_job_tags(session, project=None, job=None):
    """
    query tags (keys and values) from the database

    the result is a dictionary of tag values (str) mapped to their respective keys (str).
    the tags can be filtered by project and/or job.
    if no arguments are given, tags from all projects are returned.

    @note for one specific job, this is equivalent to the orm.Job.tags mapping.

    @note make sure previous changes have been committed. else the query may not find all records.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param project: orm.Project object or project name or project id.
        default: don't filter projects.
    @param job: orm.Job object or job name or job id.
        default: don't filter jobs
    @return: tags dictionary {key: value}
    """
    query = session.query(orm.JobTag).join(orm.Job).join(orm.Project)
    if project is not None:
        query = filter_project(query, project)
    if job is not None:
        query = filter_job(query, job)
    job_tags = query.all()
    job_tags = {jt.tag.key: jt.value for jt in job_tags}
    return job_tags


def register_project(session, name, code, allow_existing=False):
    """
    register (insert or query) a project with the database.

    a new project record with the given parameters is inserted into the database.
    if a project of the same name already exists, the existing record is returned.

    @attention the orm.Project.id field is undefined until the session is committed!
        it's better to identify a project by name or orm.Project object.

    @note make sure previous changes have been committed. else the query may not find an existing project.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
        the session is committed if a new project entry has been added.
    @param name: project name. must be unique within the database.
    @param code: name of the project module.
    @param allow_existing: selects the behaviour if a project record exists in the database:
        return the corresponding orm.Project (True) or raise an exception (False, default).
        the exception is ValueError.
    @return: orm.Project object.
        the object can be used and modified as long as the session is active.
        note that the id attribute is invalid until the session is committed!
    @raise ValueError if the project exists and allow_existing is False.
    """
    query = session.query(orm.Project)
    query = query.filter(orm.Project.name == name)
    project = query.one_or_none()

    if project is None:
        project = orm.Project(name=name, code=code)
        session.add(project)
        session.commit()
    elif not allow_existing:
        raise ValueError(f"project {project.name} exists")

    return project


def get_project(session, project_or_name_or_id):
    """
    resolve a project by name or id.

    this function resolves a project specification to an orm.Project object.
    if `project_or_name_or_id` is an orm.Project object, it just returns that object without any checks.
    else, the project is looked up in the database.

    @attention if `project_or_name_or_id` is an orm.Project object the function returns it without checks!
        that means if the object is detached, you cannot use it to query results from the database.
        if you need an object that is valid and in sync with the database,
        resolve it by name or id!

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param project_or_name_or_id: orm.Project object or project name or project id.
    @return: orm.Project object
    """
    if isinstance(project_or_name_or_id, orm.Project):
        project = project_or_name_or_id
    elif isinstance(project_or_name_or_id, int):
        project = session.query(orm.Project).get(project_or_name_or_id)
    else:
        query = session.query(orm.Project)
        query = query.filter(orm.Project.name == project_or_name_or_id)
        project = query.one()
    return project


def register_job(session, project, job_name, allow_existing=False, **job_attr):
    """
    register (insert or query) a new job with the database.

    a new job record with the given parameters is inserted into the database.
    if a job of the same name exists within the given project, the existing record is returned
    (without modifications!).

    @note make sure previous changes have been committed. else the query may not find an existing project.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
        the session is committed if a new job entry has been added.
    @param project: orm.Project object or project name or project id.
    @param job_name: name of job. unique in the project
    @param allow_existing: selects the behaviour if a job record exists in the database:
        return the corresponding orm.Job (True) or raise an exception (False, default).
        the exception is ValueError.
    @param job_attr: optional attributes of the job.
        the keywords correspond to attribute names of the pmsco.database.Job object.
    @return: orm.Job object.
        the object can be used and modified as long as the session is active.
        note that the id attribute is invalid until the session is committed!
    @raise ValueError if the job exists and allow_existing is False.
    """
    project = get_project(session, project)

    query = session.query(orm.Job).join(orm.Project)
    query = query.filter(orm.Project.name == project.name)
    query = query.filter(orm.Job.name == job_name)
    job = query.one_or_none()

    if job is None:
        job = orm.Job()
        job.name = job_name
        job.project = project
        optional_args = {'mode', 'machine', 'git_hash', 'datetime', 'processes', 'hours', 'description'}
        for name, value in job_attr.items():
            if name in optional_args:
                setattr(job, name, value)
        session.add(job)
        session.commit()
    elif not allow_existing:
        raise ValueError(f"a job {job_name} exists in project {project.name}")

    return job
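A sketch of the typical registration sequence using these helpers (all names and metadata values are arbitrary; db is a connected DatabaseAccess as above):

~~~~~~{.py}
import pmsco.database.common as common

with db.session() as session:
    project = common.register_project(session, "demo", "demo_project", allow_existing=True)
    job = common.register_job(session, project, "run-001", allow_existing=True,
                              mode="single", description="arbitrary metadata")
    common.register_job_tags(session, job, {"sample": "Cu(111)"})
~~~~~~
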

def get_job(session, project_or_name_or_id, job_or_name_or_id):
    """
    resolve a job by name or id.

    this function resolves any combination of project and job specification to an orm.Job object.
    if `job_or_name_or_id` is an orm.Job object, it just returns that object without any checks.
    else, the job is looked up in the database.

    @attention if `job_or_name_or_id` is an orm.Job object the function returns it without checks!
        that means if the object is detached, you cannot query results from the database.
        if you need an object that is valid and in sync with the database,
        query the job by name or id!

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param project_or_name_or_id: orm.Project object or project name or project id.
    @param job_or_name_or_id: orm.Job object or job name or job id.
    @return: orm.Job object
    """
    if isinstance(job_or_name_or_id, orm.Job):
        job = job_or_name_or_id
    elif isinstance(job_or_name_or_id, int):
        job = session.query(orm.Job).get(job_or_name_or_id)
    else:
        project = get_project(session, project_or_name_or_id)
        query = session.query(orm.Job).join(orm.Project)
        query = query.filter(orm.Project.name == project.name)
        query = query.filter(sqlalchemy.or_(orm.Job.id == job_or_name_or_id,
                                            orm.Job.name == job_or_name_or_id))
        job = query.one()
    return job


def register_job_tags(session, job, tags):
    """
    insert or update key-value tags of a job

    this is one of many options to populate the Tag and JobTag tables.
    it is not required to use this function.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    @param job: orm.Job object
    @param tags: dictionary of tags
    @return: None
    """
    for k, v in tags.items():
        job.tags[k] = v
    if tags:
        session.commit()


def register_params(session, params):
    """
    register (insert missing) parameter names

    add new parameter names to the global list of parameter names.

    this is one of many options to populate the Param table.
    it is not required to use this function.

    this function implies a session flush.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
        the session is committed if new parameters have been added
    @param params: sequence of parameter names
        param names with leading underscore are ignored.
    @return: None
    """
    existing_params = query_params(session).keys()
    params = [param for param in params if param[0] != '_']
    new_params = set(params) - set(existing_params)
    for k in new_params:
        session.add(orm.Param(key=k))
    if new_params:
        session.commit()
57
pmsco/database/git.py
Normal file
@@ -0,0 +1,57 @@
"""
@package pmsco.database.git
git metadata

this module retrieves the git hash of the running code for job metadata.
this requires that the code is run from a git repository
and that the gitpython package is installed.
gitpython is loaded on demand.
common errors (missing gitpython or invalid repository) are handled.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import importlib


def git():
    """
    import the git module from GitPython

    @return: git module or None if an error occurred
    """
    try:
        return importlib.import_module('git')
    except ImportError:
        return None


def get_git_hash(repo_path=None):
    """
    get the git commit (hash) of the running code (HEAD)

    the method looks for a git repository in the source tree of this module.
    if successful, it returns the hash string of the HEAD commit.

    @param repo_path: path inside the repository to inspect.
        defaults to the location of this module.

    @return: hexadecimal hash string.
        empty string if the file is not in a git repository.
    """
    if repo_path is None:
        repo_path = __file__

    _git = git()
    if _git is not None:
        try:
            repo = _git.Repo(repo_path, search_parent_directories=True)
        except _git.exc.InvalidGitRepositoryError:
            return ""
        else:
            return repo.head.commit.hexsha
    else:
        return ""
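A quick sketch (requires gitpython and a git checkout; otherwise the empty-string fallbacks apply):

~~~~~~{.py}
from pmsco.database.git import get_git_hash

commit = get_git_hash()
if commit:
    print(f"running code at commit {commit}")
else:
    print("not a git checkout or gitpython missing")
~~~~~~
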
406
pmsco/database/ingest.py
Normal file
@@ -0,0 +1,406 @@
"""
@package pmsco.database.ingest

ingest existing data such as flat results files (.dat or .tasks.dat) into a database.

the results file is a space-delimited, general text file
such as produced by pmsco.optimizers.population.Population.save_array().
each line contains one result dataset, the columns correspond to the regular and special parameters.
the first row contains the parameter names.

the main function is ingest_job_results().
the other functions require an open database session from pmsco.database.access.DatabaseAccess.session(),
and ingest the metadata and the actual results, respectively.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import datetime
import logging
import numpy as np
from pathlib import Path
from pmsco.database.access import DatabaseAccess
import pmsco.database.common as common
import pmsco.database.orm as orm
import pmsco.database.util as util

logger = logging.getLogger(__name__)


def insert_result(session, job, index, result, delta=None):
    """
    add or update a calculation result including index and model to the database.

    @param session: (sqlalchemy.Session) database session.
        when updating an existing model, previous changes must have been committed,
        else the model may not be found.
        this function does not commit the transaction.
    @param job: (orm.Job) job object.
        use pmsco.database.common.get_job to retrieve it by id or name.
    @param index: (pmsco.dispatch.CalcID or dict)
        calculation index.
        in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
        '_model', '_scan', '_domain', '_emit', '_region'.
        extra values in the dictionary are ignored.
        undefined indices must be -1.
    @param result: (dict) dictionary containing the parameter values and the '_rfac' result.
        may also contain the special values '_gen', '_particle', '_timestamp'.
        '_gen' and '_particle' are integers and default to None.
        '_timestamp' can be numeric (seconds since jan 1, 1970)
        or an object that implements a timestamp function like datetime.datetime.
        it defaults to the current (local) time.
    @param delta: (dict) dictionary containing the delta values.
        the keys must correspond to model keys in the result dictionary.
        this argument is optional.

    @return: (orm.Model, orm.Result) model and result objects
    """
    model_obj = store_model(session, job, index, result)
    result_obj = store_result_data(session, model_obj, index, result)
    store_param_values(session, model_obj, result, delta)
    return model_obj, result_obj
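A sketch of feeding one optimizer result into the database (db and job as registered in the sketches above; index and parameter values are arbitrary, and dict-style index keys carry the documented leading underscore):

~~~~~~{.py}
import pmsco.database.common as common
import pmsco.database.ingest as ingest

index = {'_model': 0, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1}
result = {'dist': 2.034, 'angle': 12.5, '_rfac': 0.385, '_gen': 3, '_particle': 7}

with db.session() as session:
    job = common.get_job(session, "demo", "run-001")
    common.register_params(session, result.keys())   # underscore keys are ignored
    model_obj, result_obj = ingest.insert_result(session, job, index, result)
    session.commit()                                  # insert_result does not commit
~~~~~~
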
|
||||
def store_model(session, job, index, result):
|
||||
"""
|
||||
add or update the model entry for a calculation result in the database.
|
||||
|
||||
the method updates the Models table.
|
||||
the model is identified by job and index.model.
|
||||
the result is identified by job and index.
|
||||
if the model exists in the database, it is updated.
|
||||
|
||||
@param session: (sqlalchemy.Session) database session.
|
||||
when updating an existing model, previous changes must have been committed,
|
||||
else the model may not be found.
|
||||
this function does not commit the transaction.
|
||||
@param job: (orm.Job) job object.
|
||||
use pmsco.database.common.get_object to retrieve by id or name.
|
||||
@param index: (pmsco.dispatch.CalcID or dict)
|
||||
calculation index.
|
||||
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
|
||||
'_model', '_scan', '_domain', '_emit', '_region'.
|
||||
extra values in the dictionary are ignored.
|
||||
undefined indices must be -1.
|
||||
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
|
||||
may also contain the special values '_gen' and '_particle'.
|
||||
'_gen' and '_particle' default to None if not present.
|
||||
|
||||
@return: (orm.Model) updated model object
|
||||
"""
|
||||
assert isinstance(job, orm.Job)
|
||||
|
||||
model_dict = {'gen': None, 'particle': None}
|
||||
model_dict.update(util.special_params(result))
|
||||
try:
|
||||
model_dict['model'] = index.model
|
||||
except AttributeError:
|
||||
model_dict['model'] = index['_model']
|
||||
|
||||
q = session.query(orm.Model)
|
||||
q = q.filter(orm.Model.job == job)
|
||||
q = q.filter(orm.Model.model == model_dict['model'])
|
||||
model_obj = q.one_or_none()
|
||||
|
||||
if model_obj is None:
|
||||
model_obj = orm.Model()
|
||||
model_obj.job = job
|
||||
model_obj.model = model_dict['model']
|
||||
session.add(model_obj)
|
||||
|
||||
model_obj.gen = model_dict['gen']
|
||||
model_obj.particle = model_dict['particle']
|
||||
|
||||
return model_obj
|
||||
|
||||
|
||||
def store_result_data(session, model_obj, index, result):
    """
    add or update a result in the database.

    the method updates the Results table.
    the model is identified by model_id.
    the result is identified by model_id and index.
    if the result exists in the database, it is updated.

    @param session: (sqlalchemy.Session) database session.
    when updating an existing model, previous changes must have been committed,
    else the result entry may not be found.
    this function does not commit the transaction.
    @param model_obj: (orm.Model) model object that is already part of the session.
    @param index: (pmsco.dispatch.CalcID or dict)
    calculation index.
    in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
    '_model', '_scan', '_domain', '_emit', '_region'.
    extra values in the dictionary are ignored.
    undefined indices must be -1.
    @param result: (dict) dictionary containing the parameter values and the '_rfac' result.
    may also contain the special values '_gen', '_particle', '_timestamp'.
    '_gen' and '_particle' are integers and default to None.
    '_timestamp' can be numeric (seconds since jan 1, 1970)
    or an object that implements a timestamp function like datetime.datetime.
    it defaults to the current (local) time.

    @return: (orm.Result) updated Results object.
    """
    assert isinstance(model_obj, orm.Model)

    result_dict = util.special_params(result)
    result_dict.update(util.special_params(index))

    q = session.query(orm.Result)
    q = q.filter(orm.Result.model == model_obj)
    q = q.filter(orm.Result.scan == result_dict['scan'])
    q = q.filter(orm.Result.domain == result_dict['domain'])
    q = q.filter(orm.Result.emit == result_dict['emit'])
    q = q.filter(orm.Result.region == result_dict['region'])

    result_obj = q.one_or_none()
    if result_obj is None:
        result_obj = orm.Result()
        result_obj.model = model_obj
        result_obj.scan = result_dict['scan']
        result_obj.domain = result_dict['domain']
        result_obj.emit = result_dict['emit']
        result_obj.region = result_dict['region']
        session.add(result_obj)

    result_obj.rfac = result_dict['rfac']
    try:
        result_obj.timestamp = result_dict['timestamp'].timestamp()
    except KeyError:
        result_obj.timestamp = datetime.datetime.now().timestamp()
    except AttributeError:
        result_obj.timestamp = result_dict['timestamp']
    try:
        result_obj.secs = result_dict['secs']
    except KeyError:
        pass

    return result_obj

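# an illustrative sketch of store_result_data (values are hypothetical;
# '_timestamp' may be a datetime object or a plain number of seconds):
def _example_store_result_data(session, model_obj):
    import datetime as _dt
    from pmsco.dispatch import CalcID
    index = CalcID(model=10, scan=0, domain=-1, emit=-1, region=-1)
    result = {'_rfac': 0.81, '_timestamp': _dt.datetime.now()}
    return store_result_data(session, model_obj, index, result)
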
def store_param_values(session, model_obj, result, delta=None):
    """
    add or update parameter values of a model in the database.

    the method updates the ParamValues table.

    @param session: (sqlalchemy.Session) database session.
    when updating an existing model, previous changes must have been committed,
    else the result entry may not be found.
    this function flushes the session at the end.
    it does not commit the transaction.
    @param model_obj: (orm.Model) model object that is already part of the session.
    @param result: (dict) dictionary containing the parameter values.
    the parameter names must already be registered in the Params table.
    special values (with a leading underscore) are ignored.
    extra parameters may raise a KeyError.
    @param delta: (dict) dictionary containing the delta values.
    the keys must correspond to model keys in the result dictionary.
    this argument is optional.

    @return: None

    @raise: KeyError if a parameter key is not registered.
    """
    assert isinstance(model_obj, orm.Model)

    for key in util.regular_params(result).keys():
        pv = orm.ParamValue()
        pv.model = model_obj
        pv.param_key = key
        pv.value = result[key]
        try:
            pv.delta = delta[key]
        except (TypeError, KeyError):
            pass
        session.add(pv)
    session.flush()

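# an illustrative sketch of store_param_values (hypothetical parameter names;
# the keys must have been registered in the Params table beforehand):
def _example_store_param_values(session, model_obj):
    result = {'dA': 25.6, 'dB': 39.0, '_rfac': 0.77}   # special keys are ignored
    delta = {'dA': 0.1}                                # deltas are optional
    store_param_values(session, model_obj, result, delta=delta)
    session.commit()
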
def ingest_results_file(session, project, job, filename):
    """
    import a results file into the database.

    this is a sub-method used by ingest().

    a job entry with the given id must exist,
    but there must be no model entries referencing the job.
    it is not possible to update existing models, results or parameter values using this method.
    instead, you have to delete the job (which also deletes all dependent entries)
    and re-import the results.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    the session is flushed but not committed at the end of this function.
    @param project: orm.Project object or project name or project id.
    @param job: orm.Job object or job name or job id.
    @param filename: path and name of the results file.

    @return: None.

    @raise ValueError if the job already has model entries.
    """
    job = common.get_job(session, project, job)
    assert isinstance(job, orm.Job)

    data = np.atleast_1d(np.genfromtxt(filename, names=True))

    try:
        unique_models, unique_index = np.unique(data['_model'], True)
    except ValueError:
        unique_models = np.array([0])
        unique_index = np.array([0])
    unique_data = data[unique_index]

    special_params = util.special_params(data.dtype.names)

    model_objs = {}
    # iterate on models
    for _data in unique_data:
        try:
            _model = _data['_model']
        except ValueError:
            _model = unique_models[0]
        model = orm.Model(job=job, model=_model)
        if 'gen' in special_params:
            model.gen = _data['_gen']
        if 'particle' in special_params:
            model.particle = _data['_particle']
        session.add(model)
        model_objs[_model] = model
        for key, value in util.regular_params(_data).items():
            model.values[key] = value
        session.flush()

    # iterate on results
    for _data in data:
        try:
            _model = _data['_model']
        except ValueError:
            _model = unique_models[0]
        result_entry = {'model': None,
                        'scan': -1,
                        'domain': -1,
                        'emit': -1,
                        'region': -1,
                        'rfac': None}
        result_entry.update(util.special_params(_data))
        result_entry['model'] = model_objs[_model]
        result = orm.Result()
        for key, value in result_entry.items():
            setattr(result, key, value)
        session.add(result)

    session.flush()

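# for orientation: the results file parsed by ingest_results_file is a
# whitespace-delimited text file with a header row, as read by numpy's
# genfromtxt(names=True). a hypothetical example (the parameter columns dA
# and dB are made up; only the underscore columns are interpreted specially):
#
#   _model _scan _domain _emit _region _rfac _gen _particle dA   dB
#   0      -1    -1      -1    -1      0.77  1    0         25.6 39.0
#   1      -1    -1      -1    -1      0.81  1    1         25.9 38.7
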
def ingest_job_metadata(session, **kwargs):
    """
    ingest job metadata.

    @param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
    the session is flushed but not committed at the end of this function.

    @param kwargs: dictionary of function arguments.
    the dictionary contains the following values.
    all arguments are required unless noted.
    @arg 'resultsfile' (required) name of the .tasks.dat results file.
    @arg 'project' (required) unique name of the project.
    @arg 'code' (optional) name of the project code.
    @arg 'job' (required) name of the calculation job. the job name must not exist for the project yet.
    @arg 'mode' (required) pmsco optimization mode.
    @arg 'machine' (optional) name of the machine where the job ran.
    @arg 'processes' (optional) number of processes.
    @arg 'hours' (optional) run time in hours (wall time).
    @arg 'git_hash' (optional) git hash of the code revision.
    @arg 'datetime' (datetime.datetime) time stamp (optional).
    if not specified, the argument defaults to the time stamp of the results file.
    hint: the constructor of a datetime object is
    `datetime.datetime(year, month, day, hour, minute, second)`.
    @arg 'description' (optional) meaningful description of the calculation job, up to the user.
    @arg 'jobtags' (dict, optional) key=value tags to be associated with the job.

    @return (orm.Project, orm.Job) orm objects of the inserted records.

    @raise sqlalchemy.exc.IntegrityError if the job already exists in the database.
    """

    if 'datetime' not in kwargs:
        rf = Path(kwargs['resultsfile'])
        kwargs['datetime'] = datetime.datetime.fromtimestamp(rf.stat().st_mtime)

    # 'code' is optional, hence kwargs.get rather than kwargs['code']
    project = common.register_project(session, kwargs['project'], kwargs.get('code'))
    job = common.register_job(session, project, kwargs['job'], **kwargs)
    try:
        common.register_job_tags(session, job, kwargs['jobtags'])
    except KeyError:
        pass

    session.flush()
    return project, job

def ingest_job_results(**kwargs):
    """
    import results from a calculation job.

    this function contains all steps necessary to import the results (tasks.dat)
    from a calculation job into a database.
    it registers the project and job, and imports the results data.
    the project may exist in the database; the job must not exist (raises an exception).

    arguments can be specified as dict (**d) or in keyword=value form.

    @param kwargs: dictionary of function arguments.
    the dictionary contains the following values.
    all arguments are required unless noted.
    @arg 'workdir' (optional) path to the working directory.
    the working directory of the operating system is changed.
    this is the root for relative paths of the database and results files.
    if not specified, the working directory is unchanged.
    @arg 'dbfile' (required) name of the database file.
    @arg 'project' (required) unique name of the project.
    @arg 'code' (optional) name of the project code.
    @arg 'job' (required) name of the calculation job. the job name must not exist for the project yet.
    @arg 'mode' (required) pmsco optimization mode.
    @arg 'machine' (optional) name of the machine where the job ran.
    @arg 'processes' (optional) number of processes.
    @arg 'hours' (optional) run time in hours (wall time).
    @arg 'git_hash' (optional) git hash of the code revision.
    @arg 'datetime' (datetime.datetime) time stamp (optional).
    if not specified, the argument defaults to the time stamp of the results file.
    hint: the constructor of a datetime object is
    `datetime.datetime(year, month, day, hour, minute, second)`.
    @arg 'description' (optional) meaningful description of the calculation job, up to the user.
    @arg 'jobtags' (dict, optional) key=value tags to be associated with the job.
    @arg 'resultsfile' (required) name of the .tasks.dat results file.

    @return dict with 'project_id' and 'job_id'.
    these are the database ids of the project and job records.

    @raise sqlalchemy.exc.IntegrityError if the job already exists in the database.
    """
    try:
        wd = Path(kwargs['workdir'])
    except KeyError:
        pass
    else:
        # Path.cwd() only reports the current directory; os.chdir actually changes it
        import os
        os.chdir(wd)

    dba = DatabaseAccess()
    dba.connect(kwargs['dbfile'])
    with dba.session() as session:
        project, job = ingest_job_metadata(session, **kwargs)
        ingest_results_file(session, project, job, kwargs['resultsfile'])
        session.commit()
        ref = {'project_id': project.id, 'job_id': job.id}

    return ref

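# an end-to-end usage sketch of ingest_job_results (file names and metadata
# are hypothetical; arguments can also be passed as a dict using **kwargs):
def _example_ingest_job_results():
    ref = ingest_job_results(dbfile='results.db',
                             project='my_project',
                             job='job0001',
                             mode='swarm',
                             resultsfile='job0001.tasks.dat',
                             description='test run')
    return ref['project_id'], ref['job_id']
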
746
pmsco/database/orm.py
Normal file
@@ -0,0 +1,746 @@
"""
@package pmsco.database.orm
pmsco results database object-relational mapper

this module declares the database schema and object mapping.
the object-relational mapping uses
the [sqlalchemy framework](https://docs.sqlalchemy.org/en/13/orm/tutorial.html).
the database backend is sqlite3.

for examples of how to use the database, see the ingest module and the unit tests.

@author Matthias Muntwiler

@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

"""
import datetime

from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import Column, Sequence, ForeignKey
from sqlalchemy import Boolean, Integer, Float, String, DateTime
from sqlalchemy.engine import Engine
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import object_session
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import validates
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.orm.exc import NoResultFound

import numpy as np
import sqlite3
from pmsco.dispatch import CalcID
import pmsco.database.util as db_util

# make sure sqlite understands numpy data types
sqlite3.register_adapter(np.float64, float)
sqlite3.register_adapter(np.float32, float)
sqlite3.register_adapter(np.int64, int)
sqlite3.register_adapter(np.int32, int)


Base = declarative_base()
engine = None
Session = sessionmaker()

class Project(Base):
    """
    database object representing a project

    @note there is an implicit constructor with keyword arguments that correspond to the attributes.
    """

    ## @var id
    # (int, primary key) database id of the project

    ## @var name
    # project name, should be short, must be unique within the database

    ## @var code
    # name of the project code

    ## @var jobs
    # collection of related jobs
    #
    # defines the relationship between Project and Job objects.
    # the instance attribute maps job names (str) to Job objects.

    __tablename__ = "Projects"
    id = Column(Integer, Sequence('project_id_seq'), primary_key=True)
    name = Column(String(50, collation='NOCASE'), nullable=False, unique=True)
    code = Column(String(50, collation='NOCASE'))
    jobs = relationship('Job', backref='project',
                        collection_class=attribute_mapped_collection('name'),
                        cascade="all, delete, delete-orphan", lazy='joined')

    def __repr__(self):
        return f'Project({repr(self.name)}, {repr(self.code)})'

class Job(Base):
    """
    database object representing a calculation job

    a job object holds several descriptive values of a calculation job.
    it also refers to a project.

    tags are key-value pairs that describe the job in standardized terms.
    they can provide a consistent classification scheme across jobs and projects.
    for example, they can store special project arguments that may be important
    to distinguish calculations in different stages or contexts.

    the class also defines mapping and proxy objects that simplify the use of tags and models.
    explicit creation of Tag and JobTag objects is then not necessary.

    @attention after modifying the mapped collections job_tags, tags or models,
    make sure to call flush() or commit() on the session
    before accessing those mappings in other objects,
    else integrity errors may occur!
    """

    ## @var id
    # (int, primary key) database id of the job

    ## @var project_id
    # (int, foreign key) database id of the related project

    ## @var name
    # job name, should be short, must be unique within a project

    ## @var mode
    # pmsco calculation mode

    ## @var machine
    # name of the computing facility

    ## @var git_hash
    # git hash of the used code if under version control

    ## @var datetime
    # start date and time of the job, ISO format (yyyy-mm-dd hh:mm:ss)

    ## @var processes
    # number of processes

    ## @var hours
    # job run time (wall time) in hours

    ## @var description
    # up to the user

    ## @var job_tags
    # collection of related job tags
    #
    # defines the relationship between Job and JobTag objects.
    # the instance attribute maps tag keys (str) to JobTag objects.

    ## @var tags
    # collection of tags
    #
    # maps tag keys (str) to tag values (str).
    # this is an association proxy of job_tags.

    ## @var models
    # collection of related models
    #
    # defines the relationship between Job and Model objects.
    # the instance attribute maps model numbers to Model objects.

    __tablename__ = "Jobs"
    id = Column(Integer, Sequence('job_id_seq'), primary_key=True)
    project_id = Column(Integer, ForeignKey('Projects.id'), index=True)
    name = Column(String(50, collation='NOCASE'), nullable=False)
    mode = Column(String(20, collation='NOCASE'))
    machine = Column(String(50, collation='NOCASE'))
    git_hash = Column(String(50, collation='NOCASE'))
    datetime = Column(String(50))
    processes = Column(Integer)
    hours = Column(Float)
    description = Column(String(200, collation='NOCASE'))

    job_tags = relationship('JobTag', back_populates='job',
                            collection_class=attribute_mapped_collection('tag_key'),
                            cascade="all, delete, delete-orphan")
    # mapping tag_key -> tag_value
    tags = association_proxy('job_tags', 'value', creator=lambda k, v: JobTag(key=k, value=v))

    models = relationship('Model', back_populates='job',
                          collection_class=attribute_mapped_collection('model'),
                          cascade="all, delete, delete-orphan")

    def __repr__(self):
        try:
            project_name = repr(self.project.name)
        except AttributeError:
            project_name = None
        try:
            job_name = repr(self.name)
        except AttributeError:
            job_name = None
        return f'Job({project_name}, {job_name}, {repr(self.mode)})'

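# an illustrative sketch of the tags association proxy (tag and job names are
# hypothetical): JobTag and Tag objects are created behind the scenes; remember
# to flush or commit before reading the mappings from other objects.
def _example_job_tags(session, project):
    job = Job(name='job0001', mode='swarm')
    job.project = project
    job.tags['sample'] = 'Cu(111)'    # creates Tag and JobTag implicitly
    session.add(job)
    session.flush()
    return job.tags['sample']
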
class Tag(Base):
    """
    database object representing a tag name
    """

    ## @var id
    # (int, primary key) database id of the tag name

    ## @var key
    # tag name/key, should be short, must be unique

    ## @var tag_jobs
    # collection of related JobTag objects
    #
    # defines the relationship between Tag and JobTag objects.

    __tablename__ = "Tags"
    id = Column(Integer, Sequence('tag_id_seq'), primary_key=True)
    key = Column(String(20, collation='NOCASE'), nullable=False, unique=True)

    tag_jobs = relationship('JobTag', back_populates='tag', cascade="all, delete, delete-orphan")

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return f'Tag({repr(self.key)})'

class JobTag(Base):
    """
    association object class for job tags

    Job - Tag is a many-to-many relationship built using this association class.
    by using the dictionary-like Job.tags proxy, explicit creation of association objects can be avoided.

    the class applies the
    [UniqueObjectValidatedOnPending pattern](https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObjectValidatedOnPending)
    to look up existing tags in the database when a Tag object is needed and only the key is given.
    """

    ## @var id
    # (int, primary key) database id of the job tag

    ## @var tag_id
    # (int, foreign key) database id of the related tag name

    ## @var job_id
    # (int, foreign key) database id of the related job

    ## @var value
    # value (str) of the job tag

    ## @var tag
    # associated Tag object
    #
    # defines the relationship between JobTag and Tag objects

    ## @var job
    # associated Job object
    #
    # defines the relationship between JobTag and Job objects

    ## @var tag_key
    # key (name) of the associated Tag object
    #
    # this is an association proxy that provides direct access to tag.key
    # or links to or creates a Tag object behind the scenes.

    __tablename__ = "JobTags"
    id = Column(Integer, Sequence('jobtag_id_seq'), primary_key=True)
    tag_id = Column(Integer, ForeignKey('Tags.id'), index=True)
    job_id = Column(Integer, ForeignKey('Jobs.id'), index=True)
    value = Column(String(200, collation='NOCASE'))

    tag = relationship("Tag", back_populates="tag_jobs")
    job = relationship("Job", back_populates="job_tags")
    tag_key = association_proxy("tag", "key")

    def __init__(self, key=None, value=None):
        if key is not None:
            self.tag_key = key
        self.value = value

    @validates("tag")
    def _validate_tag(self, key, value):
        """
        receive the event that occurs when `jobtag.tag` is set.

        if the object is present in a Session, then make sure it's the Tag
        object that we looked up from the database.

        otherwise, do nothing and we'll fix it later when the object is
        put into a Session.

        @param key: attribute name, i.e., 'tag'
        @param value: a Tag object
        """
        sess = object_session(self)
        if sess is not None:
            return _setup_tag(sess, value)
        else:
            return value


@event.listens_for(Session, "transient_to_pending")
def _validate_tag(session, object_):
    """
    receive a JobTag object when it gets attached to a Session to correct its unique Tag relationship.
    """
    if isinstance(object_, JobTag):
        if object_.tag is not None and object_.tag.id is None:
            old_tag = object_.tag
            new_tag = _setup_tag(session, object_.tag)
            if new_tag is not old_tag:
                if old_tag in session:
                    session.expunge(old_tag)
                object_.tag = new_tag


def _setup_tag(session, tag_object):
    """
    given a Session and a Tag object, return the correct Tag object from the database.
    """
    with session.no_autoflush:
        try:
            return session.query(Tag).filter_by(key=tag_object.key).one()
        except NoResultFound:
            return tag_object

class Model(Base):
    """
    database object representing a model

    the object holds the model number (which is unique within the context of a single job only),
    the diagnostic generation and particle values, and refers to the job where the model is used.

    the class also defines relationship properties that simplify access to referenced objects.
    for instance, parameter values can be accessed via the values['param_key'] mapping proxy.

    examples:
    ~~~~~~{.py}
    model = Model(model=10, gen=5, particle=2)
    model.job = job1_object

    model.values['dA'] = 25.6
    model.deltas['dA'] = 0.1

    pv = ParamValue(value=39.0, delta=-0.3)
    model.param_values['dB'] = pv

    result = Result(calc_id=calc_id, rfac=0.77)
    model.results.append(result)
    ~~~~~~

    @attention after modifying the mapped collections param_values, values or deltas,
    make sure to call flush() or commit() on the session
    before accessing those mappings in another model,
    else integrity errors may occur!
    """

    ## @var id
    # (int, primary key) database id of the model

    ## @var job_id
    # (int, foreign key) database id of the related job

    ## @var model
    # (int) model number as used in the task index of pmsco
    #
    # @note the model number is not unique in the database as multiple jobs can produce the same task indices.
    # the unique id (self.id) is not used in pmsco code.

    ## @var gen
    # (int) generation number assigned by some optimizers. defaults to None.

    ## @var particle
    # (int) particle number assigned by some optimizers. defaults to None.

    ## @var job
    # associated Job
    #
    # defines the relationship between Model and Job objects.

    ## @var results
    # collection of Result objects
    #
    # defines the relationship between Model and Result objects.

    ## @var param_values
    # collection of ParamValue objects
    #
    # defines the relationship between Model and ParamValue objects.
    # the instance attribute maps parameter keys to ParamValue objects.

    ## @var values
    # collection of parameter values
    #
    # this is an association proxy that maps parameter keys to parameter values (ParamValue.value).
    # ParamValue objects are accessed and created behind the scenes.

    ## @var deltas
    # collection of delta values
    #
    # this is an association proxy that maps parameter keys to parameter deltas (ParamValue.delta).
    # ParamValue objects are accessed and created behind the scenes.

    __tablename__ = "Models"
    id = Column(Integer, Sequence('model_id_seq'), primary_key=True)
    job_id = Column(Integer, ForeignKey('Jobs.id'), index=True)
    model = Column(Integer, index=True)
    gen = Column(Integer)
    particle = Column(Integer)

    job = relationship("Job", back_populates="models")
    results = relationship('Result', back_populates='model', cascade="all, delete, delete-orphan")
    # mapping param_key -> ParamValue object
    param_values = relationship('ParamValue', back_populates='model',
                                collection_class=attribute_mapped_collection('param_key'),
                                cascade="all, delete, delete-orphan")

    # mapping param_key -> param_value
    values = association_proxy('param_values', 'value', creator=lambda k, v: ParamValue(key=k, value=v))
    deltas = association_proxy('param_values', 'delta', creator=lambda k, v: ParamValue(key=k, delta=v))

    def __repr__(self):
        return f'Model(id={repr(self.id)}, job_id={repr(self.job_id)}, model={repr(self.model)})'

    def as_dict(self):
        """
        object properties in a dictionary.

        the dictionary keys correspond to the column names of numpy arrays.
        the mapping db_field -> column name is declared in pmsco.database.util.DB_SPECIAL_PARAMS.

        @return: (dict)
        """
        d = {'_db_model_id': self.id}
        for attr, key in db_util.DB_SPECIAL_PARAMS.items():
            try:
                d[key] = getattr(self, attr)
            except AttributeError:
                pass
        return d

class Result(Base):
    """
    database object representing a calculation result

    the result object holds the calculated R-factor per job and calculation index.

    the calculation index (CalcID) is not unique in the database because the database
    may contain results from multiple jobs.
    thus, the object links to a Model object which is unique.
    the calc_id property can be used to reconstruct a CalcID.
    """

    ## @var id
    # (int, primary key) database id of the result

    ## @var model_id
    # (int, foreign key) database id of the related model

    ## @var model
    # associated Model object
    #
    # defines the relationship between Result and Model objects.
    #
    # @attention do not confuse the Result.model and Model.model attributes of same name!
    # to obtain the model number to which a result belongs, use Result.model.model.

    ## @var scan
    # (int) scan index as used in the calculations

    ## @var domain
    # (int) domain index as used in the calculations

    ## @var emit
    # (int) emitter index as used in the calculations

    ## @var region
    # (int) region index as used in the calculations

    ## @var rfac
    # (float) calculated R-factor

    ## @var timestamp
    # (float) end date and time of this calculation task
    #
    # the float value represents seconds since jan 1, 1970 (datetime.datetime.timestamp).
    # the datetime proxy converts to and from python datetime.datetime.

    ## @var datetime
    # (datetime.datetime) end date and time of this calculation task
    #
    # this is a conversion proxy for timestamp.

    ## @var secs
    # (float) total duration of the calculation task in seconds
    #
    # total cpu time necessary to get this result (including child tasks) in seconds.

    ## @var calc_id
    # (CalcID) calculation task index
    #
    # conversion proxy for the task index components.
    #
    # on assignment, the scan, domain, emit and region attributes are updated.
    # it does not update the model index as it is not stored by this object!
    # the model index must be set separately in the linked Model object.

    __tablename__ = "Results"
    id = Column(Integer, Sequence('result_id_seq'), primary_key=True)
    model_id = Column(Integer, ForeignKey('Models.id'), index=True)
    scan = Column(Integer, index=True)
    domain = Column(Integer, index=True)
    emit = Column(Integer, index=True)
    region = Column(Integer, index=True)
    rfac = Column(Float)
    timestamp = Column(Float)
    secs = Column(Float)

    model = relationship("Model", back_populates="results")

    def __init__(self, calc_id=None, scan=None, domain=None, emit=None, region=None,
                 rfac=None, timestamp=None, secs=None):
        if calc_id is not None:
            self.calc_id = calc_id
        else:
            self.scan = scan
            self.domain = domain
            self.emit = emit
            self.region = region
        self.rfac = rfac
        self.timestamp = timestamp
        self.secs = secs

    def __repr__(self):
        return f'Result(model_id={repr(self.model_id)}, calc_id={repr(self.calc_id)}, rfac={repr(self.rfac)})'

    @property
    def calc_id(self):
        return CalcID(self.model.model, self.scan, self.domain, self.emit, self.region)

    @calc_id.setter
    def calc_id(self, calc_id):
        self.scan = calc_id.scan
        self.domain = calc_id.domain
        self.emit = calc_id.emit
        self.region = calc_id.region

    @property
    def datetime(self):
        return datetime.datetime.fromtimestamp(self.timestamp)

    @datetime.setter
    def datetime(self, value):
        self.timestamp = value.timestamp()

    def as_dict(self):
        """
        object properties in a dictionary.

        the dictionary keys correspond to the column names of numpy arrays.
        the mapping db_field -> column name is declared in pmsco.database.util.DB_SPECIAL_PARAMS.

        @return: (dict)
        """
        d = {'_db_result_id': self.id}
        for attr, key in db_util.DB_SPECIAL_PARAMS.items():
            try:
                d[key] = getattr(self, attr)
            except AttributeError:
                pass
        return d

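# an illustrative sketch of the Result conversion proxies (all values are
# hypothetical; a linked Model object must supply the model number):
def _example_result_proxies(model):
    import datetime as _dt
    r = Result(calc_id=CalcID(model=10, scan=0, domain=-1, emit=-1, region=-1),
               rfac=0.77)
    r.model = model                   # the model level lives on the Model object
    r.datetime = _dt.datetime.now()   # stored internally as a float timestamp
    return r.calc_id, r.timestamp
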
class Param(Base):
    """
    database object representing a parameter

    the parameter object holds the name (or key) of a calculation parameter.

    explicit creation of parameter objects can be avoided by using the mappings of the Model class.
    """

    ## @var id
    # (int, primary key) database id of the parameter name

    ## @var key
    # parameter name/key as used in calculations, should be very short, must be unique

    ## @var param_values
    # collection of related ParamValue objects
    #
    # defines the relationship between Param and ParamValue objects.

    __tablename__ = "Params"
    id = Column(Integer, Sequence('param_id_seq'), primary_key=True)
    key = Column(String(20, collation='NOCASE'), nullable=False, unique=True)

    param_values = relationship('ParamValue', back_populates='param', cascade="all, delete, delete-orphan")

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return f'Param({repr(self.key)})'

class ParamValue(Base):
    """
    association object class for parameter values

    Model - Param is a many-to-many relationship built using this association class.
    by using the dictionary-like Model.values and Model.deltas proxies,
    explicit creation of association objects can be avoided.

    the class applies the
    [UniqueObjectValidatedOnPending pattern](https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObjectValidatedOnPending)
    to look up existing params in the database when a Param object is needed and only the key is given.
    """

    ## @var id
    # (int, primary key) database id of the parameter value

    ## @var param_id
    # (int, foreign key) database id of the related parameter name

    ## @var model_id
    # (int, foreign key) database id of the related model

    ## @var value
    # (float) numeric value of the parameter

    ## @var delta
    # (float) numeric delta value of the parameter (reported by some optimizers)

    ## @var param
    # associated Param object
    #
    # defines the relationship between ParamValue and Param objects

    ## @var model
    # associated Model object
    #
    # defines the relationship between ParamValue and Model objects

    ## @var param_key
    # key (name) of the associated Param object
    #
    # this is an association proxy that provides direct access to param.key.
    # it accesses or creates Param objects behind the scenes.

    __tablename__ = "ParamValues"
    id = Column(Integer, Sequence('paramvalue_id_seq'), primary_key=True)
    param_id = Column(Integer, ForeignKey('Params.id'), index=True)
    model_id = Column(Integer, ForeignKey('Models.id'), index=True)
    value = Column(Float)
    delta = Column(Float)

    param = relationship("Param", back_populates="param_values")
    model = relationship("Model", back_populates="param_values")

    param_key = association_proxy('param', 'key')

    def __init__(self, model=None, param=None, key=None, value=None, delta=None):
        if model is not None:
            self.model = model
        if param is not None:
            self.param = param
        elif key is not None:
            self.param_key = key
        self.value = value
        self.delta = delta

    @validates("param")
    def _validate_param(self, key, value):
        """
        receive the event that occurs when `paramvalue.param` is set.

        if the object is present in a Session, then make sure it's the Param
        object that we looked up from the database.

        otherwise, do nothing and we'll fix it later when the object is put into a Session.
        """
        sess = object_session(self)
        if sess is not None:
            return _setup_param(sess, value)
        else:
            return value


@event.listens_for(Session, "transient_to_pending")
def _validate_param(session, object_):
    """
    receive a ParamValue object when it gets attached to a Session to correct its unique Param relationship.
    """
    if isinstance(object_, ParamValue):
        if object_.param is not None and object_.param.id is None:
            old_param = object_.param
            new_param = _setup_param(session, object_.param)
            if new_param is not old_param:
                if old_param in session:
                    session.expunge(old_param)
                object_.param = new_param


def _setup_param(session, param_object):
    """
    given a Session and a Param object, return the correct Param object from the database.
    """
    with session.no_autoflush:
        try:
            return session.query(Param).filter_by(key=param_object.key).one()
        except NoResultFound:
            return param_object

@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """
    set sqlite pragmas.

    make sure sqlite enforces relational integrity.

    @param dbapi_connection: raw DBAPI connection object.
    @param connection_record: sqlalchemy connection record (unused).
    @return: None
    """
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()


def sqlite_link(path=None):
    """
    format the sqlalchemy link to an sqlite3 database.

    @param path: file path. if empty, an in-memory database is created.
    @return: (str) database link for the sqlalchemy engine.
    """
    if not path:
        path = ':memory:'
    return f'sqlite:///{path}'


def connect(db_link):
    """
    connect to the database.

    create the sqlalchemy engine and bind the session maker.
    the database engine and session maker are global.
    this function should be called only once in a process.

    @param db_link: (str) database link expected by the sqlalchemy engine
    @return: None
    """
    global engine
    engine = create_engine(db_link, echo=False)
    Base.metadata.create_all(engine)
    Session.configure(bind=engine)

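# a minimal connection sketch (an in-memory database, useful for tests;
# the project name is hypothetical):
def _example_connect_in_memory():
    connect(sqlite_link(None))    # sqlite_link(None) -> 'sqlite:///:memory:'
    session = Session()
    session.add(Project(name='demo'))
    session.commit()
    return session
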
158
pmsco/database/project.py
Normal file
@@ -0,0 +1,158 @@
"""
@package pmsco.database.project
wrapper class for project-specific database operations


usage:
~~~~~~{.py}
db = DatabaseAccess()
db.connect("file.db")
with db.session() as session:
    # database access here
    # ...
    # commit transaction
    session.commit()
    # continue in new transaction
    # ...

# at the end of the context,
# the session is closed and orm objects are detached from the database.
~~~~~~

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import datetime
import logging
import socket
from pmsco.database.access import DatabaseAccess
import pmsco.database.common as db_common
import pmsco.database.ingest as db_ingest
import pmsco.database.query as db_query
from pmsco.dispatch import mpi_size

logger = logging.getLogger(__name__)

class ProjectDatabase(DatabaseAccess):
    """
    wrapper class for project-specific database operations

    the purpose of this class is to bundle all specific code and run-time information
    for database access of a running calculation job.

    after calling ingest_project_metadata(),
    the class object stores the persistent project and job identifiers.
    the other methods provide convenient wrappers so that database code can be kept minimal in the project.

    usage:
    ~~~~~~{.py}
    db = ProjectDatabase()
    db.connect('file.db')
    db.ingest_project_metadata(...)
    for result in results:
        db.ingest_result(result...)
    ~~~~~~
    """

    def __init__(self):
        super().__init__()
        self.db_project_id = None
        self.db_job_id = None

    def ingest_project_metadata(self, project):
        """
        ingest project metadata into the database

        @param project: pmsco.project.Project object

        @return: None
        """
        with self.session() as session:
            db_project = db_common.register_project(session=session,
                                                    name=project.project_name,
                                                    code=project.__module__,
                                                    allow_existing=True)

            db_job = db_common.register_job(session=session,
                                            project=db_project,
                                            job_name=project.job_name,
                                            allow_existing=False,
                                            mode=project.mode,
                                            machine=socket.gethostname(),
                                            git_hash=project.git_hash,
                                            datetime=datetime.datetime.now(),
                                            processes=mpi_size,
                                            hours=project.timedelta_limit.total_seconds() / 3600.,
                                            description=project.description)

            db_common.register_job_tags(session, db_job, project.job_tags)
            db_common.register_params(session, project.model_space.start.keys())
            session.commit()

            self.db_project_id = db_project.id
            self.db_job_id = db_job.id

    def ingest_result(self, index, result, delta):
        """
        add or update a result in the database.

        the method updates the Models, Results and ParamValues tables.

        the model is identified by self.db_job_id and index.model.
        the result is identified by self.db_job_id and index.
        if the model or result exists in the database, it is updated.

        @param index: (pmsco.dispatch.CalcID or dict)
        calculation index.
        in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
        '_model', '_scan', '_domain', '_emit', '_region'.
        extra values in the dictionary are ignored.
        undefined indices must be -1.

        @param result: (dict) dictionary containing the parameter values and the '_rfac' result.
        may also contain the special values '_gen', '_particle', '_timestamp'.
        '_gen' and '_particle' are integers and default to None.
        '_timestamp' can be numeric (seconds since jan 1, 1970)
        or an object that implements a timestamp function like datetime.datetime.
        it defaults to the current (local) time.

        @param delta: (dict) dictionary containing the delta values.
        the keys must correspond to model keys in the result dictionary.
        this argument is optional.
        """
        assert self.db_project_id is not None
        assert self.db_job_id is not None
        with self.session() as session:
            job_obj = db_common.get_job(session, self.db_project_id, self.db_job_id)
            model_obj = db_ingest.store_model(session, job_obj, index, result)
            db_ingest.store_result_data(session, model_obj, index, result)
            db_ingest.store_param_values(session, model_obj, result, delta)
            session.commit()

    def query_best_task_models(self, level, count):
        """
        query N best models per task.

        this is a wrapper for pmsco.database.query.query_best_task_models().
        in addition to the wrapped function, it opens a session and uses the registered db_job_id.

        this query is used by the file tracker to determine the models to keep.

        @param level: level up to which to query.
        the level can be specified by level name (str) or numeric index (0..4).
        if it is scan (equivalent to 1), the method queries the model and scan levels.
        @param count: number of models to query per task.

        @return set of matching model numbers (model index, Models.model field).
        """
        with self.session() as session:
            models = db_query.query_best_task_models(session, self.db_job_id, level, count)

        return models

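# a job lifecycle sketch for ProjectDatabase (the project object, the file
# name and the results sequence are assumed to be provided by the caller):
def _example_project_database(project, results):
    db = ProjectDatabase()
    db.connect('file.db')
    db.ingest_project_metadata(project)
    for index, result, delta in results:
        db.ingest_result(index, result, delta)
    return db.query_best_task_models(level='scan', count=10)
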
470
pmsco/database/query.py
Normal file
@@ -0,0 +1,470 @@
"""
@package pmsco.database.query
specialized query functions for the pmsco database

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import numpy as np
from sqlalchemy import func
import pmsco.database.orm as orm
import pmsco.database.util as util
import pmsco.dispatch as dispatch

logger = logging.getLogger(__name__)

def query_newest_job(session):
    """
    retrieve the entry of the newest job.

    the newest entry is determined by the datetime field.

    @param session: (sqlalchemy.Session) database session.

    @return: pmsco.database.orm.Job object
    """
    q = session.query(orm.Job)
    q = q.order_by(orm.Job.datetime.desc(), orm.Job.id.desc())
    job = q.first()
    return job

def query_model(session, job_id=None, model_id=None, model=None):
    """
    retrieve model parameters and control variables from the database.

    @param session: (sqlalchemy.Session) database session.
    @param job_id: (int) filter by id of the job in the database. optional.
    @param model_id: (int) filter by id of the model in the database. optional.
    @param model: (int) filter by model number (Models.model field). optional.

    @return: (dict, dict) value dictionary and delta dictionary.
    dictionary keys are parameter names.
    the special values '_model_id', '_model', '_gen' and '_particle' are included.
    """
    query = session.query(orm.ParamValue).join(orm.Model)
    if job_id is not None:
        query = query.join(orm.Job).filter(orm.Job.id == job_id)
    if model_id is not None:
        query = query.filter(orm.Model.id == model_id)
    if model is not None:
        query = query.filter(orm.Model.model == model)
    result = query.all()

    param_value = {}
    param_delta = {}
    model_obj = None
    for pv in result:
        if model_obj is None:
            model_obj = pv.model
        param_value[pv.param.key] = pv.value
        param_delta[pv.param.key] = pv.delta

    param_value['_model_id'] = model_obj.id
    param_value['_model'] = model_obj.model
    param_value['_gen'] = model_obj.gen
    param_value['_particle'] = model_obj.particle
    param_delta['_model_id'] = model_obj.id
    param_delta['_model'] = model_obj.model
    param_delta['_gen'] = model_obj.gen
    param_delta['_particle'] = model_obj.particle

    return param_value, param_delta

def query_results(session, job_id):
    """
    query all results of a calculation job.

    @param session: (sqlalchemy.Session) database session.
    @param job_id: (int) id of the associated Jobs entry.

    @return: list of orm.Result objects.
    """
    query = session.query(orm.Result)
    query = query.join(orm.Model)
    query = query.filter(orm.Model.job_id == job_id)
    return query.all()

def query_tasks(session, job_id):
    """
    query the task index used in a calculation job.

    this query neglects the model index
    and returns the unique tuples (-1, scan, domain, emit, region).

    @param job_id: (int) id of the associated Jobs entry.

    @return list of pmsco.dispatch.CalcID tuples of task indices.
    the model attribute is -1 in all elements.
    """
    query = session.query(orm.Result.scan, orm.Result.domain, orm.Result.emit, orm.Result.region)
    query = query.join(orm.Model)
    query = query.filter(orm.Model.job_id == job_id)
    query = query.distinct()
    query = query.order_by(orm.Result.scan, orm.Result.domain, orm.Result.emit, orm.Result.region)
    results = query.all()

    output = []
    for row in results:
        d = row._asdict()
        d['model'] = -1
        output.append(dispatch.CalcID(**d))

    return output

def query_best_task_models(session, job_id, level, count):
    """
    query N best models per task.

    this query is used by the file tracker to determine the models to keep.

    @param session: (sqlalchemy.Session) database session.
    @param job_id: (int) id of the associated Jobs entry.
    @param level: level up to which to query.
    the level can be specified by level name (str) or numeric index (0..4).
    if it is scan (equivalent to 1), the method queries the model and scan levels.
    @param count: number of models to query per task.

    @return set of matching model numbers (Models.model field).
    """

    try:
        level = int(level)
    except ValueError:
        level = dispatch.CALC_LEVELS.index(level)
    assert 0 <= level < len(dispatch.CALC_LEVELS)

    def _query_models(t):
        query = session.query(orm.Model.model).join(orm.Job).join(orm.Result)
        query = query.filter(orm.Job.id == job_id)
        query = query.filter(orm.Result.scan == t.scan)
        query = query.filter(orm.Result.domain == t.domain)
        query = query.filter(orm.Result.emit == t.emit)
        query = query.filter(orm.Result.region == t.region)
        query = query.order_by(orm.Result.rfac)
        results = query[0:count]
        return set(row.model for row in results)

    tasks = query_tasks(session, job_id)
    models = set()
    for task in tasks:
        if task.numeric_level <= level:
            q_models = _query_models(task)
            models |= q_models

    return models

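# an illustrative call (the job id is hypothetical): keep the 10 best models
# judged at the model and scan levels, e.g. to decide which files to retain.
def _example_query_best_task_models(session):
    return query_best_task_models(session, job_id=1, level='scan', count=10)
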
def query_model_params_array(session, jobs=None, models=None, order=None, limit=None):
    """
    query parameter values and return them in a numpy array.

    the models table can be filtered by job and/or model.
    else, the whole database is returned (which might be huge!).

    @param session: (sqlalchemy.Session) database session.
    @param jobs: filter by job.
    the argument can be a singleton or sequence of orm.Job objects or numeric ids.
    @param models: filter by model.
    the argument can be a singleton or sequence of orm.Model objects or their ids.
    @param order: ordering of results. this can be a sequence of orm.Model attributes.
    the default order is by job_id and model.
    @param limit: maximum number of parameter values to fetch.
    @return: dict['values']: numpy values array, dict['deltas']: numpy deltas array
    """
    count_query = session.query(orm.Model)
    pn_query = session.query(orm.Param.key).join(orm.ParamValue).join(orm.Model).distinct()
    pv_query = session.query(orm.ParamValue).join(orm.Model)

    if jobs:
        if not isinstance(jobs, (list, tuple, set)):
            jobs = [jobs]
        job_ids = [j if isinstance(j, int) else j.id for j in jobs]
        count_query = count_query.filter(orm.Model.job_id.in_(job_ids))
        pn_query = pn_query.filter(orm.Model.job_id.in_(job_ids))
        pv_query = pv_query.filter(orm.Model.job_id.in_(job_ids))

    if models:
        if not isinstance(models, (list, tuple, set)):
            models = [models]
        model_ids = [m if isinstance(m, int) else m.id for m in models]
        count_query = count_query.filter(orm.Model.id.in_(model_ids))
        pn_query = pn_query.filter(orm.ParamValue.model_id.in_(model_ids))
        pv_query = pv_query.filter(orm.ParamValue.model_id.in_(model_ids))

    if order is not None:
        pv_query = pv_query.order_by(*order)
    else:
        pv_query = pv_query.order_by(orm.Model.job_id, orm.Model.model)

    n_models = count_query.count()
    param_names = [row.key for row in pn_query.all()]
    if limit:
        param_values = pv_query[0:limit]
    else:
        param_values = pv_query.all()

    special_names = list(orm.Model().as_dict().keys())
    dt_names = special_names + param_names
    dt = np.dtype([(n, util.field_to_numpy_type(n)) for n in sorted(dt_names, key=str.lower)])
    values = np.zeros((n_models,), dtype=dt)
    deltas = np.zeros((n_models,), dtype=dt)

    # one row per model: map model ids to consecutive row indices
    row_index = {}
    for pv in param_values:
        i = row_index.setdefault(pv.model_id, len(row_index))
        for k, v in pv.model.as_dict().items():
            if v is not None:
                values[i][k] = deltas[i][k] = v
        values[i][pv.param_key] = pv.value
        if pv.delta is not None:
            deltas[i][pv.param_key] = pv.delta

    return {'values': values, 'deltas': deltas}

calc_id_props = {'model': orm.Model.model,
                 'scan': orm.Result.scan,
                 'domain': orm.Result.domain,
                 'emit': orm.Result.emit,
                 'region': orm.Result.region}

def query_model_results_array(session, jobs=None, models=None, order=None, limit=None,
                              query_hook=None, hook_data=None, include_params=False, **index):
    """
    query a results table with flexible filtering options.

    the function returns a structured numpy array of the results and, optionally, parameter values.
    the database is fully flattened; each row of the array represents one result.

    the jobs and models arguments filter for specific jobs and/or models.

    custom filters can be added in a query hook function.
    the hook function receives an sqlalchemy Query object of the Result table,
    joined with the Model and Job tables.
    other joins must be added explicitly.
    the hook function can add more filters and return the modified query.

    the hook function is called after the filters from the other function arguments
    (jobs, models, index) have been applied,
    and before the ordering and limit are applied.

    @param session: (sqlalchemy.Session) database session.
    @param jobs: filter by job.
    the argument can be a singleton or sequence of orm.Job objects or numeric ids.
    @param models: filter by model.
    the argument can be a singleton or sequence of orm.Model objects or their ids.
    @param order: ordering of results. this can be a sequence of orm.Result attributes.
    the default order is by `orm.Result.rfac`.
    to override the default ascending order, append a modifier, e.g., `orm.Result.rfac.desc()`.
    @param limit: maximum number of results to return.
    @param query_hook: hook function that modifies an sqlalchemy.orm.Query object.
    the function receives the query as first argument, and any data from hook_data as keyword arguments.
    it must return the modified query object.
    @param hook_data: (dict) keyword arguments to be passed to the query_hook function.
    @param include_params: include parameter values of each model in the result.
    by default, only data from the Model and Result records is included.
    @param index: filters the results list by scan, domain, emit, and/or region index.
    for example, to get only the final results per model, specify `scan=-1`.
    @return: (numpy array, numpy array) tuple of values and deltas arrays.
    """
    results_query = session.query(orm.Result).join(orm.Model).join(orm.Job)

    if jobs:
        results_query = filter_objects(results_query, orm.Job, jobs)

    if models:
        results_query = filter_objects(results_query, orm.Model, models)

    for k, v in index.items():
        results_query = results_query.filter(calc_id_props[k] == v)

    if query_hook is not None:
        results_query = query_hook(results_query, **(hook_data or {}))

    if order is not None:
        results_query = results_query.order_by(*order)
    if limit:
        results = results_query[0:limit]
    else:
        results = results_query.all()
    n_results = len(results)
    logger.debug(f"query_model_results_array: {results_query.statement} ({n_results} rows)")

    dt_names = [n for n in util.DB_SPECIAL_PARAMS.values()]
    if include_params:
        model_ids = {r.model_id for r in results}
        pn_query = session.query(orm.Param.key).join(orm.ParamValue)
        pn_query = pn_query.filter(orm.ParamValue.model_id.in_(model_ids))
        pn_query = pn_query.distinct()
        pn_query = pn_query.order_by(orm.Param.key)
        p_names = [r.key for r in pn_query.all()]
        dt_names.extend(p_names)
        logger.debug(f"query_model_results_array: {pn_query.statement} ({len(p_names)} rows)")

    dt = []
    v0 = []
    for n in dt_names:
        ft = util.field_to_numpy_type(n)
        dt.append((n, ft))
        v0.append(np.nan if ft[0] == 'f' else 0)
    dt = np.dtype(dt)
    v0 = np.array([tuple(v0)], dtype=dt)
    values_array = np.full((n_results,), v0, dtype=dt)
    deltas_array = np.full((n_results,), v0, dtype=dt)

    for i, r in enumerate(results):
        d = {**r.as_dict(), **r.model.as_dict()}
        for k, v in d.items():
            try:
                values_array[i][k] = v
            except TypeError:
                values_array[i][k] = 0
            except (KeyError, ValueError):
                # skip fields that are not part of the dtype (e.g. the database ids)
                pass
        deltas_array[i] = values_array[i]
        if include_params:
            for k, v in r.model.values.items():
                values_array[i][k] = v
            for k, v in r.model.deltas.items():
                if v is not None:
                    deltas_array[i][k] = v

    return values_array, deltas_array

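# a sketch of a custom query hook (the r-factor threshold is hypothetical):
# the hook receives the Result query joined with Model and Job and may add
# further filters before ordering and limit are applied.
def _example_rfac_hook(query, rfac_max=0.5):
    return query.filter(orm.Result.rfac <= rfac_max)


def _example_query_with_hook(session):
    return query_model_results_array(session, jobs=1, order=[orm.Result.rfac],
                                     limit=100, query_hook=_example_rfac_hook,
                                     hook_data={'rfac_max': 0.5}, scan=-1)
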
def query_best_models_per_job(session, projects=None, jobs=None, task_level='model', order=None, limit=None):
    """
    return the best model (by rfac) of each selected job

    the query gathers the R-factors of the selected jobs at the selected task levels
    and, for each job, returns the (database) model id where the lowest R-factor is reported
    among the gathered results.

    this can be useful if you want to compile a report of the best model per job.

    @param session: sqlalchemy database session.
    @param projects: filter by project.
        the argument can be a singleton or sequence of orm.Project objects or numeric ids.
    @param jobs: filter by job.
        the argument can be a singleton or sequence of orm.Job objects or numeric ids.
    @param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
        deepest task level to include in the query.
        results on deeper levels are not considered.
        e.g. if you pass 'scan', R-factors of individual scans are included in the query.
        note that including deeper levels will not increase the number of results returned.
        the lowest level that can be specified is `emit`.
    @param order: ordering of results. this can be a sequence of orm.Result attributes.
        the default order is by `orm.Result.rfac`.
    @param limit: maximum number of models to return.

    @return sequence of (orm.Model, orm.Result) tuples.
        the number of results corresponds to the number of jobs in the filter scope.
        to find out details of the models, execute another query that filters on these model ids.

    the method produces an SQL query similar to:
    @code{.sql}
    select Models.id from Models
    join Results on Models.id = Results.model_id
    join Jobs on Models.job_id = Jobs.id
    where scan=-1
    and project_id=1
    and job_id in (1,2,3)
    group by Models.job_id
    having min(rfac)
    order by rfac
    @endcode
    """

    try:
        level = dispatch.CALC_LEVELS.index(task_level) + 1
    except ValueError:
        level = int(task_level) + 1
    try:
        level_name = dispatch.CALC_LEVELS[level]
    except IndexError:
        level_name = dispatch.CALC_LEVELS[4]

    query = session.query(orm.Model, orm.Result).join(orm.Result)

    if projects:
        query = filter_objects(query, orm.Project, projects)

    if jobs:
        query = filter_objects(query, orm.Job, jobs)

    query = query.filter(getattr(orm.Result, level_name) == -1)
    query = query.group_by(orm.Model.job_id)
    query = query.having(func.min(orm.Result.rfac))

    if order is not None:
        query = query.order_by(*order)
    else:
        query = query.order_by(orm.Result.rfac)
    if limit:
        results = query[0:limit]
    else:
        results = query.all()

    return results


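A hedged usage sketch of query_best_models_per_job(); the project id is a placeholder and the session is assumed to be open on a pmsco results database:

~~~~~~{.py}
best = query_best_models_per_job(session, projects=1, task_level='model', limit=20)
for model, result in best:
    print(f"job {model.job_id}: model {model.id}, R = {result.rfac:.4f}")
~~~~~~
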
def filter_objects(query, entity, objects):
    """
    filter a query for the given objects

    apply a simple object filter to a database query.
    the criteria can be a single object or a sequence of objects.
    the objects can be specified either by their object representation or their numeric id.
    the query is filtered by id.
    thus, in the first case, the objects must have a valid id.

    @param query: sqlalchemy.orm.Query object that queries a table that is linked to the entity table.
        the function joins the entity table.
        a table with a direct foreign key relationship to the entity table must already be in the query.
    @param entity: orm entity class, e.g. pmsco.database.orm.Project.
    @param objects: singleton or sequence of orm objects or their numeric ids.

    @return: modified query
    """
    # avoid duplicate joins
    if str(query.statement).find(entity.__tablename__) < 0:
        query = query.join(entity)
    try:
        ids = [p if isinstance(p, int) else p.id for p in objects]
        query = query.filter(entity.id.in_(ids))
    except TypeError:
        # `obj_id` avoids shadowing the built-in `object`
        obj_id = objects if isinstance(objects, int) else objects.id
        query = query.filter(entity.id == obj_id)
    return query


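A short sketch of how filter_objects() composes with a query; the ids are placeholders. Note that orm.Job is joined up front so that the orm.Project filter finds its foreign-key path:

~~~~~~{.py}
q = session.query(orm.Result).join(orm.Model).join(orm.Job)
q = filter_objects(q, orm.Job, [3, 7])    # sequence of ids -> IN clause
q = filter_objects(q, orm.Project, 1)     # singleton -> equality clause
~~~~~~
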
def filter_task_levels(query, level='model', include_parents=False):
    """
    refine a query by filtering by task level.

    @param query: sqlalchemy.orm.Query object that queries the Result table
        (possibly joined with others).
    @param level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
        deepest task level to include in the query.
        results on deeper levels are not considered.
        e.g. if you pass 'scan', R-factors of individual scans are included in the query.
        the lowest level that can be specified is `emit`.
    @param include_parents: by default, the query will return only results from the given level.
        if True, combined results (parents) will be returned as well.

    @return: modified query
    """

    try:
        level = dispatch.CALC_LEVELS.index(level)
    except ValueError:
        level = int(level)
    child_level = level + 1

    try:
        child_level_name = dispatch.CALC_LEVELS[child_level]
        level_name = dispatch.CALC_LEVELS[level]
    except IndexError:
        child_level_name = dispatch.CALC_LEVELS[4]
        level_name = dispatch.CALC_LEVELS[3]

    query = query.filter(getattr(orm.Result, child_level_name) == -1)
    if not include_parents:
        query = query.filter(getattr(orm.Result, level_name) >= 0)

    return query
161
pmsco/database/util.py
Normal file
@@ -0,0 +1,161 @@
import logging
import numpy as np
from pathlib import Path
import pmsco.dispatch as dispatch

logger = logging.getLogger(__name__)


## mapping of database fields to special parameter names
#
# `_db` parameters are returned by some query methods to identify the database records.
#
DB_SPECIAL_PARAMS = {"project_id": "_db_project_id",
                     "job_id": "_db_job_id",
                     "model_id": "_db_model_id",
                     "result_id": "_db_result_id",
                     "model": "_model",
                     "scan": "_scan",
                     "domain": "_domain",
                     "emit": "_emit",
                     "region": "_region",
                     "gen": "_gen",
                     "particle": "_particle",
                     "rfac": "_rfac",
                     "secs": "_secs",
                     "timestamp": "_timestamp"}


## numpy data types of special parameters by database field
#
# this dictionary helps to create a numpy array from a database record.
#
DB_SPECIAL_NUMPY_TYPES = {"_db_project_id": "i8",
                          "_db_job_id": "i8",
                          "_db_model_id": "i8",
                          "_db_result_id": "i8",
                          "_model": "i8",
                          "_scan": "i8",
                          "_domain": "i8",
                          "_emit": "i8",
                          "_region": "i8",
                          "_gen": "i8",
                          "_particle": "i8",
                          "_rfac": "f8",
                          "_secs": "f8",
                          "_timestamp": "f8"}

def regular_params(d):
    """
    filter regular parameters from a dictionary

    returns a dictionary containing only the regular parameters (those not prefixed with an underscore).

    @param d: dict or numpy.void or pmsco.dispatch.CalcID.
        the param names must have no leading underscore.
        the numpy.void type occurs when an element of a structured array is extracted.
        the CalcID does not contain a regular parameter and will return an empty dictionary.
        it is supported only for compatibility with the special_params function.
        a tuple or list is interpreted as a sequence of parameter names.
        in this case, the names representing special parameters are excluded from the result.

    @return: dict for mapping types (numpy.void and dict) containing the regular key: value pairs of the original object.
        list (or tuple) of regular parameter names for sequence (or tuple) types.
    """
    if isinstance(d, np.void):
        d = {k: d[k] for k in d.dtype.names if k[0] != "_"}
    elif isinstance(d, dispatch.CalcID):
        d = {}
    elif isinstance(d, tuple):
        d = tuple(k for k in d if k[0] != "_")
    elif isinstance(d, dict):
        d = {k: v for k, v in d.items() if k[0] != "_"}
    else:
        d = [k for k in d if k[0] != "_"]

    return d


def special_params(d):
    """
    filter special parameters from a model dictionary, numpy record or sequence.

    special parameters are those prefixed with an underscore.
    the underscore is removed from the keys.
    fields starting with '_db_' are removed.

    @param d: dict or numpy.void or pmsco.dispatch.CalcID or sequence.
        in the case of a dict or numpy.void,
        the key names of the special parameters must have a leading underscore.
        the numpy.void type occurs when an element of a structured array is extracted.
        in the case of a CalcID, the attribute names become the key names.
        a tuple or list is interpreted as a sequence of parameter names.
        in this case, the names representing special parameters are returned with the underscore removed.

    @return: the return type depends on the type of input `d`:
        @arg in the case of a dict, numpy.void or CalcID, it is a dictionary.
        @arg in the case of a tuple or list, the return type is the same as the input.
    """
    if isinstance(d, np.void):
        d = {k[1:]: d[k] for k in d.dtype.names if k[0] == "_" and k[0:4] != "_db_"}
    elif isinstance(d, dispatch.CalcID):
        d = d._asdict()
    elif isinstance(d, tuple):
        d = tuple(k[1:] for k in d if k[0] == "_" and k[0:4] != "_db_")
    elif isinstance(d, dict):
        d = {k[1:]: v for k, v in d.items() if k[0] == "_" and k[0:4] != "_db_"}
    else:
        d = [k[1:] for k in d if k[0] == "_" and k[0:4] != "_db_"]

    return d


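regular_params() and special_params() are complementary filters; a minimal sketch with made-up parameter names:

~~~~~~{.py}
row = {"dphi": 12.5, "rbond": 2.1, "_rfac": 0.43, "_gen": 3, "_db_model_id": 17}

regular_params(row)    # -> {'dphi': 12.5, 'rbond': 2.1}
special_params(row)    # -> {'rfac': 0.43, 'gen': 3}  ('_db_' fields are dropped)
~~~~~~
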
def field_to_param(f):
    """
    translate a database field name to a parameter name.

    field names of optimization parameters are unchanged.
    special parameters are prefixed by '_' or '_db_'.

    @param f: (str) database field name.
    @return: (str) parameter name as used in model dictionaries.
    """
    try:
        p = DB_SPECIAL_PARAMS[f]
    except KeyError:
        p = f
    return p


def field_to_numpy_type(f):
    """
    determine the numpy data type string of a database field.

    @param f: (str) database field name.
    @return: (str) numpy type description, e.g. 'f8'.
    """
    try:
        t = DB_SPECIAL_NUMPY_TYPES[f]
    except KeyError:
        t = 'f8'
    return t


def is_sqlite3_file(path_like):
    """
    test whether a file is an sqlite3 database file.

    @param path_like: file path (str or pathlib.Path).
    @return: (bool)
    """
    try:
        with Path(path_like).open("rb") as f:
            s = f.read(16)
            return s == b"SQLite format 3\000"
    except OSError:
        return False
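The three helpers above in action; the database file name is a placeholder:

~~~~~~{.py}
field_to_param("rfac")           # -> '_rfac' (special field)
field_to_param("dphi")           # -> 'dphi'  (regular parameter, unchanged)
field_to_numpy_type("_rfac")     # -> 'f8'
field_to_numpy_type("_scan")     # -> 'i8'
field_to_numpy_type("dphi")      # -> 'f8'   (default for regular parameters)
is_sqlite3_file("results.db")    # -> True only if the file starts with the sqlite3 magic header
~~~~~~
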
@@ -19,8 +19,6 @@ import collections
import copy
import logging

from attrdict import AttrDict

try:
    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
@@ -190,6 +188,15 @@ class CalculationTask(object):
    # this is typically initialized to the parameters of the parent task,
    # and varied at the level where the task ID was produced.

    ## @var delta (dict)
    # dictionary containing a delta vector of the model parameters.
    #
    # this is a diagnostic value of the optimizer; it is not used by calculators.
    # if defined, it is entered into the results database (ParamValue.delta field).
    #
    # the exact meaning depends on the optimizer.
    # in particle swarm, e.g., it is the current velocity of the particle.

    ## @var file_root (string)
    # file name without extension and index.

@@ -258,6 +265,7 @@ class CalculationTask(object):
        self.id = CalcID(-1, -1, -1, -1, -1)
        self.parent_id = self.id
        self.model = {}
        self.delta = {}
        self.file_root = ""
        self.file_ext = ""
        self.result_filename = ""
@@ -500,6 +508,16 @@ class CachedCalculationMethod(object):
        del self._cache[index]


class AttrDict(collections.UserDict):
    def __getattr__(self, key):
        return self.__getitem__(key)

    def __setattr__(self, key, value):
        if key == "data":
            return super().__setattr__(key, value)
        return self.__setitem__(key, value)


class MscoProcess(object):
    """
    code shared by MscoMaster and MscoSlave.

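The AttrDict class above replaces the external attrdict dependency whose import is removed in the first hunk. A quick behavioral sketch:

~~~~~~{.py}
d = AttrDict({"alpha": 1.0})
d.beta = 2.0        # attribute assignment stores a dictionary item
print(d["beta"])    # -> 2.0
print(d.alpha)      # -> 1.0
# "data" is the only reserved attribute name: it is UserDict's backing store.
~~~~~~
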
2
pmsco/edac/.gitignore
vendored
@@ -1,2 +0,0 @@
edac_all_wrap.*
edac.py
@@ -1 +0,0 @@
__author__ = 'muntwiler_m'
@@ -1,47 +0,0 @@
SHELL=/bin/sh

# makefile for EDAC program and module
#
# the EDAC source code is not included in the public distribution.
# please obtain it from the original author,
# copy it to this directory,
# and apply the edac_all.patch patch before compilation.
#
# see the top-level makefile for additional information.

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac

FC?=gfortran
FCCOPTS?=
F2PY?=f2py
F2PYOPTS?=
CXX?=g++
CXXOPTS?=-Wno-write-strings
PYTHON?=python
PYTHONOPTS?=

all: edac

edac: edac.exe _edac.so edac.py

edac.exe: edac_all.cpp
	$(CXX) $(CXXOPTS) -o edac.exe edac_all.cpp

edac.py _edac.so: edac_all.cpp edac_all.i setup.py
	$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace

revision.py: _edac.so
	git log --pretty=format:"code_rev = 'Code revision %h, %ad'" --date=iso -1 > $@ || echo "code_rev = 'Code revision unknown, "`date +"%F %T %z"`"'" > $@
	echo "" >> revision.py

revision.txt: _edac.so edac.exe
	git log --pretty=format:"Code revision %h, %ad" --date=iso -1 > $@ || echo "Code revision unknown, "`date +"%F %T %z"` > $@
	echo "" >> revision.txt

clean:
	rm -f *.so *.o *.exe *.pyc
	rm -f edac.py edac_all_wrap.*
	rm -f revision.*
@@ -1,23 +0,0 @@
#!/usr/bin/env python

"""
setup.py file for EDAC
"""

from distutils.core import setup, Extension


edac_module = Extension('_edac',
                        sources=['edac_all.cpp', 'edac_all.i'],
                        swig_opts=['-c++']
                        )

setup (name = 'edac',
       version = '0.1',
       author = "Matthias Muntwiler",
       description = """EDAC module in Python""",
       ext_modules = [edac_module],
       py_modules = ["edac"],
       requires=['numpy']
       )
@@ -1,41 +0,0 @@
"""
@package pmsco.elements
extended properties of the elements

this package extends the element table of the `periodictable` package
(https://periodictable.readthedocs.io/en/latest/index.html)
by additional attributes like the electron binding energies.

the package requires the periodictable package (https://pypi.python.org/pypi/periodictable).


@author Matthias Muntwiler

@copyright (c) 2020 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import periodictable.core


def _load_binding_energy():
    """
    delayed loading of the binding energy table.
    """
    from . import bindingenergy
    bindingenergy.init(periodictable.core.default_table())


def _load_photoionization():
    """
    delayed loading of the photoionization table.
    """
    from . import photoionization
    photoionization.init(periodictable.core.default_table())


periodictable.core.delayed_load(['binding_energy'], _load_binding_energy)
periodictable.core.delayed_load(['photoionization'], _load_photoionization)
@@ -1,22 +1,22 @@
"""
@package pmsco.elements.bindingenergy
electron binding energies of the elements
Electron binding energies of the elements

extends the element table of the `periodictable` package
Extends the element table of the `periodictable` package
(https://periodictable.readthedocs.io/en/latest/index.html)
by the electron binding energies.

the binding energies are compiled from Gwyn Williams' web page
The binding energies are compiled from Gwyn Williams' web page
(https://userweb.jlab.org/~gwyn/ebindene.html).
please refer to the original web page or the x-ray data booklet
Please refer to the original web page or the x-ray data booklet
for original sources, definitions and remarks.
binding energies of gases are replaced by respective values of a common compound
from the 'handbook of x-ray photoelectron spectroscopy' (physical electronics, inc., 1995).
Binding energies of gases are replaced by respective values of a common compound
from the 'handbook of x-ray photoelectron spectroscopy' (Physical Electronics, Inc., 1995).

usage
Usage
-----

this module requires the periodictable package (https://pypi.python.org/pypi/periodictable).
This module requires the periodictable package (https://pypi.python.org/pypi/periodictable).

~~~~~~{.py}
import periodictable as pt
@@ -29,15 +29,16 @@ print(pt.elements.name('gold').binding_energy['4f7/2'])
print(pt.elements[79].binding_energy['4f7/2'])
~~~~~~

note that attributes are writable.
you may assign refined values in your instance of the database.
The database is loaded from the accompanying bindingenergy.json file on first demand.
Attributes are writable, you may update the values in your run-time instance of the database.

the query_binding_energy() function queries all terms with a particular binding energy.
Normally, the user will not need to call any functions in this module directly.
The query_binding_energy() function queries all terms with a particular binding energy.


@author Matthias Muntwiler

@copyright (c) 2020 by Paul Scherrer Institut @n
@copyright (c) 2020-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -46,15 +47,15 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n

import json
import numpy as np
import os
from pathlib import Path
import periodictable as pt
from pmsco.compat import open

import periodictable.core

index_energy = np.zeros(0)
index_number = np.zeros(0)
index_term = []
default_data_path = os.path.join(os.path.dirname(__file__), "bindingenergy.json")

default_data_path = Path(Path(__file__).parent, "bindingenergy.json")


def load_data(data_path=None):
@@ -63,13 +64,13 @@ def load_data(data_path=None):

    the data file must be in the same format as generated by save_data.

    @param file path of the data file. default: "bindingenergy.json" next to this module file
    @param data_path file path of the data file. default: "bindingenergy.json" next to this module file

    @return dictionary
    """
    if data_path is None:
        data_path = default_data_path
    with open(data_path) as fp:
    with open(data_path, "rt", encoding="utf8") as fp:
        data = json.load(fp)
    return data

@@ -78,7 +79,7 @@ def save_data(data_path=None):
    """
    save binding energy data to json file

    @param file path of the data file. default: "bindingenergy.json" next to this module file
    @param data_path file path of the data file. default: "bindingenergy.json" next to this module file

    @return None
    """
@@ -91,7 +92,7 @@ def save_data(data_path=None):
            element_data[term] = energy
        if element_data:
            data[element.number] = element_data
    with open(data_path, 'w', 'utf8') as fp:
    with open(data_path, "w", encoding="utf8") as fp:
        json.dump(data, fp, sort_keys=True, indent='\t')


@@ -120,6 +121,7 @@ def build_index():

    @return None
    """

    global index_energy
    global index_number
    global index_term
@@ -210,3 +212,14 @@ def import_flat_text(f):
    data = np.atleast_1d(np.genfromtxt(f, names=True, dtype=None, encoding="utf8"))
    for d in data:
        pt.elements[d['number']].binding_energy[d['term']] = d['energy']


def _load_binding_energy():
    """
    delayed loading of the binding energy table.
    """

    init(periodictable.core.default_table())


periodictable.core.delayed_load(['binding_energy'], _load_binding_energy)

Binary file not shown.
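As a pointer for readers, a hedged sketch of the JSON data handled by load_data() and save_data(): the file maps element numbers to {term: binding energy} dictionaries. The element and term below are illustrative; note that the JSON round trip turns the integer element keys into strings.

~~~~~~{.py}
import pmsco.elements.bindingenergy as be

data = be.load_data()          # reads bindingenergy.json next to the module
print(data["79"]["4f7/2"])     # Au 4f7/2 binding energy in eV (string key after JSON round trip)
~~~~~~
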
@@ -1,63 +1,258 @@
"""
@package pmsco.elements.photoionization
photoionization cross-sections of the elements
Photoionization cross-sections of the elements

extends the element table of the `periodictable` package
Extends the element table of the `periodictable` package
(https://periodictable.readthedocs.io/en/latest/index.html)
by a table of photoionization cross-sections.
by a table of photoionization cross-sections and asymmetry parameters.


the data is available from (https://vuo.elettra.eu/services/elements/)
The data is available from (https://vuo.elettra.eu/services/elements/)
or (https://figshare.com/articles/dataset/Digitisation_of_Yeh_and_Lindau_Photoionisation_Cross_Section_Tabulated_Data/12389750).
both sources are based on the original atomic data tables by Yeh and Lindau (1985).
the Elettra data includes interpolation at finer steps,
whereas the Kalha data contains only the original data points by Yeh and Lindau
Both sources are based on the original atomic data tables by Yeh and Lindau (1985).
The Elettra data includes the cross section and asymmetry parameter and is interpolated at finer steps,
whereas the Kalha data contains only the cross sections at the photon energies calculated by Yeh and Lindau
plus an additional point at 8 keV.
the tables go up to 1500 eV photon energy and do not resolve spin-orbit splitting.
The tables go up to 1500 eV photon energy and do not resolve spin-orbit splitting.


usage
Usage
-----

this module requires python 3.6, numpy and the periodictable package (https://pypi.python.org/pypi/periodictable).
This module adds the photoionization attribute to the elements database of the periodictable package (https://pypi.python.org/pypi/periodictable).
Python >= 3.6, numpy >= 1.15 and the periodictable package are required.

~~~~~~{.py}
import numpy as np
import periodictable as pt
import pmsco.elements.photoionization

# read any of periodictable's element interfaces as follows.
# eph and cs are numpy arrays of identical shape that hold the photon energies and cross sections.
eph, cs = pt.gold.photoionization.cross_section['4f']
eph, cs = pt.elements.symbol('Au').photoionization.cross_section['4f']
eph, cs = pt.elements.name('gold').photoionization.cross_section['4f']
eph, cs = pt.elements[79].photoionization.cross_section['4f']
# get a SubShellPhotoIonization object from any of periodictable's element interfaces:
sspi = pt.gold.photoionization['4f']
sspi = pt.elements.symbol('Au').photoionization['4f']
sspi = pt.elements.name('gold').photoionization['4f']
sspi = pt.elements[79].photoionization['4f']

# interpolate for a specific photon energy
print(np.interp(photon_energy, eph, cs))
# get the cross section, asymmetry parameter or differential cross section at 800 eV photon energy:
sspi.cross_section(800)
sspi.asymmetry_parameter(800)
sspi.diff_cross_section(800, gamma=30)

# with the j quantum number, the cross-section is weighted based on a full sub-shell:
sspi = pt.gold.photoionization['4f7/2']
print(sspi.weight)
print(pt.gold.photoionization['4f7/2'].cross_section(800) / pt.gold.photoionization['4f'].cross_section(800))

# the original data is contained in the data array (which is a numpy.recarray):
sspi.data.eph, sspi.data.cs, sspi.data.ap
~~~~~~

the data is loaded from the cross-sections.dat file which is a python-pickled data file.
to switch between data sources, use one of the load functions defined here
and dump the data to the cross-sections.dat file.
The data is loaded on demand from the cross-sections.dat file when the photoionization record is first accessed.
Normally, the user will not need to call any functions in this module directly.

The load_elettra_data()/load_kalha_data() and save_pickled_data() functions are provided
to import data from one of the sources referenced above and
to create the cross-sections.dat file.


@author Matthias Muntwiler

@copyright (c) 2020 by Paul Scherrer Institut @n
@copyright (c) 2020-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import copy
import numpy as np
from pathlib import Path
import periodictable as pt
import pickle
import urllib.request
import urllib.error
from . import bindingenergy
import periodictable.core


class PhotoIonization(dict):
    """
    photo-ionization parameters of an element

    this class provides the photo-ionization cross-section and asymmetry parameter of the sub-shells of an element.
    it is, essentially, a dictionary mapping 'nl' and 'nlj' terms to the corresponding SubShellPhotoIonization object.

    examples of 'nl' and 'nlj' terms: '4f' and '4f7/2'

    @note the dictionary actually contains raw data for 'nl' terms only.
    for 'nlj' terms, the corresponding 'nl' object is copied,
    and a weight according to the spin-orbit multiplicity is set.

    @note 'nlj' terms are not considered by any methods or properties
    except the bracket notation or __getitem__ method!
    in particular, iteration or the keys() method will yield 'nl' terms only.
    """

    def __init__(self, *args, **kwargs):
        """
        dictionary constructor

        the class accepts the same arguments as the Python built-in dict constructor.
        keys are 'nl' terms, e.g. '4f', and values must be SubShellPhotoIonization() objects.

        @param args:
        @param kwargs:
        """
        super().__init__(*args, **kwargs)
        self.cross_section_units = "Mb"

    def __getitem__(self, k):
        """
        get sub-shell photo-ionization data by 'nl' or 'nlj' term.

        @param k: dictionary key.
            if this is an 'nl' term, the original object is returned.
            if this is an 'nlj' term, a proxy of the corresponding 'nl' object
            with shared data but a weight based on j-branching is returned.

        @return: SubShellPhotoIonization() object

        @note whether the original or a proxy object is returned,
        its data attribute always refers to the original data.
        any modification will affect the original data (process memory).
        """
        spi = super().__getitem__(k[0:2])
        if len(k) > 2:
            spi = copy.copy(spi)
            spi.set_spin_orbit(k[1:5])
        return spi


class SubShellPhotoIonization(object):
    """
    Sub-shell photo-ionization parameters versus photon energy.

    this class provides the photo-ionization cross-section and asymmetry parameter of one sub-shell.
    it contains a three-column record array of photon energy, cross section and asymmetry parameter in self.data.
    accessory functions provide high-level access to specific views and interpolated data.

    a weighting factor self.weight is multiplied into the method results.
    it is normally used to weight the spin-orbit peaks by calling set_spin_orbit().
    """
    SPIN_ORBIT_WEIGHTS = {"p1/2": 1. / 3.,
                          "p3/2": 2. / 3.,
                          "d3/2": 2. / 5.,
                          "d5/2": 3. / 5.,
                          "f5/2": 3. / 7.,
                          "f7/2": 4. / 7.}

    def __init__(self, photon_energy, cross_section, asymmetry_parameter):
        """
        initialize a new object instance.

        all arrays must have the same length.

        @param photon_energy: (array-like) photon energies
        @param cross_section: (array-like) cross-section values
        @param asymmetry_parameter: (array-like) asymmetry parameter values
        """
        super().__init__()
        self.data = np.rec.fromarrays([photon_energy, cross_section, asymmetry_parameter], names='eph, cs, ap')
        self.weight = 1.

    def cross_section(self, photon_energy):
        """
        interpolated sub-shell cross-section at a specific energy.

        the weighting factor self.weight (e.g. spin-orbit) is included in the result.

        @param photon_energy: photon energy in eV.
            can be scalar or numpy array.
        @return: cross-section in Mb.
            numpy.nan where photon_energy is off range.
        """
        cs = np.interp(photon_energy, self.data.eph, self.data.cs, left=np.nan, right=np.nan) * self.weight
        return cs

    def asymmetry_parameter(self, photon_energy):
        """
        interpolated asymmetry parameter at a specific energy.

        @param photon_energy: photon energy in eV.
            can be scalar or numpy array.
        @return: asymmetry parameter (0..2).
            numpy.nan where photon_energy is off range.
        """
        ap = np.interp(photon_energy, self.data.eph, self.data.ap, left=np.nan, right=np.nan)
        return ap

    def diff_cross_section(self, photon_energy, gamma):
        """
        differential cross-section for linear polarization.

        the weighting factor self.weight (e.g. spin-orbit) is included in the result.

        @param photon_energy: photon energy in eV.
        @param gamma: angle between polarization vector and electron propagation direction in degrees.
        @return: differential cross-section in Mb.
        """
        # gamma is given in degrees; convert to radians before taking the cosine.
        p2 = (3 * np.cos(np.radians(gamma)) ** 2 - 1) / 2
        cs = self.cross_section(photon_energy)
        ap = self.asymmetry_parameter(photon_energy)
        dcs = cs / 4 / np.pi * (1 + ap * p2)
        return dcs

    def photon_energy_array(self):
        """
        photon energy array.

        this returns the raw photon energy grid of the data table; self.weight does not apply here.

        @return: numpy.ndarray
        """
        return self.data.eph

    def cross_section_array(self):
        """
        sub-shell cross-section versus photon energy.

        the weighting factor self.weight (e.g. spin-orbit) is included in the result.

        @return: numpy.ndarray
        """
        return self.data.cs * self.weight

    def asymmetry_parameter_array(self):
        """
        sub-shell asymmetry parameter versus photon energy.

        the asymmetry parameter does not depend on the sub-shell occupation; self.weight does not apply here.

        @return: numpy.ndarray
        """
        return self.data.ap

    def diff_cross_section_array(self, gamma):
        """
        differential cross-section for linear polarization (full array).

        the weighting factor self.weight (e.g. spin-orbit) is included in the result.

        @param gamma: angle between polarization vector and electron propagation direction in degrees.
        @return: (np.ndarray) differential cross-section in Mb.
        """
        # gamma is given in degrees; convert to radians before taking the cosine.
        p2 = (3 * np.cos(np.radians(gamma)) ** 2 - 1) / 2
        dcs = self.data.cs / 4 / np.pi * (1 + self.data.ap * p2) * self.weight
        return dcs

    def set_spin_orbit(self, lj):
        """
        set the weight according to the spin-orbit quantum number (based on a full sub-shell).

        the weight is stored in the self.weight attribute.
        it is applied to the results of the cross-section methods, but not to the raw data in self.data!

        @param lj: (str) 4-character lj term notation, e.g. 'f7/2'
        @return: None
        """
        self.weight = self.SPIN_ORBIT_WEIGHTS.get(lj, 1.)


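A small self-contained sketch of SubShellPhotoIonization with made-up numbers, showing the interpolation and spin-orbit weighting described above (at the magic angle, P2 vanishes and the differential cross-section reduces to the total cross-section over 4π):

~~~~~~{.py}
import numpy as np

# three fictitious data points: photon energy (eV), cross section (Mb), asymmetry parameter
spi = SubShellPhotoIonization([100., 500., 1000.], [2.0, 1.0, 0.5], [1.0, 1.2, 1.4])

spi.cross_section(750.)             # -> 0.75 (linear interpolation)
spi.cross_section(2000.)            # -> nan  (off the tabulated range)
spi.set_spin_orbit('f7/2')          # weight 4/7 of the full sub-shell
spi.cross_section(750.)             # -> 0.75 * 4/7
spi.diff_cross_section(750., 54.7)  # ~ weighted cross-section / (4 pi) at the magic angle
~~~~~~
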
def load_kalha_data():
@@ -98,7 +293,7 @@ def load_kalha_file(path):
        for l in 'spdf':
            col = f"{n}{l}"
            try:
                data[col] = (eph, a[col].copy())
                data[col] = SubShellPhotoIonization(eph, a[col].copy(), np.zeros_like(eph))
            except ValueError:
                pass
    return data
@@ -138,24 +333,24 @@ def load_elettra_file(symbol, nl):

    @param symbol: (str) element symbol
    @param nl: (str) nl term, e.g. '2p' (no spin-orbit)
    @return: (photon_energy, cross_section) tuple of 1-dimensional numpy arrays.
    @return: SubShellPhotoIonization object holding photon energy, cross section and asymmetry parameter,
        or None if the data is not available.
    """
    spi = None

    url = f"https://vuo.elettra.eu/services/elements/data/{symbol.lower()}{nl}.txt"
    try:
        data = urllib.request.urlopen(url)
    except urllib.error.HTTPError:
        eph = None
        cs = None
        pass
    else:
        a = np.genfromtxt(data)
        try:
            eph = a[:, 0]
            cs = a[:, 1]
            spi = SubShellPhotoIonization(a[:, 0], a[:, 1], a[:, 4])
        except IndexError:
            eph = None
            cs = None
            pass

    return eph, cs
    return spi


def load_elettra_data():
@@ -171,9 +366,9 @@ def load_elettra_data():
            nl = nlj[0:2]
            eb = element.binding_energy[nlj]
            if nl not in element_data and eb <= 2000:
                eph, cs = load_elettra_file(element.symbol, nl)
                if eph is not None and cs is not None:
                    element_data[nl] = (eph, cs)
                spi = load_elettra_file(element.symbol, nl)
                if spi is not None:
                    element_data[nl] = spi
        if len(element_data):
            data[element.symbol] = element_data

@@ -212,15 +407,9 @@ def load_pickled_data(path):
    return data


class Photoionization(object):
    def __init__(self):
        self.cross_section = {}
        self.cross_section_units = "Mb"


def init(table, reload=False):
    """
    loads cross section data into the periodic table.
    loads cross-section data into the periodic table.

    this function is called by the periodictable to load the data on demand.

@@ -233,16 +422,25 @@ def init(table, reload=False):
    table.properties.append('photoionization')

    # default value
    pt.core.Element.photoionization = Photoionization()
    pt.core.Element.photoionization = PhotoIonization()

    p = Path(Path(__file__).parent, "cross-sections.dat")
    data = load_pickled_data(p)
    for el_key, el_data in data.items():
        # el_data is a dict mapping 'nl' terms to SubShellPhotoIonization objects
        try:
            el = table[int(el_key)]
        except ValueError:
            el = table.symbol(el_key)
        pi = Photoionization()
        pi.cross_section = el_data
        pi.cross_section_units = "Mb"
        el.photoionization = pi
        el.photoionization = PhotoIonization(el_data)


def _load_photoionization():
    """
    delayed loading of the photoionization table.
    """

    init(periodictable.core.default_table())


periodictable.core.delayed_load(['photoionization'], _load_photoionization)

@@ -77,9 +77,9 @@ def get_binding_energy(photon_energy, element, nlj):
    return np.nan


def get_cross_section(photon_energy, element, nlj):
def get_cross_section(photon_energy, element, nlj, gamma=None):
    """
    look up the photoionization cross section.
    look up the photo-ionization cross-section.

    since the Yeh/Lindau tables do not resolve the spin-orbit splitting,
    this function applies the normal relative weights of a full sub-shell.
@@ -89,31 +89,28 @@ def get_cross_section(photon_energy, element, nlj):
    @param photon_energy: photon energy in eV.
    @param element: Element object of the periodic table.
    @param nlj: (str) spectroscopic term, e.g. '4f7/2'.
    @return: (float) cross section in Mb.
        the j-value can be left out, in which case the sum over all j-states is returned.
    @param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
        By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
    @return: (float) total (gamma=None) or differential (gamma not None) cross section in Mb.
    """
    nl = nlj[0:2]

    if not hasattr(element, "photoionization"):
        element = get_element(element)
    try:
        pet, cst = element.photoionization.cross_section[nl]
        pi = element.photoionization[nlj]
    except KeyError:
        return np.nan

    # weights of spin-orbit peaks
    d_wso = {"p1/2": 1./3.,
             "p3/2": 2./3.,
             "d3/2": 2./5.,
             "d5/2": 3./5.,
             "f5/2": 3./7.,
             "f7/2": 4./7.}
    wso = d_wso.get(nlj[1:], 1.)
    cst = cst * wso
    if gamma is None:
        cs = pi.cross_section(photon_energy)
    else:
        cs = pi.diff_cross_section(photon_energy, gamma)

    # todo: consider spline
    return np.interp(photon_energy, pet, cst)
    return cs


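A hedged usage sketch of the revised get_cross_section(); the enclosing module is not named in this excerpt, and the photon energy is a placeholder. With gamma given, the function returns the differential cross-section, sigma / (4 pi) * (1 + beta * P2(cos gamma)), via diff_cross_section():

~~~~~~{.py}
import periodictable as pt
import pmsco.elements.photoionization

au = pt.elements[79]
get_cross_section(800., au, '4f7/2')             # total cross-section, weighted 4/7
get_cross_section(800., au, '4f')                # sum over the spin-orbit doublet
get_cross_section(800., au, '4f7/2', gamma=0.)   # differential, emission along the polarization
~~~~~~
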
def build_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5):
def build_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, gamma=None):
    """
    calculate the positions and amplitudes of core-level photoemission lines.

@@ -126,6 +123,8 @@ def build_spectrum(photon_energy, elements, binding_energy=False, work_function=
        if a dictionary is given, the (float) values are stoichiometric weights of the elements.
    @param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default).
    @param work_function: (float) work function of the instrument in eV.
    @param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
        By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
    @return: tuple (labels, positions, intensities) of 1-dimensional numpy arrays representing the spectrum.
        labels are in the format {Symbol}{n}{l}{j}.
    """
@@ -141,7 +140,7 @@ def build_spectrum(photon_energy, elements, binding_energy=False, work_function=
            for j in ['', '1/2', '3/2', '5/2', '7/2']:
                nlj = f"{n}{l}{j}"
                eb = get_binding_energy(photon_energy, el, nlj)
                cs = get_cross_section(photon_energy, el, nlj)
                cs = get_cross_section(photon_energy, el, nlj, gamma=gamma)
                try:
                    cs = cs * elements[element]
                except (KeyError, TypeError):
@@ -163,7 +162,7 @@ def build_spectrum(photon_energy, elements, binding_energy=False, work_function=
    return labels, ekin, intens


def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, show_labels=True):
def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, gamma=None, show_labels=True):
    """
    plot a simple spectrum representation of a material.

@@ -178,11 +177,13 @@ def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4
        if a dictionary is given, the (float) values are stoichiometric weights of the elements.
    @param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default).
    @param work_function: (float) work function of the instrument in eV.
    @param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
        By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
    @param show_labels: (bool) show peak labels (True, default) or not (False).
    @return: (figure, axes)
    """
    labels, energy, intensity = build_spectrum(photon_energy, elements, binding_energy=binding_energy,
                                               work_function=work_function)
                                               work_function=work_function, gamma=gamma)

    fig, ax = plt.subplots()
    ax.stem(energy, intensity, basefmt=' ', use_line_collection=True)

178
pmsco/graphics/cluster.py
Executable file
@@ -0,0 +1,178 @@
#!/usr/bin/env python

"""
@package pmsco.graphics.cluster
graphics rendering module for clusters.

this module is experimental.
interface and implementation may change without notice.

at the moment we are evaluating rendering solutions.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2017 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import numpy as np
import argparse
import logging

logger = logging.getLogger(__name__)

try:
    import pymol2
except ImportError:
    logger.warning("error importing pymol2. cluster rendering using pymol2 disabled.")
    pymol2 = None

try:
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    # from matplotlib.backends.backend_pdf import FigureCanvasPdf
    # from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
    Axes3D = None
    Figure = None
    FigureCanvas = None
    logger.warning("error importing matplotlib. cluster rendering using matplotlib disabled.")


def render_file(spath, view):
    sname = "cluster"
    opath = spath + ".png"

    pm = pymol2.PyMOL()
    cmd = pm.cmd
    pm.start()
    try:
        cmd.reinitialize()
        cmd.load(spath, sname)
        cmd.disable("all")
        cmd.enable(sname)

        cmd.set("orthoscopic", 1)
        cmd.bg_color("white")
        cmd.show_as("spheres")
        cmd.alter("all", "vdw=0.8")
        # cmd.show("sticks")

        # zoom selection-expression    # selection to fill the viewer
        # orient selection-expression  # largest dim horizontal, second-largest vertical

        # cmd.orient()  --- should stick to fixed orientation
        # cmd.turn("x", -90)
        # cmd.turn("x", 0)
        # cmd.turn("y", 0)

        # cmd.clip("slab", 5.0)
        cmd.viewport(640, 640)
        cmd.zoom(complete=1)
        # pymol.cmd.rebuild()  --- necessary?

        cmd.png(opath)
    finally:
        pm.stop()


def render_cluster(clu):
    pass


def set_axes_equal(ax):
    """
    Make axes of 3D plot have equal scale so that spheres appear as spheres,
    cubes as cubes, etc. This is one possible solution to Matplotlib's
    ax.set_aspect('equal') and ax.axis('equal') not working for 3D.

    @author https://stackoverflow.com/a/31364297

    @param ax: a matplotlib axis, e.g., as output from plt.gca().
    """

    x_limits = ax.get_xlim3d()
    y_limits = ax.get_ylim3d()
    z_limits = ax.get_zlim3d()

    x_range = abs(x_limits[1] - x_limits[0])
    x_middle = np.mean(x_limits)
    y_range = abs(y_limits[1] - y_limits[0])
    y_middle = np.mean(y_limits)
    z_range = abs(z_limits[1] - z_limits[0])
    z_middle = np.mean(z_limits)

    # The plot bounding box is a sphere in the sense of the infinity
    # norm, hence I call half the max range the plot radius.
    plot_radius = 0.5 * max([x_range, y_range, z_range])

    ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
    ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
    ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])


def render_xyz_matplotlib(filename, data, canvas=None):
    """
    produce a graphics file from an array of 3d coordinates in the matplotlib scatter style.

    the default file format is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and name of the scan file.
        this is used to derive the output file path by adding the extension of the graphics file format.
    @param data: numpy array of shape (N, 3).
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used, which produces a bitmap file in PNG format.

    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

    @raise TypeError if matplotlib is not available.
    """

    if canvas is None:
        canvas = FigureCanvas
    fig = Figure()
    canvas(fig)

    ax = fig.add_subplot(111, projection='3d')
    # ax.set_aspect('equal')
    try:
        # method available in matplotlib 2.1 and later
        ax.set_proj_type('ortho')
    except AttributeError:
        pass
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c='r', marker='o')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    set_axes_equal(ax)

    out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename


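A hypothetical call of the matplotlib renderer above; the file name and coordinates are placeholders:

~~~~~~{.py}
import numpy as np

# 50 random atom positions in a 10 x 10 x 5 box (illustration only)
points = np.random.uniform([0, 0, 0], [10, 10, 5], size=(50, 3))
out = render_xyz_matplotlib("demo-cluster", points)    # writes demo-cluster.png
~~~~~~
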
def exec_cli():
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--view', default='z')
    parser.add_argument(dest='files', nargs='+')
    args = parser.parse_args()
    for fil in args.files:
        render_file(fil, args.view)


if __name__ == '__main__':
    exec_cli()
    sys.exit(0)
@@ -1,443 +0,0 @@
|
||||
"""
|
||||
@package pmsco.graphics.population
|
||||
graphics rendering module for population dynamics.
|
||||
|
||||
the main function is render_genetic_chart().
|
||||
|
||||
this module is experimental.
|
||||
interface and implementation are subject to change.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2021 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
import os
|
||||
from pmsco.database import regular_params, special_params
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
||||
# from matplotlib.backends.backend_pdf import FigureCanvasPdf
|
||||
# from matplotlib.backends.backend_svg import FigureCanvasSVG
|
||||
except ImportError:
|
||||
Figure = None
|
||||
FigureCanvas = None
|
||||
logger.warning("error importing matplotlib. graphics rendering disabled.")
|
||||
|
||||
|
||||
def _default_range(pos):
|
||||
"""
|
||||
determine a default range from actual values.
|
||||
|
||||
@param pos: (numpy.ndarray) 1-dimensional structured array of parameter values.
|
||||
@return: range_min, range_max are dictionaries of the minimum and maximum values of each parameter.
|
||||
"""
|
||||
names = regular_params(pos.dtype.names)
|
||||
range_min = {}
|
||||
range_max = {}
|
||||
for name in names:
|
||||
range_min[name] = pos[name].min()
|
||||
range_max[name] = pos[name].max()
|
||||
return range_min, range_max
|
||||
|
||||
|
||||
def _prune_constant_params(pnames, range_min, range_max):
|
||||
"""
|
||||
remove constant parameters from the list and range
|
||||
|
||||
@param pnames: (list)
|
||||
@param range_min: (dict)
|
||||
@param range_max: (dict)
|
||||
@return:
|
||||
"""
|
||||
del_names = [name for name in pnames if range_max[name] <= range_min[name]]
|
||||
for name in del_names:
|
||||
pnames.remove(name)
|
||||
del range_min[name]
|
||||
del range_max[name]
|
||||
|
||||
|
||||
def render_genetic_chart(output_file, input_data_or_file, model_space=None, generations=None, title=None, cmap=None,
|
||||
canvas=None):
|
||||
"""
|
||||
produce a genetic chart from a given population.
|
||||
|
||||
a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
|
||||
the axes are the particle number and the model parameter.
|
||||
the colour is mapped from the relative position of a parameter value within the parameter range.
|
||||
|
||||
the chart should illustrate the diversity in the population.
|
||||
converged parameters will show similar colours.
|
||||
by comparing charts of different generations, the effect of the optimization algorithm can be examined.
|
||||
though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.
|
||||
|
||||
the function requires input in one of the following forms:
|
||||
- a result (.dat) file or numpy structured array.
|
||||
the array must contain regular parameters, as well as the _particle and _gen columns.
|
||||
the function generates one chart per generation unless the generation argument is specified.
|
||||
- a population (.pop) file or numpy structured array.
|
||||
the array must contain regular parameters, as well as the _particle columns.
|
||||
- a pmsco.optimizers.population.Population object with valid data.
|
||||
|
||||
the graphics file format can be changed by providing a specific canvas. default is PNG.
|
||||
|
||||
this function requires the matplotlib module.
|
||||
if it is not available, the function raises an error.
|
||||
|
||||
@param output_file: path and base name of the output file without extension.
|
||||
a generation index and the file extension according to the file format are appended.
|
||||
@param input_data_or_file: a numpy structured ndarray of a population or result list from an optimization run.
|
||||
alternatively, the file path of a result file (.dat) or population file (.pop) can be given.
|
||||
file can be any object that numpy.genfromtxt() can handle.
|
||||
@param model_space: model space can be a pmsco.project.ModelSpace object,
|
||||
any object that contains the same min and max attributes as pmsco.project.ModelSpace,
|
||||
or a dictionary with to keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
|
||||
by default, the model space boundaries are derived from the input data.
|
||||
if a model_space is specified, only the parameters listed in it are plotted.
|
||||
@param generations: (int or sequence) generation index or list of indices.
|
||||
this index is used in the output file name and for filtering input data by generation.
|
||||
if the input data does not contain the generation, no filtering is applied.
|
||||
by default, no filtering is applied, and one graph for each generation is produced.
|
||||
@param title: (str) title of the chart.
|
||||
the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
|
||||
default: derived from file name.
|
||||
@param cmap: (str) name of colour map supported by matplotlib.
|
||||
default is 'jet'.
|
||||
other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
|
||||
@param canvas: a FigureCanvas class reference from a matplotlib backend.
|
||||
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
|
||||
some other options are:
|
||||
matplotlib.backends.backend_pdf.FigureCanvasPdf or
|
||||
matplotlib.backends.backend_svg.FigureCanvasSVG.
|
||||
|
||||
@return (str) path and name of the generated graphics file.
|
||||
empty string if an error occurred.
|
||||
|
||||
@raise TypeError if matplotlib is not available.
|
||||
"""
|
||||
|
||||
try:
|
||||
pos = np.copy(input_data_or_file.pos)
|
||||
range_min = input_data_or_file.model_min
|
||||
range_max = input_data_or_file.model_max
|
||||
generations = [input_data_or_file.generation]
|
||||
except AttributeError:
|
||||
try:
|
||||
pos = np.atleast_1d(np.genfromtxt(input_data_or_file, names=True))
|
||||
except TypeError:
|
||||
pos = np.copy(input_data_or_file)
|
||||
range_min, range_max = _default_range(pos)
|
||||
pnames = regular_params(pos.dtype.names)
|
||||
|
||||
if model_space is not None:
|
||||
try:
|
||||
# a ModelSpace-like object
|
||||
range_min = model_space.min
|
||||
range_max = model_space.max
|
||||
except AttributeError:
|
||||
# a dictionary-like object
|
||||
range_min = model_space['min']
|
||||
range_max = model_space['max']
|
||||
try:
|
||||
pnames = range_min.keys()
|
||||
except AttributeError:
|
||||
pnames = range_min.dtype.names
|
||||
|
||||
pnames = list(pnames)
|
||||
_prune_constant_params(pnames, range_min, range_max)
|
||||
|
||||
if generations is None:
|
||||
try:
|
||||
generations = np.unique(pos['_gen'])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
files = []
|
||||
path, base = os.path.split(output_file)
|
||||
if generations is not None and len(generations):
|
||||
if title is None:
|
||||
title = "{base} gen {gen}"
|
||||
|
||||
for generation in generations:
|
||||
idx = np.where(pos['_gen'] == generation)
|
||||
gpos = pos[idx]
|
||||
gtitle = title.format(base=base, gen=int(generation))
|
||||
out_filename = "{base}-{gen}".format(base=os.fspath(output_file), gen=int(generation))
|
||||
out_filename = _render_genetic_chart_2(out_filename, gpos, pnames, range_min, range_max,
|
||||
gtitle, cmap, canvas)
|
||||
files.append(out_filename)
|
||||
else:
|
||||
if title is None:
|
||||
title = "{base}"
|
||||
gtitle = title.format(base=base, gen="")
|
||||
out_filename = "{base}".format(base=os.fspath(output_file))
|
||||
out_filename = _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, gtitle, cmap, canvas)
|
||||
files.append(out_filename)
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, title, cmap, canvas):
|
||||
"""
|
||||
internal part of render_genetic_chart()
|
||||
|
||||
this function calculates the relative position in the model space,
|
||||
sorts the positions array by particle index,
|
||||
and calls plot_genetic_chart().
|
||||
|
||||
@param out_filename:
|
||||
@param pos:
|
||||
@param pnames:
|
||||
@param range_max:
|
||||
@param range_min:
|
||||
@param cmap:
|
||||
@param canvas:
|
||||
@return: out_filename
|
||||
"""
|
||||
spos = np.sort(pos, order='_particle')
|
||||
rpos2d = np.zeros((spos.shape[0], len(pnames)))
|
||||
for index, pname in enumerate(pnames):
|
||||
rpos2d[:, index] = (spos[pname] - range_min[pname]) / (range_max[pname] - range_min[pname])
|
||||
out_filename = plot_genetic_chart(out_filename, rpos2d, pnames, title=title, cmap=cmap, canvas=canvas)
|
||||
return out_filename
|
||||
|
||||
|
||||
def plot_genetic_chart(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    produce a genetic chart from the given data.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the chart should highlight the amount of diversity in the population
    and - by comparing charts of different generations - the changes due to mutation.
    the axes are the model parameter (x) and particle number (y).
    the colour is mapped from the relative position of a parameter value within the parameter range.

    in contrast to render_genetic_chart() this function contains only the drawing code.
    it requires input in the final form and does not do any checks, conversion or processing.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and name of the output file without extension.
    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        all values must be between 0 and 1.
    @param param_labels: (sequence) list or tuple of parameter names.
    @param title: (str) string to be printed as chart title. default is 'genetic chart'.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return: path and name of the generated graphics file.

    @raise TypeError if matplotlib is not available.
    """
    if canvas is None:
        canvas = FigureCanvas
    if cmap is None:
        cmap = 'jet'
    if title is None:
        title = 'genetic chart'

    fig = Figure()
    canvas(fig)
    ax = fig.add_subplot(111)
    im = ax.imshow(rpos2d, aspect='auto', cmap=cmap, origin='lower')
    im.set_clim((0.0, 1.0))
    ax.set_xticks(np.arange(len(param_labels)))
    ax.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    ax.set_ylabel('particle')
    ax.set_title(title)
    cb = ax.figure.colorbar(im, ax=ax)
    cb.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename
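
# usage sketch (illustrative; parameter names and file name are made up):
#
#     rpos2d = np.random.default_rng(0).random((20, 3))             # 20 particles, 3 parameters in [0, 1]
#     plot_genetic_chart("pop_gen0", rpos2d, ['dx', 'dz', 'phi'])   # -> "pop_gen0.png"
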
def render_swarm(output_file, input_data, model_space=None, title=None, cmap=None, canvas=None):
    """
    render a two-dimensional particle swarm population.

    this function generates a schematic rendering of a particle swarm in two dimensions.
    particles are represented by their position and velocity, indicated by an arrow.
    the model space is projected on the first two (or selected two) variable parameters.
    in the background, a scatter plot of results (dots with pseudocolor representing the R-factor) can be plotted.
    the chart type is designed for the particle swarm optimization algorithm.

    the function requires input in one of the following forms:
    - position (.pos), velocity (.vel) and result (.dat) files or the respective numpy structured arrays.
      the arrays must contain regular parameters, as well as the `_particle` column.
      the result file must also contain an `_rfac` column.
    - a pmsco.optimizers.population.Population object with valid data.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param output_file: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param input_data: a pmsco.optimizers.population.Population object with valid data,
        or a sequence of position, velocity and result arrays.
        the arrays must be structured ndarrays corresponding to the respective Population members.
        alternatively, the arrays can be referenced as file paths
        in any format that numpy.genfromtxt() can handle.
    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the input data.
        if a model_space is specified, only the parameters listed in it are plotted.
    @param title: (str) title of the chart.
        the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
        default: derived from file name.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths and names of the generated graphics files.
        empty list if an error occurred.

    @raise TypeError if matplotlib is not available.
    """
    try:
        range_min = input_data.model_min
        range_max = input_data.model_max
        pos = np.copy(input_data.pos)
        vel = np.copy(input_data.vel)
        rfac = np.copy(input_data.results)
        generation = input_data.generation
    except AttributeError:
        try:
            pos = np.atleast_1d(np.genfromtxt(input_data[0], names=True))
            vel = np.atleast_1d(np.genfromtxt(input_data[1], names=True))
            rfac = np.atleast_1d(np.genfromtxt(input_data[2], names=True))
        except TypeError:
            pos = np.copy(input_data[0])
            vel = np.copy(input_data[1])
            rfac = np.copy(input_data[2])
        range_min, range_max = _default_range(rfac)
    pnames = regular_params(pos.dtype.names)

    if model_space is not None:
        try:
            # a ModelSpace-like object
            range_min = model_space.min
            range_max = model_space.max
        except AttributeError:
            # a dictionary-like object
            range_min = model_space['min']
            range_max = model_space['max']
        try:
            pnames = range_min.keys()
        except AttributeError:
            pnames = range_min.dtype.names

    pnames = list(pnames)
    _prune_constant_params(pnames, range_min, range_max)
    pnames = pnames[0:2]
    files = []
    if len(pnames) == 2:
        params = {pnames[0]: [range_min[pnames[0]], range_max[pnames[0]]],
                  pnames[1]: [range_min[pnames[1]], range_max[pnames[1]]]}
        out_filename = plot_swarm(output_file, pos, vel, rfac, params, title=title, cmap=cmap, canvas=canvas)
        files.append(out_filename)
    else:
        logging.warning("model space must be two-dimensional and non-degenerate.")

    return files

def plot_swarm(filename, pos, vel, rfac, params, title=None, cmap=None, canvas=None):
    """
    plot a two-dimensional particle swarm population.

    this is a sub-function of render_swarm() containing just the plotting commands.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param pos: structured ndarray containing the positions of the particles.
    @param vel: structured ndarray containing the velocities of the particles.
    @param rfac: structured ndarray containing positions and R-factor values.
        this array is independent of pos and vel.
        it can also be set to None if results should be suppressed.
    @param params: dictionary of two parameters to be plotted.
        the keys correspond to columns of the pos, vel and rfac arrays.
        the values are lists [minimum, maximum] that define the axis range.
    @param title: (str) title of the chart.
        the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
        default: derived from file name.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

    @raise TypeError if matplotlib is not available.
    """
    if canvas is None:
        canvas = FigureCanvas
    if cmap is None:
        cmap = 'plasma'
    if title is None:
        title = 'swarm map'

    pnames = list(params.keys())
    fig = Figure()
    canvas(fig)
    ax = fig.add_subplot(111)

    if rfac is not None:
        try:
            # index by parameter name (the params dict holds the axis ranges, not column keys)
            s = ax.scatter(rfac[pnames[0]], rfac[pnames[1]], s=5, c=rfac['_rfac'], cmap=cmap, vmin=0, vmax=1)
        except (KeyError, ValueError):
            # _rfac column missing
            pass
        else:
            cb = ax.figure.colorbar(s, ax=ax)
            cb.ax.set_ylabel("R-factor", rotation=-90, va="bottom")

    p = ax.plot(pos[pnames[0]], pos[pnames[1]], 'co')
    q = ax.quiver(pos[pnames[0]], pos[pnames[1]], vel[pnames[0]], vel[pnames[1]], color='c')
    ax.set_xlim(params[pnames[0]])
    ax.set_ylim(params[pnames[1]])
    ax.set_xlabel(pnames[0])
    ax.set_ylabel(pnames[1])
    ax.set_title(title)

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename
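
# usage sketch (illustrative; parameter names and values are made up):
#
#     dt = [('dx', 'f8'), ('dz', 'f8')]
#     pos = np.zeros(10, dtype=dt)
#     vel = np.zeros(10, dtype=dt)
#     pos['dx'] = np.linspace(-0.8, 0.8, 10)
#     pos['dz'] = np.linspace(0.1, 0.9, 10)
#     vel['dx'] = 0.1
#     vel['dz'] = -0.05
#     params = {'dx': [-1.0, 1.0], 'dz': [0.0, 1.0]}
#     plot_swarm("swarm_gen0", pos, vel, None, params)      # -> "swarm_gen0.png"
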
@@ -202,14 +202,16 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
    cb = fig.colorbar(pc, shrink=0.4, pad=0.1)

    clip = 2
    dlo = np.nanpercentile(data['i'], clip)
    dhi = np.nanpercentile(data['i'], 100 - clip)

    if is_modf:
        pc.set_cmap("RdBu_r")
        # im.set_cmap("coolwarm")
        dhi = max(abs(dlo), abs(dhi))
        dlo = -dhi
        pc.set_clim((dlo, dhi))
        try:
            ti = cb.get_ticks()
            ti = [min(ti), 0., max(ti)]
210
pmsco/graphics/scattering.py
Normal file
@@ -0,0 +1,210 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import math
import numpy as np
import scipy.interpolate
import scipy.special

logger = logging.getLogger(__name__)

try:
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
except ImportError:
    Figure = None
    FigureCanvas = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")

class TMatrix(object):
    def __init__(self):
        """
        self.en.shape = (n_e,)
        self.tl.shape = (n_e, n_l)
        """
        self.en = None
        self.tl = None

    def load_test_data(self):
        self.en = np.array([100.])
        raw = [-0.052845, -0.003238, 0.478705, 0.672581, 0.137932, 0.981700, 0.323890, 0.805299, 0.291814, 0.776792,
               0.369416, 0.351845, 0.199775, 0.113314, 0.062479, 0.025691, 0.013699, 0.005283]
        re_tl = np.array(raw[0::2])
        im_tl = np.array(raw[1::2])
        # store as (1, n_l) to keep the documented (n_e, n_l) shape
        self.tl = np.atleast_2d(re_tl + 1j * im_tl)

    def load_edac_scattering(self, f, energy=math.nan):
        """
        load T matrix from EDAC scattering file

        currently, only the 'tl' format is supported.

        @param f: file path
        @param energy: kinetic energy in eV if none is defined in the file
        @return: None
        """
        with open(f, "r") as fi:
            h = fi.readline().rstrip().split(' ')

        ne = int(h[0])
        if ne > 1:
            assert h[1] == 'E(eV)'
            del h[1]
        lmax = int(h[1])
        assert h[2] == 'regular'
        assert h[3] == 'tl'

        self.load_edac_tl(f, ne, lmax, energy=energy)

    def load_edac_tl(self, f, ne, lmax, energy=math.nan):
        """
        load T matrix from EDAC scattering file in 'tl' format

        @param f: file path
        @param ne: number of energies (rows)
        @param lmax: maximum l number (columns = 2 * (lmax + 1))
        @param energy: kinetic energy in eV if none is defined in the file
        @return: None
        """
        if ne > 1:
            self.en = np.atleast_1d(np.genfromtxt(f, skip_header=1, usecols=[0]))
            start_col = 1
        else:
            # keep a 1-d array so that self.en[0] works for single-energy files
            self.en = np.atleast_1d(np.asarray(energy))
            start_col = 0

        re_cols = range(start_col, start_col + (lmax + 1) * 2, 2)
        im_cols = range(start_col + 1, start_col + (lmax + 1) * 2, 2)
        # atleast_2d keeps the documented (n_e, n_l) shape for single-energy files
        re_tl = np.atleast_2d(np.genfromtxt(f, skip_header=1, usecols=re_cols))
        im_tl = np.atleast_2d(np.genfromtxt(f, skip_header=1, usecols=im_cols))
        self.tl = re_tl + 1j * im_tl
        assert self.tl.shape == (ne, lmax + 1), "array shape mismatch"

    def planewave_amplitude(self, energy, angle):
        """
        total, complex plane wave scattering amplitude for given energy and angle

        @param energy: kinetic energy in eV.
            this can be a numeric value, a 1-dimensional numpy.ndarray,
            or any value accepted by the numpy.asarray function.
        @param angle: scattering angle in degrees (0..180).
            this can be a numeric value, a 1-dimensional numpy.ndarray,
            or any value accepted by the numpy.asarray function.
        @return: 3 numpy arrays (amp, magnitude, phase) representing the scattering amplitude
            versus energy and angle.
            the shape of the three arrays is (n_energies, n_angles).
            @arg amp: complex scattering amplitude.
            @arg magnitude: magnitude (absolute value) of the scattering amplitude.
            @arg phase: phase angle in radians of the scattering amplitude.
        """
        if not isinstance(energy, np.ndarray):
            energy = np.atleast_1d(np.asarray(energy))
        ne = len(energy)
        if not isinstance(angle, np.ndarray):
            angle = np.atleast_1d(np.array(angle))
        na = len(angle)

        kinv = 1. / (0.513019932 * np.sqrt(energy))
        if self.en.size > 1:
            f_tl = scipy.interpolate.interp1d(self.en, self.tl, axis=0, copy=False)
            tl = f_tl(energy)
        else:
            # interp1d needs at least two points; use a single-energy table as-is
            tl = np.broadcast_to(self.tl, (ne, self.tl.shape[1]))

        cos_angle = np.cos(np.radians(angle))
        lmax = self.tl.shape[1] - 1
        l = np.arange(0, lmax + 1)

        amp = np.zeros((ne, na), dtype=complex)
        for ia, ca in enumerate(cos_angle):
            lpmn, __ = scipy.special.lpmn(0, lmax, ca)
            fpart = np.outer(kinv, (2 * l + 1) * lpmn[0]) * tl
            ftot = np.sum(fpart, axis=-1)
            amp[:, ia] = ftot

        mag = np.abs(amp)
        pha = np.angle(amp)

        return amp, mag, pha
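
# note: the loop above evaluates the standard partial-wave expansion
#     f(E, theta) = (1/k) * sum_l (2l+1) * t_l(E) * P_l(cos theta)
# usage sketch with the built-in test data (illustrative):
#
#     tm = TMatrix()
#     tm.load_test_data()
#     amp, mag, pha = tm.planewave_amplitude(100., [0., 90., 180.])
#     mag.shape    # -> (1, 3)
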
def render_scattering_1d(filename, tmatrix, energy=None):
    if energy is None:
        en = tmatrix.en[0]
    else:
        en = energy
    an = np.arange(0, 181, 2)
    __, mag, pha = tmatrix.planewave_amplitude(en, an)
    pha = pha / math.pi

    canvas = FigureCanvas
    fig = Figure()
    canvas(fig)

    ax = fig.add_subplot(211)
    ax.plot(an, mag[0])
    ax.set_xlabel('th (deg)')
    ax.set_ylabel('mag (arb)')

    ax = fig.add_subplot(212)
    ax.plot(an, pha[0])
    ax.set_xlabel('th (deg)')
    ax.set_ylabel('pha (1/pi)')

    out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename

def render_scattering_2d(filename, tmatrix):
    en = tmatrix.en
    an = np.arange(0, 181, 2)
    __, mag, pha = tmatrix.planewave_amplitude(en, an)
    pha = pha / math.pi

    canvas = FigureCanvas
    fig = Figure()
    canvas(fig)

    ax = fig.add_subplot(211)
    im = ax.imshow(mag, origin='lower', aspect='auto', interpolation='none')
    im.set_extent((an[0], an[-1], en[0], en[-1]))
    im.set_cmap("magma")
    ax.set_xlabel('th (deg)')
    ax.set_ylabel('E (eV)')
    # cb = ax.colorbar(im, shrink=0.4, pad=0.1)
    # ti = cb.get_ticks()
    # ti = [0., max(ti)]
    # cb.set_ticks(ti)

    ax = fig.add_subplot(212)
    im = ax.imshow(pha, origin='lower', aspect='auto', interpolation='none')
    im.set_extent((an[0], an[-1], en[0], en[-1]))
    im.set_cmap("RdBu_r")
    ax.set_xlabel('th (deg)')
    ax.set_ylabel('E (eV)')
    # cb = ax.colorbar(im, shrink=0.4, pad=0.1)

    # symmetric colour scale about zero for the phase panel
    # (clipped percentiles of pha, not mag, which belongs to the other panel)
    dlo = np.nanpercentile(pha, 2)
    dhi = np.nanpercentile(pha, 98)
    dhi = max(abs(dlo), abs(dhi))
    dlo = -dhi
    im.set_clim((dlo, dhi))
    # ti = cb.get_ticks()
    # ti = [min(ti), 0., max(ti)]
    # cb.set_ticks(ti)

    out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename

def render_scattering_map(filename, energy):
    tmatrix = TMatrix()
    tmatrix.load_edac_scattering(filename, energy)

    if tmatrix.tl.shape[0] == 1:
        out_filename = render_scattering_1d(filename, tmatrix)
    else:
        out_filename = render_scattering_2d(filename, tmatrix)

    return out_filename
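
# usage sketch (illustrative; the file name is made up):
#
#     out = render_scattering_map("cluster.tl", energy=100.)   # EDAC 'tl' file
#     print(out)    # e.g. "cluster.tl.png"
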
@@ -55,7 +55,6 @@ import numpy as np
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from pmsco.compat import open
|
||||
import pmsco.data as md
|
||||
import pmsco.dispatch as dispatch
|
||||
import pmsco.graphics.scan as mgs
|
||||
@@ -375,7 +374,7 @@ class SingleModelHandler(ModelHandler):
|
||||
keys.sort(key=lambda t: t[0].lower())
|
||||
vals = (str(self.result[key]) for key in keys)
|
||||
filename = Path(self._project.output_file).with_suffix(".dat")
|
||||
with open(filename, "w") as outfile:
|
||||
with open(filename, "wt", encoding="latin1") as outfile:
|
||||
outfile.write("# ")
|
||||
outfile.write(" ".join(keys))
|
||||
outfile.write("\n")
|
||||
@@ -1002,27 +1001,3 @@ class EnergyRegionHandler(RegionHandler):
|
||||
logger.error("no region tasks generated. this is probably a bug.")
|
||||
|
||||
return out_tasks
|
||||
|
||||
|
||||
def choose_region_handler_class(project):
    """
    choose a suitable region handler for the project.

    the function returns the EnergyRegionHandler class
    if the project includes an energy scan with at least 10 steps.
    otherwise, it returns the SingleRegionHandler.

    angle scans do not benefit from region splitting in EDAC.

    @param project: Project instance.
    @return: SingleRegionHandler or EnergyRegionHandler class.
    """
    energy_scans = 0
    for scan in project.scans:
        if scan.energies.shape[0] >= 10:
            energy_scans += 1

    if energy_scans >= 1:
        return EnergyRegionHandler
    else:
        return SingleRegionHandler
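
# usage sketch (mirrors how run_project assigns the handler; illustrative):
#
#     project.handler_classes['region'] = choose_region_handler_class(project)
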
@@ -6,6 +6,13 @@ a collection of small and generic code bits mostly collected from the www.
|
||||
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import ctypes
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from typing import BinaryIO
|
||||
|
||||
|
||||
class BraceMessage(object):
|
||||
"""
|
||||
@@ -22,3 +29,40 @@ class BraceMessage(object):
|
||||
|
||||
def __str__(self):
|
||||
return self.fmt.format(*self.args, **self.kwargs)
|
||||
|
||||
|
||||
libc = ctypes.CDLL(None)
c_stdout = ctypes.c_void_p.in_dll(libc, 'stdout')


@contextlib.contextmanager
def stdout_redirected(dest_file: BinaryIO):
    """
    A context manager to temporarily redirect stdout to a file.

    Redirects all standard output from Python and the C library to the specified file.
    This can be used, e.g., to capture output from Fortran code.

    credit: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/

    @param dest_file: binary file open for writing ('wb' mode).
        Only the fileno() method of this object is used.
    @return: None
    """

    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        libc.fflush(c_stdout)
        sys.stdout.close()
        os.dup2(to_fd, original_stdout_fd)
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, 'wb'))

    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        _redirect_stdout(dest_file.fileno())
        yield
    finally:
        # restore in the finally clause so that stdout comes back
        # even if the body raises an exception
        _redirect_stdout(saved_stdout_fd)
        os.close(saved_stdout_fd)
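
# usage sketch (illustrative): capture C-level and Python-level stdout in one file.
#
#     with open("calc.log", "wb") as f:
#         with stdout_redirected(f):
#             print("captured from python")
#             # output of C/Fortran extensions called here ends up in calc.log as well
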
@@ -6,21 +6,15 @@ this module provides functions for loading/saving pmsco data in igor pro.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2019 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2019-23 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pmsco.compat import open
|
||||
|
||||
|
||||
def _escape_igor_string(s):
|
||||
s = s.replace('\\', '\\\\')
|
||||
@@ -91,7 +85,7 @@ class IgorExport(object):
|
||||
"""
|
||||
write to igor file.
|
||||
"""
|
||||
with open(filename, 'w') as f:
|
||||
with open(filename, 'wt', encoding="utf8") as f:
|
||||
self._write_header(f)
|
||||
self._write_data(f)
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
__author__ = 'matthias muntwiler'
|
||||
@@ -1,74 +0,0 @@
|
||||
SHELL=/bin/sh
|
||||
|
||||
# makefile for the LOESS module
|
||||
#
|
||||
# required libraries: libblas, liblapack, libf2c
|
||||
# (you may have to set soft links so that linker finds them)
|
||||
#
|
||||
# the makefile calls python-config to get the compilation flags and include path.
|
||||
# you may override the corresponding variables on the command line or by environment variables:
|
||||
#
|
||||
# PYTHON_INC: specify additional include directories. each dir must start with -I prefix.
|
||||
# PYTHON_CFLAGS: specify the C compiler flags.
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .x
|
||||
.PHONY: all loess test gas madeup ethanol air galaxy
|
||||
|
||||
OBJ=loessc.o loess.o predict.o misc.o loessf.o dqrsl.o dsvdc.o fix_main.o
|
||||
|
||||
FFLAGS?=-O
|
||||
LIB=-lblas -lm -lf2c
|
||||
LIBPATH?=
|
||||
CC?=gcc
|
||||
CCOPTS?=
|
||||
SWIG?=swig
|
||||
SWIGOPTS?=
|
||||
PYTHON?=python
|
||||
PYTHONOPTS?=
|
||||
PYTHON_CONFIG = ${PYTHON}-config
|
||||
#PYTHON_LIB ?= $(shell ${PYTHON_CONFIG} --libs)
|
||||
#PYTHON_INC ?= $(shell ${PYTHON_CONFIG} --includes)
|
||||
PYTHON_INC ?=
|
||||
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
|
||||
#PYTHON_LDFLAGS ?= $(shell ${PYTHON_CONFIG} --ldflags)
|
||||
|
||||
all: loess
|
||||
|
||||
loess: _loess.so
|
||||
|
||||
loess.py _loess.so: loess.c loess.i
|
||||
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
|
||||
|
||||
examples: gas madeup ethanol air galaxy
|
||||
|
||||
gas: gas.x
|
||||
|
||||
gas.x: gas.o $(OBJ)
|
||||
$(CC) -o gas.x gas.o $(OBJ) $(LIB)
|
||||
|
||||
madeup: madeup.x
|
||||
|
||||
madeup.x: madeup.o $(OBJ)
|
||||
$(CC) -o madeup.x madeup.o $(OBJ) $(LIB)
|
||||
|
||||
ethanol: ethanol.x
|
||||
|
||||
ethanol.x: ethanol.o $(OBJ)
|
||||
$(CC) -o ethanol.x ethanol.o $(OBJ) $(LIB)
|
||||
|
||||
air: air.x
|
||||
|
||||
air.x: air.o $(OBJ)
|
||||
$(CC) -o air.x air.o $(OBJ) $(LIB)
|
||||
|
||||
galaxy: galaxy.x
|
||||
|
||||
galaxy.x: galaxy.o $(OBJ)
|
||||
$(CC) -o galaxy.x galaxy.o $(OBJ) $(LIB)
|
||||
|
||||
clean:
|
||||
rm -f *.o *.so *.x core *.pyc
|
||||
rm -f loess.py loess_wrap.c
|
||||
@@ -1,63 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
@package loess.setup
|
||||
setup.py file for LOESS
|
||||
|
||||
the LOESS code included here was developed at Bell Labs by
|
||||
William S. Cleveland, Eric Grosse, Ming-Jen Shyu,
|
||||
and is dated 18 August 1992.
|
||||
the code is available in the public domain
|
||||
from http://www.netlib.org/a/dloess.
|
||||
see the README file for details.
|
||||
|
||||
the Python wrapper was set up by M. Muntwiler
|
||||
with the help of the SWIG toolkit
|
||||
and other incredible goodies available in the Linux world.
|
||||
|
||||
@bug numpy.distutils.build_src in python 2.7 treats all Fortran files with f2py
|
||||
so that they are compiled via both f2py and swig.
|
||||
this produces extra object files which cause the linker to fail.
|
||||
to fix this issue, this module hacks the build_src class.
|
||||
this hack does not work with python 3. perhaps it's even unnecessary.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import numpy
|
||||
try:
|
||||
numpy_include = numpy.get_include()
|
||||
except AttributeError:
|
||||
numpy_include = numpy.get_numpy_include()
|
||||
|
||||
def configuration(parent_package='', top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('loess', parent_package, top_path)
|
||||
lib = ['blas', 'm', 'f2c']
|
||||
src = ['loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f', 'dqrsl.f', 'dsvdc.f', 'fix_main.c', 'loess.i']
|
||||
inc_dir = [numpy_include]
|
||||
config.add_extension('_loess',
|
||||
sources=src,
|
||||
libraries=lib,
|
||||
include_dirs=inc_dir
|
||||
)
|
||||
return config
|
||||
|
||||
def ignore_sources(self, sources, extension):
|
||||
return sources
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
from numpy.distutils.core import numpy_cmdclass
|
||||
numpy_cmdclass['build_src'].f2py_sources = ignore_sources
|
||||
except ImportError:
|
||||
pass
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
SHELL=/bin/sh
|
||||
|
||||
# makefile for external programs and modules
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.PHONY: all clean edac loess msc mufpot phagen
|
||||
|
||||
EDAC_DIR = edac
|
||||
MSC_DIR = msc
|
||||
MUFPOT_DIR = mufpot
|
||||
LOESS_DIR = loess
|
||||
PHAGEN_DIR = calculators/phagen
|
||||
|
||||
all: edac loess phagen
|
||||
|
||||
edac:
|
||||
$(MAKE) -C $(EDAC_DIR)
|
||||
|
||||
loess:
|
||||
$(MAKE) -C $(LOESS_DIR)
|
||||
|
||||
msc:
|
||||
$(MAKE) -C $(MSC_DIR)
|
||||
|
||||
mufpot:
|
||||
$(MAKE) -C $(MUFPOT_DIR)
|
||||
|
||||
phagen:
|
||||
$(MAKE) -C $(PHAGEN_DIR)
|
||||
|
||||
clean:
|
||||
$(MAKE) -C $(EDAC_DIR) clean
|
||||
$(MAKE) -C $(LOESS_DIR) clean
|
||||
$(MAKE) -C $(MSC_DIR) clean
|
||||
$(MAKE) -C $(MUFPOT_DIR) clean
|
||||
$(MAKE) -C $(PHAGEN_DIR) clean
|
||||
rm -f *.pyc
|
||||
1
pmsco/msc/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
revision.f
|
||||
@@ -1 +0,0 @@
|
||||
__author__ = 'muntwiler_m'
|
||||
@@ -1,50 +0,0 @@
|
||||
SHELL=/bin/sh
|
||||
|
||||
# makefile for MSC program and module
|
||||
#
|
||||
# the MSC source code is not included in the public distribution.
|
||||
# please obtain the MSC code from the original author,
|
||||
# and copy it to this directory before compilation.
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
|
||||
.PHONY: all clean edac msc mufpot
|
||||
|
||||
FC?=gfortran
|
||||
FCCOPTS?=
|
||||
F2PY?=f2py
|
||||
F2PYOPTS?=
|
||||
CC?=gcc
|
||||
CCOPTS?=
|
||||
SWIG?=swig
|
||||
SWIGOPTS?=
|
||||
PYTHON?=python
|
||||
PYTHONOPTS?=
|
||||
PYTHONINC?=
|
||||
|
||||
all: msc
|
||||
|
||||
msc: msc.exe msc.so
|
||||
|
||||
msc.exe: msc.f param.f common.f phases.f angles.f revision.f
|
||||
$(FC) $(FCOPTS) -o msc.exe msc.f phases.f angles.f
|
||||
|
||||
#msc.pyf currently needs a manual edit before compiling.
|
||||
#this target should execute only if it doesn't exist.
|
||||
msc.pyf: | msc.f phases.f angles.f
|
||||
$(F2PY) -h msc.pyf -m msc msc.f phases.f angles.f only: mscmain anglesarray anglesfile ps
|
||||
$(error msc.pyf auto-generated - must be edited manually before build can continue!)
|
||||
|
||||
msc.so: msc.f param.f common.f phases.f angles.f revision.f msc.pyf
|
||||
$(F2PY) -c $(F2PYOPTS) msc.pyf msc.f phases.f angles.f -m msc
|
||||
|
||||
revision.f: msc.f
|
||||
echo " character*50 coderev" > revision.f
|
||||
echo " parameter(coderev=" >> revision.f
|
||||
git log --pretty=format:" ='Code revision %h, %ad')" --date=iso -1 $< >> $@ || echo " ='Code revision unknown, "`date +"%F %T %z"`"')" >> $@
|
||||
|
||||
clean:
|
||||
rm -f *.so *.o *.exe
|
||||
rm -f revision.f
|
||||
@@ -1 +0,0 @@
|
||||
__author__ = 'muntwiler_m'
|
||||
@@ -1,46 +0,0 @@
|
||||
SHELL=/bin/sh
|
||||
|
||||
# makefile for MUFPOT program and module
|
||||
#
|
||||
# the MUFPOT source code is not included in the public distribution.
|
||||
# please obtain the MUFPOT code from the original author,
|
||||
# and copy it to this directory before compilation.
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
|
||||
.PHONY: all clean edac msc mufpot
|
||||
|
||||
FC=gfortran
|
||||
FCCOPTS=
|
||||
F2PY=f2py
|
||||
F2PYOPTS=
|
||||
CC=gcc
|
||||
CCOPTS=
|
||||
SWIG=swig
|
||||
SWIGOPTS=
|
||||
PYTHON=python2
|
||||
PYTHONOPTS=
|
||||
|
||||
all: mufpot
|
||||
|
||||
mufpot: mufpot.exe mufpot.so
|
||||
|
||||
mufpot.exe: mufpot.f
|
||||
$(FC) $(FCOPTS) -o mufpot.exe mufpot.f
|
||||
|
||||
mufpot.pyf: | mufpot.f
|
||||
$(F2PY) -h mufpot.pyf -m mufpot mufpot.f only: mufpot
|
||||
|
||||
mufpot.so: mufpot.f mufpot.pyf
|
||||
$(F2PY) -c $(F2PYOPTS) mufpot.pyf mufpot.f -m mufpot
|
||||
|
||||
revision.f: msc.f
|
||||
echo " character*50 coderev" > revision.f
|
||||
echo " parameter(coderev=" >> revision.f
|
||||
git log --pretty=format:" ='Code revision %h, %ad')" --date=iso -1 $< >> $@ || echo " ='Code revision unknown, "`date +"%F %T %z"`"')" >> $@
|
||||
|
||||
clean:
|
||||
rm -f *.so *.o *.exe
|
||||
rm -f revision.f
|
||||
@@ -13,21 +13,17 @@ and R-factor based selection.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2018 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2018-21 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import logging
|
||||
import numpy as np
|
||||
import random
|
||||
import pmsco.optimizers.population as population
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -112,7 +108,7 @@ class GeneticPopulation(population.Population):
|
||||
|
||||
def setup(self, size, model_space, **kwargs):
|
||||
"""
|
||||
@copydoc Population.setup()
|
||||
@copydoc pmsco.optimizers.population.Population.setup()
|
||||
|
||||
in addition to the inherited behaviour, this method initializes self.mutation_step.
|
||||
mutation_step of a parameter is set to its model_space.step if non-zero.
|
||||
|
||||
@@ -47,6 +47,7 @@ TAG_NEW_RESULT = 1
|
||||
# currently not used
|
||||
TAG_FINISHED = 2
|
||||
|
||||
|
||||
class MscProcess(object):
|
||||
"""
|
||||
Code shared by MscoMaster and MscoSlave
|
||||
@@ -79,7 +80,6 @@ class MscProcess(object):
|
||||
|
||||
all other calculation results are discarded.
|
||||
"""
|
||||
rev = "rank %u, iteration %u" % (self.comm.rank, self.iteration)
|
||||
|
||||
# create parameter and cluster structures
|
||||
clu = self.project.create_cluster(pars)
|
||||
@@ -101,6 +101,7 @@ class MscProcess(object):
|
||||
|
||||
return pars
|
||||
|
||||
|
||||
class MscMaster(MscProcess):
|
||||
def __init__(self, comm):
|
||||
super(MscMaster, self).__init__(comm)
|
||||
@@ -235,6 +236,7 @@ class MscMaster(MscProcess):
|
||||
self.comm.send(None, dest=rank, tag=TAG_FINISH)
|
||||
super(MscMaster, self).cleanup()
|
||||
|
||||
|
||||
class MscSlave(MscProcess):
|
||||
|
||||
def run(self):
|
||||
@@ -258,6 +260,7 @@ class MscSlave(MscProcess):
|
||||
self.comm.send(result, dest=0, tag=TAG_NEW_RESULT)
|
||||
self.iteration += 1
|
||||
|
||||
|
||||
def optimize(project):
|
||||
"""
|
||||
main entry point for optimization
|
||||
|
||||
@@ -6,23 +6,19 @@ the module starts multiple MSC calculations and varies parameters on a fixed coo
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-21 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime
|
||||
import math
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
from pmsco.compat import open
|
||||
import pmsco.handlers as handlers
|
||||
import pmsco.graphics as graphics
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
@@ -287,6 +283,7 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
self._timeout = False
|
||||
self._invalid_limit = 10
|
||||
self._next_model = 0
|
||||
self._results_path = None
|
||||
|
||||
def setup(self, project, slots):
|
||||
"""
|
||||
@@ -307,10 +304,13 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
self._pop.setup(self._project.model_space)
|
||||
self._invalid_limit = max(slots, self._invalid_limit)
|
||||
|
||||
self._outfile = open(self._project.output_file + ".dat", "w")
|
||||
self._outfile.write("# ")
|
||||
self._outfile.write(" ".join(self._pop.positions.dtype.names))
|
||||
self._outfile.write("\n")
|
||||
of = Path(self._project.output_file)
|
||||
self._results_path = of.with_suffix(".dat")
|
||||
|
||||
with open(self._results_path, "wt", encoding="latin1") as outfile:
|
||||
outfile.write("# ")
|
||||
outfile.write(" ".join(self._pop.positions.dtype.names))
|
||||
outfile.write("\n")
|
||||
|
||||
return self._pop.model_count
|
||||
|
||||
@@ -388,11 +388,10 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
task.model['_rfac'] = task.rfac
|
||||
self._pop.add_result(task.model, task.rfac)
|
||||
|
||||
if self._outfile:
|
||||
with open(self._results_path, "at", encoding="latin1") as outfile:
|
||||
s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
|
||||
self._outfile.write(" ".join(s))
|
||||
self._outfile.write("\n")
|
||||
self._outfile.flush()
|
||||
outfile.write(" ".join(s))
|
||||
outfile.write("\n")
|
||||
|
||||
self._project.files.update_model_rfac(task.id.model, task.rfac)
|
||||
self._project.files.set_model_complete(task.id.model, True)
|
||||
@@ -422,6 +421,6 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
"""
|
||||
super(GridSearchHandler, self).save_report(root_task)
|
||||
|
||||
files = graphics.rfactor.render_results(self._project.output_file + ".dat", self._pop.positions)
|
||||
files = graphics.rfactor.render_results(self._results_path, self._pop.positions)
|
||||
for f in files:
|
||||
self._project.files.add_file(f, root_task.id.model, "report")
|
||||
|
||||
@@ -21,18 +21,14 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import math
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import os
|
||||
import time
|
||||
|
||||
from pmsco.compat import open
|
||||
import pmsco.handlers as handlers
|
||||
import pmsco.graphics.rfactor as grfactor
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
@@ -761,7 +757,9 @@ class Population(object):
|
||||
self.pos_import = np.delete(self.pos_import, range(first, last))
|
||||
self.size_act = last - first
|
||||
self.update_particle_info()
|
||||
return last - first
|
||||
else:
|
||||
self.size_act = 0
|
||||
return self.size_act
|
||||
|
||||
def update_particle_info(self, index=None, inc_model=True):
|
||||
"""
|
||||
@@ -1083,9 +1081,9 @@ class Population(object):
|
||||
|
||||
the file name extensions are .pos, .vel, and .best
|
||||
"""
|
||||
self.save_array(base_filename + ".pos", self.pos)
|
||||
self.save_array(base_filename + ".vel", self.vel)
|
||||
self.save_array(base_filename + ".best", self.best)
|
||||
self.save_array(Path(base_filename).with_suffix(".pos"), self.pos)
|
||||
self.save_array(Path(base_filename).with_suffix(".vel"), self.vel)
|
||||
self.save_array(Path(base_filename).with_suffix(".best"), self.best)
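# note (illustrative): Path.with_suffix replaces an existing extension instead of
# appending, e.g. Path("run/out").with_suffix(".pos") -> "run/out.pos",
# but Path("run/out.v2").with_suffix(".pos") -> "run/out.pos" as well.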
def load_population(self, base_filename):
|
||||
"""
|
||||
@@ -1096,9 +1094,9 @@ class Population(object):
|
||||
the files must have the same format as produced by save_population.
|
||||
the files must have the same number of rows.
|
||||
"""
|
||||
self.pos = self.load_array(base_filename + ".pos", self.pos)
|
||||
self.vel = self.load_array(base_filename + ".vel", self.vel)
|
||||
self.best = self.load_array(base_filename + ".best", self.best)
|
||||
self.pos = self.load_array(Path(base_filename).with_suffix(".pos"), self.pos)
|
||||
self.vel = self.load_array(Path(base_filename).with_suffix(".vel"), self.vel)
|
||||
self.best = self.load_array(Path(base_filename).with_suffix(".best"), self.best)
|
||||
self.size_act = self.pos.shape[0]
|
||||
|
||||
def save_results(self, filename):
|
||||
@@ -1107,6 +1105,9 @@ class Population(object):
|
||||
"""
|
||||
self.save_array(filename, self.results)
|
||||
|
||||
def render_population(self, base_filename):
|
||||
pass
|
||||
|
||||
|
||||
class PopulationHandler(handlers.ModelHandler):
|
||||
"""
|
||||
@@ -1167,6 +1168,8 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
self._invalid_limit = 10
|
||||
self.patch_file = "pmsco_patch.pop"
|
||||
self._patch_last_mtime = 0
|
||||
self._diag_path = None
|
||||
self._results_path = None
|
||||
|
||||
def setup(self, project, slots):
|
||||
"""
|
||||
@@ -1200,8 +1203,13 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
self._pop_size = _req_size if _req_size >= _min_size else _def_size
|
||||
self.setup_population()
|
||||
self._invalid_limit = self._pop_size * 10
|
||||
of = Path(self._project.output_file)
|
||||
dp = of.parent / "diag"
|
||||
dp.mkdir(exist_ok=True)
|
||||
self._diag_path = dp / of.name
|
||||
self._results_path = of.with_suffix(".dat")
|
||||
|
||||
with open(self._project.output_file + ".dat", "w") as outfile:
|
||||
with open(self._results_path, "wt", encoding="latin1") as outfile:
|
||||
outfile.write("# ")
|
||||
outfile.write(" ".join(self._pop.results.dtype.names))
|
||||
outfile.write("\n")
|
||||
@@ -1256,7 +1264,7 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
self._check_patch_file()
|
||||
self._pop.advance_population()
|
||||
|
||||
for pos in self._pop.pos_gen():
|
||||
for pos, vel in zip(self._pop.pos_gen(), self._pop.vel_gen()):
|
||||
time_pending += self._model_time
|
||||
if time_pending > time_avail:
|
||||
self._timeout = True
|
||||
@@ -1268,6 +1276,7 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
new_task.model = pos
|
||||
new_task.delta = vel
|
||||
new_task.change_id(model=pos['_model'])
|
||||
new_tasks.append(new_task)
|
||||
|
||||
@@ -1322,9 +1331,8 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
assert not math.isnan(task.rfac)
|
||||
task.model['_rfac'] = task.rfac
|
||||
self._pop.add_result(task.model, task.rfac)
|
||||
self._pop.save_population(self._project.output_file + ".pop")
|
||||
|
||||
with open(self._project.output_file + ".dat", "a") as outfile:
|
||||
with open(self._results_path, "at", encoding="latin1") as outfile:
|
||||
s = (str(task.model[name]) for name in self._pop.results.dtype.names)
|
||||
outfile.write(" ".join(s))
|
||||
outfile.write("\n")
|
||||
@@ -1364,6 +1372,6 @@ class PopulationHandler(handlers.ModelHandler):
|
||||
"""
|
||||
super(PopulationHandler, self).save_report(root_task)
|
||||
|
||||
files = grfactor.render_results(self._project.output_file + ".dat", self._pop.results)
|
||||
files = grfactor.render_results(Path(self._project.output_file).with_suffix(".dat"), self._pop.results)
|
||||
for f in files:
|
||||
self._project.files.add_file(f, root_task.id.model, "report")
|
||||
|
||||
@@ -10,20 +10,16 @@ D. A. Duncan et al., Surface Science 606, 278 (2012)
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-21 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import logging
|
||||
import numpy as np
|
||||
import pmsco.optimizers.population as population
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -130,6 +130,8 @@ class TablePopulation(population.Population):
|
||||
"""
|
||||
super(TablePopulation, self).setup(size, model_space, **kwargs)
|
||||
self.table_source = kwargs['table_source']
|
||||
self.size_act = 0
|
||||
self.model_count = 0
|
||||
|
||||
def advance_population(self):
|
||||
"""
|
||||
@@ -141,6 +143,7 @@ class TablePopulation(population.Population):
|
||||
@return: None
|
||||
"""
|
||||
self.import_positions(self.table_source)
|
||||
self.generation += 1
|
||||
self.advance_from_import()
|
||||
super(TablePopulation, self).advance_population()
|
||||
|
||||
|
||||
555
pmsco/pmsco.py
Executable file → Normal file
@@ -1,31 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
@package pmsco.pmsco
|
||||
PEARL Multiple-Scattering Calculation and Structural Optimization
|
||||
PSI Multiple-Scattering Calculation and Structural Optimization
|
||||
|
||||
this is the top-level interface of the PMSCO package.
|
||||
all calculations (any mode, any project) start by calling the run_project() function of this module.
|
||||
the module also provides a command line and a run-file/run-dict interface.
|
||||
This is the top-level interface of the PMSCO package.
|
||||
All calculations (any mode, any project) start by calling the run_project function of this module.
|
||||
The module also provides a command line, a run-file, and a run-dict interface.
|
||||
They all, in one way or another, set up an instance of a Project class and call the run_project function.
|
||||
|
||||
for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
|
||||
note that in parallel mode, one process takes the role of the coordinator (master).
|
||||
the master does not run calculations and is idle most of the time.
|
||||
to benefit from parallel execution on a work station, NN should be the number of processors.
|
||||
on a cluster, the number of processes is chosen according to the available resources.
|
||||
For parallel execution, prefix the command line with mpiexec -np NN, where NN is the number of processes to use.
|
||||
Note that in parallel mode, one process takes the role of the coordinator (master).
|
||||
The master does not run calculations and is idle most of the time.
|
||||
To benefit from parallel execution on a work station, NN should be the number of processors.
|
||||
On a cluster, the number of processes should be chosen according to the available resources.
|
||||
|
||||
all calculations can also be run in a single process.
|
||||
All calculations can also be run in a single process.
|
||||
PMSCO serializes the calculations automatically.
|
||||
|
||||
the code of the main module is independent of a particular calculation project.
|
||||
all project-specific code must be in a separate python module.
|
||||
the project module must implement a class derived from pmsco.project.Project,
|
||||
and call run_project() with an instance of the project class.
|
||||
refer to the projects folder for examples.
|
||||
The code of the main module is independent of a particular calculation project.
|
||||
All project-specific code must be in a separate python module.
|
||||
The project module must implement a class derived from pmsco.project.Project.
|
||||
The project module and class must be referenced in the run-file, or passed to the suitable run-function.
|
||||
|
||||
While they are not strictly necessary, run-files help to separate code and data.
|
||||
Code is usually version-controlled, run-files contain metadata of calculations and should be kept with the results.
|
||||
A git hash can be used to refer to the code used to execute the calculation.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-21 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-23 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
@@ -33,12 +37,16 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from builtins import range
|
||||
import logging
|
||||
from collections.abc import Mapping
|
||||
import importlib
|
||||
import commentjson as json
|
||||
import importlib.util
|
||||
import json
|
||||
import jsonschema
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import typing
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
@@ -55,10 +63,9 @@ pmsco_root = Path(__file__).resolve().parent.parent
|
||||
if str(pmsco_root) not in sys.path:
|
||||
sys.path.insert(0, str(pmsco_root))
|
||||
|
||||
from pmsco.database.git import get_git_hash
|
||||
import pmsco.dispatch as dispatch
|
||||
import pmsco.files as files
|
||||
import pmsco.handlers as handlers
|
||||
from pmsco.optimizers import genetic, swarm, grid, table
|
||||
from pmsco.project import Project
|
||||
|
||||
# the module-level logger
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -94,6 +101,7 @@ def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
|
||||
numeric_level = getattr(logging, level.upper(), logging.WARNING)
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.setLevel(numeric_level)
|
||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||
|
||||
if enable:
|
||||
if mpi_size > 1:
|
||||
@@ -112,49 +120,6 @@ def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
|
||||
root_logger.addHandler(handler)
|
||||
|
||||
|
||||
def set_common_args(project, args):
|
||||
"""
|
||||
set common project arguments from parsed command line.
|
||||
|
||||
this function translates and distributes the common arguments from the command line parser
|
||||
to the respective destinations.
|
||||
as of this writing, there are two destinations: the global logger and the project instance.
|
||||
|
||||
note that run_project() is called with the project instance as the only argument.
|
||||
all project-related arguments from the command line must therefore be copied to the project object.
|
||||
|
||||
@param args: a namespace object containing the necessary parameters.
|
||||
this can be an instance of Args, or the return value of parse_cli(),
|
||||
or any object which has the same attributes as the Args class.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
|
||||
if args.data_dir:
|
||||
project.data_dir = args.data_dir
|
||||
if args.output_file:
|
||||
project.output_file = args.output_file
|
||||
if args.db_file:
|
||||
project.db_file = args.db_file
|
||||
if args.log_file:
|
||||
project.log_file = args.log_file
|
||||
if args.log_level:
|
||||
project.log_level = args.log_level
|
||||
if not args.log_enable:
|
||||
project.log_file = ""
|
||||
project.log_level = ""
|
||||
if args.mode:
|
||||
project.mode = args.mode.lower()
|
||||
if args.time_limit:
|
||||
project.time_limit = args.time_limit
|
||||
if args.keep_files:
|
||||
project.keep_files = args.keep_files
|
||||
if args.keep_levels:
|
||||
project.keep_levels = max(args.keep_levels, project.keep_levels)
|
||||
if args.keep_best:
|
||||
project.keep_best = max(args.keep_best, project.keep_best)
|
||||
|
||||
|
||||
def run_project(project):
|
||||
"""
|
||||
run a calculation project.
|
||||
@@ -179,36 +144,18 @@ def run_project(project):
|
||||
if mpi_rank == 0:
|
||||
project.log_project_args()
|
||||
|
||||
if not project.git_hash:
|
||||
project.git_hash = get_git_hash()
|
||||
|
||||
project.validate()
|
||||
|
||||
optimizer_class = None
|
||||
if project.mode == 'single':
|
||||
optimizer_class = handlers.SingleModelHandler
|
||||
elif project.mode == 'grid':
|
||||
optimizer_class = grid.GridSearchHandler
|
||||
elif project.mode == 'swarm':
|
||||
optimizer_class = swarm.ParticleSwarmHandler
|
||||
elif project.mode == 'genetic':
|
||||
optimizer_class = genetic.GeneticOptimizationHandler
|
||||
elif project.mode == 'gradient':
|
||||
logger.error("gradient search not implemented")
|
||||
# TODO: implement gradient search
|
||||
# optimizer_class = gradient.GradientSearchHandler
|
||||
elif project.mode == 'table':
|
||||
optimizer_class = table.TableModelHandler
|
||||
else:
|
||||
logger.error("invalid optimization mode '%s'.", project.mode)
|
||||
project.handler_classes['model'] = optimizer_class
|
||||
|
||||
project.handler_classes['region'] = handlers.choose_region_handler_class(project)
|
||||
|
||||
if project and optimizer_class:
|
||||
if project:
|
||||
logger.info("starting calculations")
|
||||
try:
|
||||
dispatch.run_calculations(project)
|
||||
except (SystemExit, KeyboardInterrupt):
|
||||
raise
|
||||
except Exception as __:
|
||||
except Exception:
|
||||
logger.exception("unhandled exception during calculations.")
|
||||
raise
|
||||
else:
|
||||
@@ -223,6 +170,8 @@ def schedule_project(project, run_dict):
|
||||
|
||||
the function validates the project and submits a job to the scheduler.
|
||||
|
||||
placeholders in run-file's directories dict are resolved.
|
||||
|
||||
@param project: fully initialized project object.
|
||||
the validate method is called as part of this function.
|
||||
|
||||
@@ -234,117 +183,234 @@ def schedule_project(project, run_dict):
|
||||
setup_logging(enable=False)
|
||||
|
||||
project.validate()
|
||||
try:
|
||||
dirs = run_dict['project']['directories']
|
||||
for k in dirs:
|
||||
dirs[k] = str(project.directories[k])
|
||||
except KeyError:
|
||||
pass
|
||||
if project.git_hash:
|
||||
run_dict['project']['git_hash'] = project.git_hash
|
||||
elif hsh := get_git_hash():
|
||||
run_dict['project']['git_hash'] = hsh
|
||||
if project.db_file:
|
||||
run_dict['project']['db_file'] = str(project.db_file)
|
||||
if sf := project.optimizer_params['seed_file']:
|
||||
run_dict['project']['optimizer_params']['seed_file'] = str(sf)
|
||||
|
||||
schedule_dict = run_dict['schedule']
|
||||
module = importlib.import_module(schedule_dict['__module__'])
|
||||
module = _load_module(schedule_dict['__module__'])
|
||||
schedule_class = getattr(module, schedule_dict['__class__'])
|
||||
schedule = schedule_class(project)
|
||||
schedule.set_properties(module, schedule_dict, project)
|
||||
schedule.set_properties(vars(module), schedule_dict, project)
|
||||
schedule.run_dict = run_dict
|
||||
schedule.validate()
|
||||
schedule.submit()
|
||||
|
||||
|
||||
class Args(object):
|
||||
def _load_runfile(runfile: typing.Union[typing.Dict, str, bytes, os.PathLike, typing.TextIO]) -> typing.Mapping:
|
||||
"""
|
||||
arguments of the main function.
|
||||
|
||||
this class can be used to set up an arguments object for the main
|
||||
function as an alternative to the __main__ function which parses
|
||||
command line arguments.
|
||||
|
||||
the constructor initializes the attributes with the same default
|
||||
values as the command line parser.
|
||||
Load a runfile
|
||||
|
||||
The function loads a runfile from a dictionary, an open json file object, or a json file specified by a file path.
|
||||
If the source is a file, the directory is added to the project directories under the `run` key.
|
||||
|
||||
@param runfile: Dictionary with contents of a runfile, an open file object, or a path-like.
|
||||
@return: Dictionary with the contents of the runfile.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
constructor.
|
||||
|
||||
the parameters are the same as for the command line interface.
|
||||
project and mode are mandatory.
|
||||
other parameters may be required depending on the project
|
||||
and/or the calculation mode.
|
||||
"""
|
||||
self.data_dir = ""
|
||||
self.output_file = ""
|
||||
self.db_file = ""
|
||||
self.time_limit = 24.0
|
||||
self.keep_files = files.FILE_CATEGORIES_TO_KEEP
|
||||
self.keep_best = 10
|
||||
self.keep_levels = 1
|
||||
self.log_level = "WARNING"
|
||||
self.log_file = ""
|
||||
self.log_enable = True
|
||||
def set_run_dir(fileobj):
|
||||
try:
|
||||
p = Path(fileobj.name).parent.resolve(True)
|
||||
rf['project']['directories']['run'] = p
|
||||
except (AttributeError, FileNotFoundError):
|
||||
pass
|
||||
|
||||
if isinstance(runfile, Mapping):
|
||||
rf = runfile
|
||||
elif hasattr(runfile, 'read'):
|
||||
rf = json.load(runfile)
|
||||
set_run_dir(runfile)
|
||||
else:
|
||||
with open(runfile, 'r') as f:
|
||||
rf = json.load(f)
|
||||
set_run_dir(f)
|
||||
|
||||
schema_dir = Path(__file__).parent / "schema"
|
||||
schema_file = schema_dir / "runfile.schema.json"
|
||||
schema_url = f"file://{schema_dir}/"
|
||||
with open(schema_file) as f:
|
||||
schema = json.load(f)
|
||||
|
||||
resolver = jsonschema.RefResolver(schema_url, None)
|
||||
jsonschema.validate(rf, schema, resolver=resolver)
|
||||
|
||||
return rf
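
# minimal runfile sketch (json; the keys mirror those referenced in this module,
# the values are made up for illustration):
#
#     {
#         "project": {
#             "__module__": "projects/demo/demo.py",
#             "__class__": "DemoProject",
#             "directories": {"data": "/home/user/data"}
#         }
#     }
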
def _load_module(name_or_path: typing.Union[str, bytes, os.PathLike]):
    """
    Load a Python module

    @param name_or_path: Module name or file path of the module.
        If a module name is given, the module must be in the Python module search path.
    @return: module
    @raise ValueError if the module is not found
    """

    try:
        return importlib.import_module(name_or_path)
    except ImportError:
        p = Path(name_or_path)
        module_name = p.stem
        spec = importlib.util.spec_from_file_location(module_name, name_or_path)
        try:
            module = importlib.util.module_from_spec(spec)
        except AttributeError:
            msg = f"Can't find module {name_or_path}"
            print(msg, file=sys.stderr)
            print("sys.path:", sys.path, file=sys.stderr)
            raise ValueError(msg)

        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module

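Both call forms of _load_module return an ordinary module object; a short sketch (the .py path is hypothetical):

mod = _load_module("pmsco.projects.demo.fcc")   # dotted name via the module search path
mod = _load_module("my_project.py")             # file path via spec_from_file_location
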
def main_project(symbols: typing.Optional[typing.Dict[str, typing.Any]] = None,
                 project: typing.Optional[Project] = None,
                 project_module: typing.Optional[typing.Union[str, os.PathLike]] = None,
                 project_class: typing.Optional[typing.Union[str, typing.Type[Project]]] = None,
                 runfile: typing.Union[typing.Dict, str, bytes, os.PathLike, typing.TextIO] = None):

    """
    Main function with optional arguments.

    This function starts the whole process based on function arguments.
    The arguments can be an existing project instance, a project class, and/or a runfile.

    The function carries out the following steps:

    1. Load a runfile - if specified.
    2. Create a project object.
    3. Apply the runfile to the project.
    4. Run or schedule the project.

    The project instance is produced from the first match of the following conditions:

    1. `project` argument is a Project instance.
    2. `project_class` is a Project class.
    3. `__class__` entry from runfile.
       The class must be listed in symbols,
       or the runfile must also contain a `__module__` entry
       with the name or file path of the project module that declares the class.

    The project is scheduled rather than executed if the corresponding section in the runfile is present.

    @param symbols: Namespace of the project module, which contains project, cluster and calculator classes.
        This is the basis for class resolution from runfiles.
        If called by the project module, it should pass vars().
    @param project: project instance.
    @param project_class: project class or name of a project class defined in `symbols`.
    @param project_module: name or file path of the project module.
        This is required if symbols is not defined
        and the project class is given as a string (project_class argument or runfile value).
    @param runfile: A file-like, path-like or dict with runfile contents.
        Runfiles must be in json-format.
    @return: None
    """

    if runfile is not None:
        rf = _load_runfile(runfile)
        rfp = rf['project']
    else:
        rf = None
        rfp = None

    if project is None:
        if project_class is None or not issubclass(project_class, Project):
            project_classname = project_class
            if not project_classname:
                project_classname = rfp['__class__']

            if not symbols:
                if project_module:
                    module = _load_module(project_module)
                    symbols = vars(module)
                else:
                    module = _load_module(rfp['__module__'])
                    symbols = vars(module)

            project_class = symbols[project_classname]

        project = project_class()

    project.directories['pmsco'] = Path(__file__).parent
    try:
        project.directories['project'] = Path(module.__file__).parent
    except AttributeError:
        pass

    if rfp:
        project.set_properties(symbols, rfp, project)

    try:
        schedule_enabled = rf['schedule']['enabled']
    except KeyError:
        schedule_enabled = False
    if schedule_enabled:
        schedule_project(project, rf)
    else:
        run_project(project)

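A typical call from a custom project module would then look like this sketch (the run-file name is hypothetical):

# hedged sketch: entry point of a custom project module
from pmsco.pmsco import main_project

if __name__ == '__main__':
    # vars() hands this module's namespace to main_project so that
    # __class__ names in the run-file resolve to classes defined here
    main_project(symbols=vars(), runfile="my-job.json")   # hypothetical run-file
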
def get_cli_parser():
    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
multiple-scattering calculations and optimization
PSI multiple-scattering calculations and optimization (PMSCO)

This is the main command line entry point for PMSCO calculation jobs.
Alternative entry points can be provided by project modules.
The command line requires at least a run-file to define the project parameters.

you must call pmsco.py from a project file which defines the calculation project.
the project file must be a regular Python module and define:
The command can run a calculation job directly or submit it to a job queue
via the `schedule` section in the run-file.
The program detects whether it runs in a single-process or OpenMPI multi-process environment
and coordinates parallel processes automatically.

1) a project class derived from pmsco.project.Project.
   the class implements/overrides all necessary methods of the calculation project,
   in particular create_model_space, create_cluster, and create_params.

2) a global function named create_project.
   the function accepts a namespace object from the argument parser.
   it may evaluate extra, project-specific arguments.
   it does not need to evaluate the common parameters described below.
   the function must return an instance of the project class described above.

3) main code that parses the command line and calls pmsco.pmsco.main_pmsco().
   (see the projects folder for examples).
All arguments should preferably be declared in the run-file.
A small number of options can be passed on the command line
to override the corresponding parameter of the run-file.

Please see the documentation that is compiled in docs/html/index.html
for instructions how to set up a project module and run-files.
See also the projects folder for examples.
""")
    # the required argument list may depend on the calculation mode.
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.
    parser.add_argument('project_module', nargs='?',
                        help="path to custom module that defines the calculation project")

    parser.add_argument('-r', '--run-file',
                        help="path to run-time parameters file which contains all program arguments. " +
                             "must be in JSON format.")
    parser.add_argument('-m', '--mode',
                        choices=['single', 'grid', 'swarm', 'genetic', 'table'],
                        help='calculation mode')
    parser.add_argument('-d', '--data-dir',
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file',
                        help='base path for intermediate and output files.')
    parser.add_argument('-b', '--db-file',
                        help='name of an sqlite3 database file where the results should be stored.')
    parser.add_argument('-k', '--keep-files', nargs='*',
                        choices=KEEP_FILES_CHOICES,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int,
                        help='number of best models for which to keep result files '
                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        help='task level down to which result files of best models are kept. '
                             '0 = model, 1 = scan, 2 = domain, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float,
                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file',
                        help='name of the main log file. ' +
                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level',
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                                help="enable logging. by default, logging is on.")
    feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false',
                                help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=True)
                        help="Path to a run-file in JSON format which contains all calculation parameters. "
                             "This argument is mandatory. "
                        )
    parser.add_argument('-m', '--module',
                        help="File name of the custom project module. "
                             "The module must declare the project class and other project-specific classes. "
                             "This optional argument overrides the __module__ entry of the run-file. "
                        )
    parser.add_argument('-c', '--project-class',
                        help="Project class. Requires --module to be specified. "
                             "The project class is resolved in the namespace of the module. "
                             "This optional argument corresponds to the __class__ entry of the run-file. "
                        )
    parser.add_argument('-o', '--output-dir',
                        help="Output directory. "
                             "This optional argument overrides the directories['output'] entry of the run-file."
                        )
    parser.add_argument('-j', '--job-name',
                        help="Job name. Should be short and valid as a part of directory and file names. "
                             "If a persistent database is used, it must not exist in the database yet. "
                             "This optional argument overrides the job_name of the run-file."
                        )

    return parser

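A sketch of how the parser is consumed; parse_known_args lets unknown, project-specific options pass through, as parse_cli below does (the argument values are hypothetical):

# hedged sketch: consuming get_cli_parser() the way parse_cli() does
parser = get_cli_parser()
args, unknown_args = parser.parse_known_args(['-r', 'my-job.json', '-j', 'demo0001'])
# unrecognized options stay in unknown_args and can be forwarded to the project module
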
@@ -362,129 +428,52 @@ def parse_cli():
    return args, unknown_args


def import_module(module_name):
def main(symbols: typing.Optional[typing.Dict[str, typing.Any]] = None):
    """
    import a custom module by name.
    Main function with command line parsing

    import a module given its file path or module name (like in an import statement).
    This function starts the whole process with parameters from the command line.

    preferably, the module name should be given as in an import statement.
    as the top-level pmsco directory is on the python path,
    the module name will begin with `projects` for a custom project module or `pmsco` for a core pmsco module.
    in this case, the function just calls importlib.import_module.
    If the command line contains a run-file parameter, it determines the project class and the project parameters.

    if a file path is given, i.e., `module_name` links to an existing file and has a `.py` extension,
    the function extracts the directory path,
    inserts it into the python path,
    and calls importlib.import_module on the stem of the file name.

    @note the file path remains in the python path.
    this option should be used carefully to avoid breaking file name resolution.

    @param module_name: file path or module name.
        file path is interpreted relative to the working directory.

    @return: the loaded module as a python object
    """
    p = Path(module_name)
    if p.is_file() and p.suffix == ".py":
        path = p.parent.resolve()
        module_name = p.stem
        if path not in sys.path:
            sys.path.insert(0, path)

    module = importlib.import_module(module_name)
    return module


def main_dict(run_params):
    """
    main function with dictionary run-time parameters

    this starts the whole process with all direct parameters.
    the command line is not parsed.
    no run-file is loaded (just the project module).

    @param run_params: dictionary with the same structure as the JSON run-file.
    The project class can be specified either in the run-file, on the command line or in the function arguments.
    If the run-file specifies a class name, that class is instantiated.

    @return: None
    """
    project_params = run_params['project']

    module = importlib.import_module(project_params['__module__'])
    try:
        project_class = getattr(module, project_params['__class__'])
    except KeyError:
        project = module.create_project()
    else:
        project = project_class()

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, project_params, project)
    run_project(project)


def main():
    """
    main function with command line parsing

    this function starts the whole process with parameters from the command line.

    if the command line contains a run-file parameter, it determines the module to load and the project parameters.
    otherwise, the command line parameters apply.

    the project class can be specified either in the run-file or the project module.
    if the run-file specifies a class name, that class is looked up in the project module and instantiated.
    otherwise, the module's create_project is called.

    @return: None
    """
    args, unknown_args = parse_cli()

    try:
        with open(args.run_file, 'r') as f:
            rf = json.load(f)
        rf = _load_runfile(args.run_file)
    except AttributeError:
        rfp = {'__module__': args.project_module}
    else:
        rfp = rf['project']

    module = import_module(rfp['__module__'])
    try:
        project_args = module.parse_project_args(unknown_args)
    except AttributeError:
        project_args = None
        rf = {'project': {}}

    try:
        project_class = getattr(module, rfp['__class__'])
    except (AttributeError, KeyError):
        project = module.create_project()
    else:
        project = project_class()
        project_args = None

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, rfp, project)

    set_common_args(project, args)
    try:
        if project_args:
            module.set_project_args(project, project_args)
        if args.module:
            rf['project']['__module__'] = args.module
    except AttributeError:
        pass

    try:
        schedule_enabled = rf['schedule']['enabled']
    except KeyError:
        schedule_enabled = False
    if schedule_enabled:
        schedule_project(project, rf)
    else:
        run_project(project)
        if args.project_class:
            rf['project']['__class__'] = args.project_class
    except AttributeError:
        pass

    try:
        if args.output_dir:
            rf['project']['directories']['output'] = args.output_dir
    except (AttributeError, KeyError):
        pass

    try:
        if args.job_name:
            rf['project']['job_name'] = args.job_name
    except (AttributeError, KeyError):
        pass

    main_project(symbols=symbols, runfile=rf)


if __name__ == '__main__':

1588	pmsco/project.py
File diff suppressed because it is too large
46	pmsco/projects/demo/cu111-single.json
Normal file
@@ -0,0 +1,46 @@
{
  "project": {
    "__module__": "pmsco.projects.demo.fcc",
    "__class__": "FCC111Project",
    "job_name": "cu111-0002",
    "job_tags": {},
    "description": "edac phases, ${mode}",
    "directories": {
      "output": "${project}/../../work/demo/${job_name}"
    },
    "mode": "single",
    "time_limit": 24,
    "log_level": "WARNING",
    "keep_files": [
      "cluster",
      "output",
      "atomic"
    ],
    "element": "Cu",
    "atomic_scattering_factory": "InternalAtomicCalculator",
    "multiple_scattering_factory": "EdacCalculator",
    "domains": [
      {
        "default": 0.0
      }
    ],
    "scans": [
      {
        "__class__": "ScanLoader",
        "filename": "${project}/demo_holo_scan.etpi",
        "is_modf": true,
        "emitter": "Cu",
        "initial_state": "3s",
        "patch": {"e": "26."}
      }
    ],
    "model_space": {
      "rmax": {"start": 5.0},
      "dlat": {"start": 3.6149},
      "dl1l2": {"start": "3.6149 / math.sqrt(3.0)"},
      "phi": {"start": 0.0},
      "V0": {"start": 10.0},
      "Zsurf": {"start": 1.0}
    }
  }
}
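This run-file can be started from the command line or programmatically. A hedged sketch of the programmatic route (the relative path assumes the repository root as working directory):

from pmsco.pmsco import main_project

# main_project resolves FCC111Project from pmsco.projects.demo.fcc via
# the __module__/__class__ entries and runs a single calculation
main_project(runfile="pmsco/projects/demo/cu111-single.json")
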
@@ -4,39 +4,55 @@ scattering calculation project for the (111) surface of an arbitrary face-center

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-19 by Paul Scherrer Institut @n
@copyright (c) 2015-22 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import math
import numpy as np
import os.path
import periodictable as pt
import argparse
import logging

import pmsco.cluster as mc
import pmsco.project as mp
# noinspection PyUnresolvedReferences
from pmsco.calculators.calculator import InternalAtomicCalculator
# noinspection PyUnresolvedReferences
from pmsco.calculators.edac import EdacCalculator
# noinspection PyUnresolvedReferences
from pmsco.calculators.phagen.runner import PhagenCalculator
# noinspection PyUnresolvedReferences
from pmsco.cluster import Cluster, ClusterGenerator
# noinspection PyUnresolvedReferences
import pmsco.elements.bindingenergy
from pmsco.graphics.scan import render_scan
from pmsco.project import Project, ModelSpace, CalculatorParams
# noinspection PyUnresolvedReferences
from pmsco.scan import ScanKey, ScanLoader, ScanCreator
# noinspection PyUnresolvedReferences
from pmsco.dispatch import CalcID
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)


class FCC111Project(mp.Project):
class FCC111Project(Project):
    def __init__(self):
        """
        initialize a project instance

        the element attribute must be set directly after creation, e.g. via run-file.

        unlike previous versions, the current version of this class does not define a scan_dict.
        the scans should be declared in the run-file using the ScanLoader and ScanCreator classes.
        the demo_holo_scan.etpi and demo_alpha_scan.etpai files can be used as templates.
        """
        super(FCC111Project, self).__init__()
        self.scan_dict = {}
        self.element = "Ni"
        self.scan_dict = {}
        self.phase_files = {}
        self.rme_files = {}

    def create_cluster(self, model, index):
        """
@@ -48,7 +64,7 @@ class FCC111Project(mp.Project):
        @arg model['rmax'] cluster radius
        @arg model['phi'] azimuthal rotation angle in degrees
        """
        clu = mc.Cluster()
        clu = Cluster()
        clu.comment = "{0} {1}".format(self.__class__, index)
        clu.set_rmax(model['rmax'])
        # fcc lattice constant
@@ -91,45 +107,46 @@ class FCC111Project(mp.Project):
        par['V0'] = inner potential
        par['Zsurf'] = position of surface
        """
        params = mp.CalculatorParams()
        params = CalculatorParams()

        params.title = "fcc(111)"
        params.comment = "{0} {1}".format(self.__class__, index)
        params.cluster_file = ""
        params.output_file = ""
        params.initial_state = self.scans[index.scan].initial_state
        params.spherical_order = 2
        initial_state = self.scans[index.scan].initial_state
        params.initial_state = initial_state
        emitter = self.scans[index.scan].emitter
        params.binding_energy = pt.elements.symbol(emitter).binding_energy[initial_state]
        params.polarization = "H"
        params.scattering_level = 5
        params.fcut = 15.0
        params.cut = 15.0
        params.angular_resolution = 0.0
        params.lattice_constant = 1.0
        params.z_surface = model['Zsurf']
        params.atom_types = 3
        params.atomic_number = [pt.elements.symbol(self.element).number]
        params.phase_file = []
        params.msq_displacement = [0.00]
        params.planewave_attenuation = 1.0
        params.inner_potential = model['V0']
        params.work_function = 4.5
        params.symmetry_range = 360.0
        params.polar_incidence_angle = 60.0
        params.azimuthal_incidence_angle = 0.0
        params.vibration_model = "P"
        params.substrate_atomic_mass = pt.elements.symbol(self.element).mass
        params.angular_resolution = 5.0
        params.experiment_temperature = 300.0
        params.debye_temperature = 400.0
        params.debye_wavevector = 1.7558
        params.rme_minus_value = 0.0

        if self.phase_files:
            state = emitter + initial_state
            try:
                params.phase_files = self.phase_files[state]
            except KeyError:
                params.phase_files = {}
                logger.warning("no phase files found for {} - using default calculator".format(state))

        params.rme_files = {}
        params.rme_minus_value = 0.1
        params.rme_minus_shift = 0.0
        params.rme_plus_value = 1.0
        params.rme_plus_shift = 0.0
        # used by EDAC only

        # edac_interface only
        params.emitters = []
        params.lmax = 15
        params.dmax = 5.0
        params.orders = [25]
        # params.phase_output_classes = self.cluster_generator.create_cluster(model, index).get_atom_count()

        return params

@@ -137,7 +154,7 @@ class FCC111Project(mp.Project):
        """
        define the model space of the optimization parameters.
        """
        dom = mp.ModelSpace()
        dom = ModelSpace()

        if self.mode == "single":
            dom.add_param('rmax', 5.00, 5.00, 15.00, 2.50)
@@ -169,92 +186,3 @@ class FCC111Project(mp.Project):
            dom.add_param('Zsurf', 1.00, 0.00, 2.00, 0.50)

        return dom


def create_project():
    """
    create an FCC111Project calculation project.
    """

    project = FCC111Project()

    project_dir = os.path.dirname(os.path.abspath(__file__))
    project.data_dir = project_dir

    # scan dictionary
    # to select any number of scans, add their dictionary keys as scans option on the command line
    project.scan_dict['default'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
                                    'emitter': "Ni", 'initial_state': "3s"}
    project.scan_dict['holo'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
                                 'emitter': "Ni", 'initial_state': "3s"}
    project.scan_dict['alpha'] = {'filename': os.path.join(project_dir, "demo_alpha_scan.etp"),
                                  'emitter': "Ni", 'initial_state': "3s"}

    project.add_domain({'default': 0.0})

    return project


def set_project_args(project, project_args):
    """
    set the project arguments of an FCC111Project calculation project.

    @param project: project instance

    @param project_args: (Namespace object) project arguments.
    """

    scans = ['default']
    try:
        if project_args.scans:
            scans = project_args.scans
        else:
            logger.warning(BMsg("missing scan argument, using {0}", scans[0]))
    except AttributeError:
        logger.warning(BMsg("missing scan argument, using {0}", scans[0]))

    for scan_key in scans:
        scan_spec = project.scan_dict[scan_key]
        project.add_scan(**scan_spec)
        logger.info(BMsg("add scan {filename} ({emitter} {initial_state})", **scan_spec))

    try:
        if project_args.element:
            project.element = project_args.element
            for scan in project.scans:
                scan.emitter = project_args.element
            logger.warning(BMsg("override emitters to {0}", project.emitter))
    except AttributeError:
        pass

    try:
        if project_args.initial_state:
            for scan in project.scans:
                scan.initial_state = project_args.initial_state
            logger.warning(f"override initial states of all scans to {project_args.initial_state}")
    except AttributeError:
        pass

    try:
        if project_args.energy:
            for scan in project.scans:
                scan.energies = np.asarray((project_args.energy, ))
            logger.warning(BMsg("override scan energy, set to {0}", project_args.energy))
    except AttributeError:
        pass


def parse_project_args(_args):
    parser = argparse.ArgumentParser()

    # main arguments
    parser.add_argument('-e', '--element', help="chemical element symbol")
    parser.add_argument('-s', '--scans', nargs="*", default=['default'],
                        help="nick names of scans to use in calculation (see create_project function)")
    parser.add_argument('-i', '--initial-state',
                        help="initial state of photoelectron")
    parser.add_argument('--energy', type=float,
                        help="kinetic energy of photoelectron (override scan file)")

    parsed_args = parser.parse_args(_args)
    return parsed_args
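These two hooks are what the legacy main() invokes through module.parse_project_args and module.set_project_args; a hedged sketch of exercising them directly:

# hedged sketch: using the project-argument hooks directly
project = create_project()
project_args = parse_project_args(['-e', 'Cu', '-s', 'holo'])   # hypothetical CLI values
set_project_args(project, project_args)   # adds the 'holo' scan and overrides emitters to Cu
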
85	pmsco/projects/demo/molecule.json
Normal file
@@ -0,0 +1,85 @@
{
  "#description": "template runfile for angle scans of molecules (i.e., clusters defined in a molecule file)",
  "project": {
    "__module__": "pmsco.projects.demo.molecule",
    "__class__": "MoleculeProject",
    "job_name": "molecule0001",
    "job_tags": [],
    "description": "",
    "mode": "single",
    "directories": {
      "data": "",
      "output": ""
    },
    "keep_files": [
      "cluster",
      "model",
      "scan",
      "report",
      "population"
    ],
    "keep_best": 10,
    "keep_levels": 1,
    "time_limit": 24,
    "log_file": "",
    "log_level": "WARNING",
    "cluster_generator": {
      "__class__": "MoleculeFileCluster",
      "atom_types": {
        "A": "N",
        "B": "Ni"
      },
      "model_dict": {
        "dAB": "dNNi",
        "th": "pNNi",
        "ph": "aNNi"
      }
    },
    "cluster_file": "TODO",
    "emitter_file": "TODO",
    "atomic_scattering_factory": "InternalAtomicCalculator",
    "multiple_scattering_factory": "EdacCalculator",
    "model_space": {
      "zsurf": {
        "start": 1.5,
        "min": 0.5,
        "max": 2.0,
        "step": 0.25
      },
      "Texp": {"start": 300.0},
      "Tdeb": {"start": 100.0},
      "V0": {"start": 10.0},
      "rmax": {"start": 50.0},
      "ares": {"start": 5.0},
      "distm": {"start": 5.0},
      "wdom1": {"start": 1.0},
      "wdom2": {"start": 1.0}
    },
    "domains": [
      {"xrot": 0.0, "yrot": 0.0, "zrot": 0.0},
      {"xrot": 0.0, "yrot": 0.0, "zrot": 120.0},
      {"xrot": 0.0, "yrot": 0.0, "zrot": 240.0}
    ],
    "scans": [
      {
        "__class__": "HoloScanCreator",
        "filename": "${project}/molecule.etpi",
        "emitter": "N",
        "initial_state": "1s",
        "generator": "pmsco.data.holo_grid",
        "generator_args": {
          "theta_start": 90,
          "theta_step": 1
        },
        "other_positions": {"e": 250, "a": 0}
      }
    ],
    "optimizer_params": {
      "pop_size": 0,
      "seed_file": "",
      "seed_limit": 0,
      "recalc_seed": true,
      "table_file": ""
    }
  }
}
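The two TODO entries must be filled in before the template can run. A hedged sketch of patching them programmatically (the .xyz file names are hypothetical):

from pmsco.pmsco import main_project
import json

with open("pmsco/projects/demo/molecule.json") as f:
    rf = json.load(f)
rf["project"]["cluster_file"] = "my-molecule.xyz"    # hypothetical cluster file
rf["project"]["emitter_file"] = "my-emitters.xyz"    # hypothetical emitter file
main_project(runfile=rf)   # main_project accepts a dict runfile directly
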
@@ -1,11 +1,33 @@
"""
@package pmsco.projects.demo.molecule
scattering calculation project for single molecules
scattering calculation project for single molecules or coordinate files from other programs

the atomic positions are read from a molecule file.
cluster file, emitter (by chemical symbol), initial state and kinetic energy are specified on the command line.
the atomic positions are read from a molecule (.xyz) file.
emitters are selected by chemical element symbol or by an additional molecule file (emitter file)
that contains only those atoms of the cluster file which are inequivalent emitters.

cluster, emitters, initial state and kinetic energy are specified on the command line.
there are no structural parameters.

example 1: molecule from XYZ file
---------------------------------

the cluster file contains all atomic positions necessary for calculating the diffraction pattern.
emitters are selected by chemical element symbol.
the cluster is not trimmed.
normal emission is along the z-axis.


example 2: periodic structure from external program (e.g. Vesta)
----------------------------------------------------------------

the cluster file contains the unit cells spanned by at least 3 unit vectors in the surface plane.
the emitter file contains only one unit cell as a sub-set of the cluster file.
emitters can be narrowed down further by chemical element symbol.
an rmax parameter can be specified to trim the cluster.
normal emission is along the z-axis.


@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-20 by Paul Scherrer Institut @n
@@ -17,10 +39,8 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n

import math
import numpy as np
import os.path
from pathlib import Path
import periodictable as pt
import argparse
import logging

# noinspection PyUnresolvedReferences
@@ -30,11 +50,11 @@ from pmsco.calculators.edac import EdacCalculator
# noinspection PyUnresolvedReferences
from pmsco.calculators.phagen.runner import PhagenCalculator
import pmsco.cluster as cluster
from pmsco.data import calc_modfunc_loess
import pmsco.data
# noinspection PyUnresolvedReferences
import pmsco.elements.bindingenergy
from pmsco.helpers import BraceMessage as BMsg
import pmsco.project as project
import pmsco.project

logger = logging.getLogger(__name__)

@@ -48,10 +68,11 @@ class MoleculeFileCluster(cluster.ClusterGenerator):
    def __init__(self, project):
        super(MoleculeFileCluster, self).__init__(project)
        self.base_cluster = None
        self.emitter_cluster = None

    def load_base_cluster(self):
    def get_base_cluster(self):
        """
        load and cache the project-defined coordinate file.
        return the project-defined atom coordinates, load from file if necessary.

        the file path is set in self.project.cluster_file.
        the file must be in XYZ (.xyz) or PMSCO cluster (.clu) format (cf. pmsco.cluster module).
@@ -60,7 +81,7 @@ class MoleculeFileCluster(cluster.ClusterGenerator):
        """
        if self.base_cluster is None:
            clu = cluster.Cluster()
            clu.set_rmax(120.0)
            clu.set_rmax(np.inf)
            p = Path(self.project.cluster_file)
            ext = p.suffix
            if ext == ".xyz":
@@ -74,6 +95,36 @@ class MoleculeFileCluster(cluster.ClusterGenerator):

        return self.base_cluster

    def get_emitter_cluster(self):
        """
        return the project-defined emitter coordinates, load from file if necessary.

        the file path is set in self.project.emitter_file.
        if the file path is None, the method loads the base cluster self.project.cluster_file.
        the file must be in XYZ (.xyz) or PMSCO cluster (.clu) format (cf. pmsco.cluster module).

        @return: Cluster object (also referenced by self.emitter_cluster).
            None if no emitter file was specified.
        """
        if self.emitter_cluster is None:
            clu = cluster.Cluster()
            clu.set_rmax(np.inf)
            try:
                p = Path(self.project.emitter_file)
            except TypeError:
                p = Path(self.project.cluster_file)
            ext = p.suffix
            if ext == ".xyz":
                fmt = cluster.FMT_XYZ
            elif ext == ".clu":
                fmt = cluster.FMT_PMSCO
            else:
                raise ValueError(f"unknown cluster file extension {ext}")
            clu.load_from_file(self.project.emitter_file, fmt=fmt)
            self.emitter_cluster = clu

        return self.emitter_cluster

    def count_emitters(self, model, index):
        """
        count the number of emitter configurations.
@@ -86,8 +137,14 @@ class MoleculeFileCluster(cluster.ClusterGenerator):
            or the number of emitters in the specified configuration (>= 0).
        @return: number of emitter configurations.
        """
        clu = self.create_cluster(model, index)
        return clu.get_emitter_count()
        if index.emit == -1:
            clu = self.get_emitter_cluster()
            sel_emit = clu.data['s'] == self.project.scans[index.scan].emitter
            return np.sum(sel_emit)
        elif index.emit >= 0:
            return 1
        else:
            raise ValueError(f"invalid emitter index {index.emit}")

    def create_cluster(self, model, index):
        """
@@ -107,45 +164,55 @@ class MoleculeFileCluster(cluster.ClusterGenerator):

        @return pmsco.cluster.Cluster object
        """
        self.load_base_cluster()
        clu = cluster.Cluster()
        clu.copy_from(self.base_cluster)
        clu.copy_from(self.get_base_cluster())
        clu.comment = f"{self.__class__}, {index}"
        dom = self.project.domains[index.domain]

        # trim
        clu.set_rmax(model['rmax'])
        clu.trim_sphere(clu.rmax)

        # emitter selection
        idx_emit = np.where(clu.data['s'] == self.project.scans[index.scan].emitter)
        ems = cluster.Cluster()
        ems.copy_from(self.get_emitter_cluster())
        ems.set_rmax(model['rmax'] + 0.1)
        ems.trim_cylinder(clu.rmax, clu.rmax)

        idx_emit = np.where(ems.data['s'] == self.project.scans[index.scan].emitter)
        assert isinstance(idx_emit, tuple)
        idx_emit = idx_emit[0]
        if index.emit >= 0:
            idx_emit = idx_emit[index.emit]
            clu.data['e'][idx_emit] = 1
            origin = ems.get_position(idx_emit)
            clu.translate(-origin)
            clu.data['e'] = 0
            clu.set_emitter(pos=np.array((0.0, 0.0, 0.0)))
        else:
            for idx in idx_emit:
                clu.set_emitter(pos=ems.get_position(idx))

        # rotation
        if 'xrot' in model:
            clu.rotate_z(model['xrot'])
            clu.rotate_x(model['xrot'])
        elif 'xrot' in dom:
            clu.rotate_z(dom['xrot'])
            clu.rotate_x(dom['xrot'])
        if 'yrot' in model:
            clu.rotate_z(model['yrot'])
            clu.rotate_y(model['yrot'])
        elif 'yrot' in dom:
            clu.rotate_z(dom['yrot'])
            clu.rotate_y(dom['yrot'])
        if 'zrot' in model:
            clu.rotate_z(model['zrot'])
        elif 'zrot' in dom:
            clu.rotate_z(dom['zrot'])

        # trim
        clu.set_rmax(model['rmax'] + 0.1)
        clu.trim_paraboloid(clu.rmax, -clu.rmax)

        logger.info(f"cluster for calculation {index}: "
                    f"{clu.get_atom_count()} atoms, {clu.get_emitter_count()} emitters")

        return clu


class MoleculeProject(project.Project):
class MoleculeProject(pmsco.project.Project):
    """
    general molecule project.

@@ -176,15 +243,29 @@ class MoleculeProject(project.Project):
        initialize a project instance
        """
        super(MoleculeProject, self).__init__()
        self.model_space = project.ModelSpace()
        self.scan_dict = {}
        self.model_space = pmsco.project.ModelSpace()
        self.cluster_file = "demo-cluster.xyz"
        self.emitter_file = None
        self.cluster_generator = MoleculeFileCluster(self)
        self.atomic_scattering_factory = PhagenCalculator
        self.multiple_scattering_factory = EdacCalculator
        self.phase_files = {}
        self.rme_files = {}
        self.modf_smth_ei = 0.5

    def validate(self):
        """
        Validate project parameters

        Resolve paths of cluster and emitter files after calling the inherited method.

        @return: None
        """

        super().validate()
        self.cluster_file = self.directories.resolve_path(self.cluster_file)
        self.emitter_file = self.directories.resolve_path(self.emitter_file)
        logger.warning(f"cluster_file: {self.cluster_file}")
        logger.warning(f"emitter_file: {self.emitter_file}")

    def create_params(self, model, index):
        """
@@ -196,7 +277,7 @@ class MoleculeProject(project.Project):
        @param index (named tuple CalcID) calculation index.
            this method formats the index into the comment line.
        """
        params = project.CalculatorParams()
        params = pmsco.project.CalculatorParams()

        params.title = "molecule demo"
        params.comment = f"{self.__class__} {index}"
@@ -215,8 +296,21 @@ class MoleculeProject(project.Project):
        params.angular_resolution = model['ares']
        params.experiment_temperature = model['Texp']
        params.debye_temperature = model['Tdeb']
        params.phase_files = self.phase_files
        params.rme_files = self.rme_files

        if self.phase_files:
            state = emitter + initial_state
            try:
                params.phase_files = self.phase_files[state]
            except KeyError:
                params.phase_files = {}
                logger.warning("no phase files found for {} - using default calculator".format(state))

        params.rme_files = {}
        params.rme_minus_value = 0.1
        params.rme_minus_shift = 0.0
        params.rme_plus_value = 1.0
        params.rme_plus_shift = 0.0

        # edac_interface only
        params.emitters = []
        params.lmax = 15
@@ -233,152 +327,3 @@ class MoleculeProject(project.Project):
        """

        return self.model_space

    # noinspection PyUnusedLocal
    def calc_modulation(self, data, model):
        """
        calculate the modulation function with project-specific smoothing factor

        see @ref pmsco.pmsco.project.calc_modulation.

        @param data: (numpy.ndarray) experimental data in ETPI, or ETPAI format.

        @param model: (dict) model parameters of the calculation task. not used.

        @return copy of the data array with the modulation function in the 'i' column.
        """
        return calc_modfunc_loess(data, smth=self.modf_smth_ei)


def create_model_space(mode):
    """
    define the model space.
    """
    dom = project.ModelSpace()

    if mode == "single":
        dom.add_param('zsurf', 1.20)
        dom.add_param('Texp', 300.00)
        dom.add_param('Tdeb', 100.00)
        dom.add_param('V0', 10.00)
        dom.add_param('rmax', 50.00)
        dom.add_param('ares', 5.00)
        dom.add_param('distm', 5.00)
        dom.add_param('wdom1', 1.0)
        dom.add_param('wdom2', 1.0)
        dom.add_param('wdom3', 1.0)
        dom.add_param('wdom4', 1.0)
        dom.add_param('wdom5', 1.0)
    else:
        raise ValueError(f"undefined model space for {mode} optimization")

    return dom


def create_project():
    """
    create the project instance.
    """

    proj = MoleculeProject()
    proj_dir = os.path.dirname(os.path.abspath(__file__))
    proj.project_dir = proj_dir

    # scan dictionary
    # to select any number of scans, add their dictionary keys as scans option on the command line
    proj.scan_dict['empty'] = {'filename': os.path.join(proj_dir, "../common/empty-hemiscan.etpi"),
                               'emitter': "N", 'initial_state': "1s"}

    proj.mode = 'single'
    proj.model_space = create_model_space(proj.mode)
    proj.job_name = 'molecule0000'
    proj.description = 'molecule demo'

    return proj


def set_project_args(project, project_args):
    """
    set the project arguments.

    @param project: project instance

    @param project_args: (Namespace object) project arguments.
    """

    scans = []
    try:
        if project_args.scans:
            scans = project_args.scans
        else:
            logger.error("missing scan argument")
            exit(1)
    except AttributeError:
        logger.error("missing scan argument")
        exit(1)

    for scan_key in scans:
        scan_spec = project.scan_dict[scan_key]
        project.add_scan(**scan_spec)

    try:
        project.cluster_file = os.path.abspath(project_args.cluster_file)
        project.cluster_generator = MoleculeFileCluster(project)
    except (AttributeError, TypeError):
        logger.error("missing cluster-file argument")
        exit(1)

    try:
        if project_args.emitter:
            for scan in project.scans:
                scan.emitter = project_args.emitter
            logger.warning(f"override emitters of all scans to {project_args.emitter}")
    except AttributeError:
        pass

    try:
        if project_args.initial_state:
            for scan in project.scans:
                scan.initial_state = project_args.initial_state
            logger.warning(f"override initial states of all scans to {project_args.initial_state}")
    except AttributeError:
        pass

    try:
        if project_args.energy:
            for scan in project.scans:
                scan.energies = np.asarray((project_args.energy, ))
            logger.warning(f"override scan energy of all scans to {project_args.energy}")
    except AttributeError:
        pass

    try:
        if project_args.symmetry:
            for angle in np.linspace(0, 360, num=project_args.symmetry, endpoint=False):
                project.add_domain({'xrot': 0., 'yrot': 0., 'zrot': angle})
            logger.warning(f"override rotation symmetry to {project_args.symmetry}")
    except AttributeError:
        pass


def parse_project_args(_args):
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # main arguments
    parser.add_argument('--scans', nargs="*",
                        help="nick names of scans to use in calculation (see create_project function)")
    parser.add_argument('--cluster-file',
                        help="path name of molecule file (xyz format).")

    # conditional arguments
    parser.add_argument('--emitter',
                        help="emitter: chemical symbol")
    parser.add_argument('--initial-state',
                        help="initial state term: e.g. 2p1/2")
    parser.add_argument('--energy', type=float,
                        help="kinetic energy (eV)")
    parser.add_argument('--symmetry', type=int, default=1,
                        help="n-fold rotational symmetry")

    parsed_args = parser.parse_args(_args)
    return parsed_args
@@ -1,15 +1,24 @@
{
  // line comments using // or # prefix are allowed as an extension of JSON syntax
  "schedule": {
    "__module__": "pmsco.schedule",
    "__class__": "PsiRaSchedule",
    "tasks": 4,
    "nodes": 1,
    "wall_time": "1:00",
    "manual": true,
    "enabled": true,
    "overwrite_job_dir": true
  },
  "project": {
    "__module__": "projects.twoatom.twoatom",
    "__module__": "pmsco.projects.twoatom.twoatom",
    "__class__": "TwoatomProject",
    "job_name": "twoatom0002",
    "job_tags": [],
    "job_tags": {},
    "description": "",
    "mode": "single",
    "directories": {
      "data": "",
      "output": ""
      "output": "${work}/${job_name}"
    },
    "keep_files": [
      "cluster",
@@ -70,7 +79,7 @@
    ],
    "scans": [
      {
        "__class__": "mp.ScanCreator",
        "__class__": "ScanCreator",
        "filename": "twoatom_energy_alpha.etpai",
        "emitter": "N",
        "initial_state": "1s",
@@ -1,17 +1,28 @@
{
  // line comments using // or # prefix are allowed as an extension of JSON syntax
  "#comment": "keys starting with a non-alphabetic character are treated as a comment",
  "schedule": {
    "__module__": "pmsco.schedule",
    "__class__": "PsiRaSchedule",
    "tasks": 1,
    "nodes": 1,
    "wall_time": "1:00",
    "manual": true,
    "enabled": true,
    "overwrite_job_dir": true
  },
  "project": {
    "__module__": "projects.twoatom.twoatom",
    "__module__": "pmsco.projects.twoatom.twoatom",
    "__class__": "TwoatomProject",
    "job_name": "twoatom0001",
    "job_tags": [],
    "job_tags": {},
    "description": "",
    "mode": "single",
    "directories": {
      "data": "",
      "output": ""
      "output": "${work}/${job_name}"
    },
    "keep_files": [
      "all",
      "cluster",
      "model",
      "scan",
@@ -70,13 +81,23 @@
    ],
    "scans": [
      {
        // class name as it would be used in the project module
        "__class__": "mp.ScanLoader",
        // any placeholder key from project.directories can be used
        "filename": "{project}/twoatom_hemi_250e.etpi",
        "__class__": "HoloScanCreator",
        "filename": "${project}/twoatom_demo.etpi",
        "emitter": "N",
        "initial_state": "1s",
        "is_modf": false
        "generator": "pmsco.data.holo_grid",
        "generator_args": {
          "theta_start": 90,
          "theta_step": 1,
          "theta_range": 90,
          "phi_start": 0,
          "phi_range": 360,
          "phi_refinement": 1
        },
        "other_positions": {"e": 250, "a": 0},
        "modulation_func": "pmsco.data.calc_modfunc_loess",
        "modulation_args": {"smth": 0.5},
        "rfactor_func": "pmsco.data.square_diff_rfactor"
      }
    ],
    "optimizer_params": {
111	projects/twoatom/twoatom.py → pmsco/projects/twoatom/twoatom.py
Normal file → Executable file
@@ -1,33 +1,32 @@
"""
@package projects.twoatom
Two-atom demo scattering calculation project

this file is specific to the project and the state of the data analysis,
as it contains particular parameter values.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import logging
import math
import numpy as np
import os.path
import periodictable as pt

# noinspection PyUnresolvedReferences
from pmsco.pmsco import main
from pmsco.calculators.calculator import InternalAtomicCalculator
from pmsco.calculators.edac import EdacCalculator
from pmsco.calculators.phagen.runner import PhagenCalculator
import pmsco.cluster as mc
import pmsco.project as mp
from pmsco.cluster import Cluster, ClusterGenerator
from pmsco.project import CalculatorParams, ModelSpace, Project
from pmsco.helpers import BraceMessage as BMsg

# the following imports are used in run files
# noinspection PyUnresolvedReferences
import pmsco.data
# noinspection PyUnresolvedReferences
from pmsco.scan import ScanKey, ScanLoader, ScanCreator, HoloScanCreator

logger = logging.getLogger(__name__)


class TwoatomCluster(mc.ClusterGenerator):
class TwoatomCluster(ClusterGenerator):
    """
    cluster of two atoms.

@@ -119,7 +118,7 @@ class TwoatomCluster(mc.ClusterGenerator):
        dy = r * math.sin(th) * math.sin(ph)
        dz = r * math.cos(th)

        clu = mc.Cluster()
        clu = Cluster()
        clu.comment = "{0} {1}".format(self.__class__, index)
        clu.set_rmax(r * 2.0)

@@ -132,7 +131,7 @@ class TwoatomCluster(mc.ClusterGenerator):
        return clu


class TwoatomProject(mp.Project):
class TwoatomProject(Project):
    """
    two-atom calculation project class.

@@ -146,9 +145,9 @@ class TwoatomProject(mp.Project):
    @arg @c model['V0'] : inner potential
    @arg @c model['Zsurf'] : position of surface
    """

    def __init__(self):
        super(TwoatomProject, self).__init__()
        self.scan_dict = {}
        self.cluster_generator = TwoatomCluster(self)
        self.cluster_generator.set_atom_type('A', 'N')
        self.cluster_generator.set_atom_type('B', 'Ni')
@@ -173,13 +172,12 @@ class TwoatomProject(mp.Project):

        @param model: (dict) optimizable parameters
        """
        params = mp.CalculatorParams()
        params = CalculatorParams()

        params.title = "two-atom demo"
        params.comment = "{0} {1}".format(self.__class__, index)
        params.cluster_file = ""
        params.output_file = ""
        params.initial_state = self.scans[index.scan].initial_state
        initial_state = self.scans[index.scan].initial_state
        params.initial_state = initial_state
        emitter = self.scans[index.scan].emitter
@@ -219,7 +217,7 @@ class TwoatomProject(mp.Project):
        """
        define the domain of the optimization parameters.
        """
        dom = mp.ModelSpace()
        dom = ModelSpace()

        if self.mode == "single":
            dom.add_param('dNNi', 2.109, 2.000, 2.250, 0.050)
@@ -264,80 +262,3 @@ def example_intensity(e, t, p, a):
        np.cos(np.radians(p)) ** 2 * \
        np.sin(e / 1000. * np.pi * 0.1 / np.sqrt(e)) ** 2
    return i


def create_project():
    """
    create a new TwoatomProject calculation project.

    the default experimental data file is @c twoatom_hemi_scan_250e.etpi
    in the same directory as this Python module.
    it defines a classic hemispherical angle scan grid
    but does not include measured data for optimization.

    @return project instance.
    """

    project = TwoatomProject()

    project_dir = os.path.dirname(os.path.abspath(__file__))
    project.data_dir = project_dir

    # scan dictionary
    # to select any number of scans, add their dictionary keys as scans option on the command line
    project.scan_dict['ea'] = {'filename': os.path.join(project_dir, "twoatom_energy_alpha.etpai"),
                               'emitter': "N", 'initial_state': "1s"}
    project.scan_dict['et0p'] = {'filename': os.path.join(project_dir, "twoatom_energy_theta_0p.etpi"),
                                 'emitter': "N", 'initial_state': "1s"}
    project.scan_dict['et180p'] = {'filename': os.path.join(project_dir, "twoatom_energy_theta_180p.etpi"),
                                   'emitter': "N", 'initial_state': "1s"}
    project.scan_dict['tp215e'] = {'filename': os.path.join(project_dir, "twoatom_hemi_215e.etpi"),
                                   'emitter': "N", 'initial_state': "1s"}
    project.scan_dict['tp250e'] = {'filename': os.path.join(project_dir, "twoatom_hemi_250e.etpi"),
                                   'emitter': "N", 'initial_state': "1s"}

    return project


def set_project_args(project, project_args):
    """
    set the project-specific arguments.

    @param project: project instance

    @param project_args: (Namespace object) project arguments.
    """

    scans = []
    try:
        if project_args.scans:
            scans = project_args.scans
    except AttributeError:
        pass

    for scan_key in scans:
        scan_spec = project.scan_dict[scan_key]
        project.add_scan(**scan_spec)
        logger.info(BMsg("add scan {filename} ({emitter} {initial_state})", **scan_spec))

    project.add_domain({'default': 0.0})


def parse_project_args(_args):
    """
    parse project-specific command line arguments.

    @param _args: list of project-specific arguments from the command line.
        this is typically the unknown_args return value from argparse.ArgumentParser.parse_known_args().

    @return: namespace object containing the specified arguments as attributes.
    """
    parser = argparse.ArgumentParser()

    # main arguments
    parser.add_argument('-s', '--scans', nargs="*",
                        help="nick names of scans to use in calculation (see create_project function)")

    parsed_args = parser.parse_args(_args)

    return parsed_args
Some files were not shown because too many files have changed in this diff.