public distro 2.1.0

2019-07-19 12:54:54 +02:00 · 2019-07-19 12:54:54 +02:00 · fbd2d4fa8c
commit fbd2d4fa8c
parent acea809e4e
40 changed files with 2813 additions and 345 deletions
--- a/bin/pmsco.ra-git.template
+++ b/bin/pmsco.ra-git.template
@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Slurm script template for PMSCO calculations on the Ra cluster
+# based on run_mpi_HPL_nodes-2.sl by V. Markushin 2016-03-01
+#
+# this version checks out the source code from a git repository
+# to a temporary location and compiles the code.
+# this is to minimize conflicts between different jobs
+# but requires that each job has its own git commit.
+#
+# Use:
+# - enter the appropriate parameters and save as a new file.
+# - call the sbatch command to pass the job script.
+#   request a specific number of nodes and tasks.
+#   example:
+#   sbatch --nodes=2  --ntasks-per-node=24 --time=02:00:00 run_pmsco.sl
+# the qpmsco script does all this for you.
+#
+# PMSCO arguments
+# copy this template to a new file, and set the arguments
+#
+# PMSCO_WORK_DIR
+#   path to be used as working directory.
+#   contains the script derived from this template
+#   and a copy of the pmsco code in the 'pmsco' directory.
+#   receives output and temporary files.
+#
+# PMSCO_PROJECT_FILE
+#   python module that declares the project and starts the calculation.
+#   must include the file path relative to $PMSCO_WORK_DIR.
+#
+# PMSCO_OUT
+#   name of output file. should not include a path.
+#
+# all paths are relative to $PMSCO_WORK_DIR or (better) absolute.
+#
+#
+# Further arguments
+#
+# PMSCO_JOBNAME (required)
+#   the job name is the base name for output files.
+#
+# PMSCO_WALLTIME_HR (integer, required)
+#   wall time limit in hours. must be integer, minimum 1.
+#   this value is passed to PMSCO.
+#   it should specify the same amount of wall time as requested from the scheduler.
+#
+# PMSCO_PROJECT_ARGS (optional)
+#   extra arguments that are parsed by the project module.
+#
+#SBATCH --job-name="_PMSCO_JOBNAME"
+#SBATCH --output="_PMSCO_JOBNAME.o.%j"
+#SBATCH --error="_PMSCO_JOBNAME.e.%j"
+
+PMSCO_WORK_DIR="_PMSCO_WORK_DIR"
+PMSCO_JOBNAME="_PMSCO_JOBNAME"
+PMSCO_WALLTIME_HR=_PMSCO_WALLTIME_HR
+
+PMSCO_PROJECT_FILE="_PMSCO_PROJECT_FILE"
+PMSCO_OUT="_PMSCO_JOBNAME"
+PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"
+
+module load psi-python36/4.4.0
+module load gcc/4.8.5
+module load openmpi/3.1.3
+source activate pmsco3
+
+echo '================================================================================'
+echo "=== Running $0 at the following time and place:"
+date
+/bin/hostname
+cd $PMSCO_WORK_DIR
+pwd
+ls -lA
+#the intel compiler is currently not compatible with mpi4py. -mm 170131
+#echo
+#echo '================================================================================'
+#echo "=== Setting the environment to use Intel Cluster Studio XE 2016 Update 2 intel/16.2:"
+#cmd="source /opt/psi/Programming/intel/16.2/bin/compilervars.sh intel64"
+#echo $cmd
+#$cmd
+echo
+echo '================================================================================'
+echo "=== The environment is set as following:"
+env
+echo
+echo '================================================================================'
+echo "BEGIN test"
+which mpirun
+cmd="mpirun /bin/hostname"
+echo $cmd
+$cmd
+echo "END test"
+echo
+echo '================================================================================'
+echo "BEGIN mpirun pmsco"
+echo
+
+cd "$PMSCO_WORK_DIR"
+cd pmsco
+echo "code revision"
+git log --pretty=tformat:'%h %ai %d' -1
+make -C pmsco all
+python -m compileall pmsco
+python -m compileall projects
+echo
+
+cd "$PMSCO_WORK_DIR"
+PMSCO_CMD="python pmsco/pmsco $PMSCO_PROJECT_FILE"
+PMSCO_ARGS="$PMSCO_PROJECT_ARGS"
+if [ -n "$PMSCO_SCAN_FILES" ]; then
+    PMSCO_ARGS="-s $PMSCO_SCAN_FILES $PMSCO_ARGS"
+fi
+if [ -n "$PMSCO_OUT" ]; then
+    PMSCO_ARGS="-o $PMSCO_OUT $PMSCO_ARGS"
+fi
+if [ "$PMSCO_WALLTIME_HR" -ge 1 ]; then
+    PMSCO_ARGS="-t $PMSCO_WALLTIME_HR $PMSCO_ARGS"
+fi
+if [ -n "$PMSCO_LOGLEVEL" ]; then
+    PMSCO_ARGS="--log-level $PMSCO_LOGLEVEL --log-file $PMSCO_JOBNAME.log $PMSCO_ARGS"
+fi
+
+# Do not use the OpenMPI specific options, like "-x LD_LIBRARY_PATH", with the Intel mpirun.
+cmd="mpirun $PMSCO_CMD $PMSCO_ARGS"
+echo $cmd
+$cmd
+echo "END mpirun pmsco"
+echo '================================================================================'
+cd "$PMSCO_WORK_DIR"
+rm -rf pmsco
+date
+ls -lAtr
+echo '================================================================================'
+
+exit 0
--- a/bin/pmsco.ra.template
+++ b/bin/pmsco.ra.template
@ -75,10 +75,10 @@ PMSCO_OUT="_PMSCO_JOBNAME"
 PMSCO_LOGLEVEL="_PMSCO_LOGLEVEL"
 PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"

-module load psi-python27/2.4.1
+module load psi-python36/4.4.0
 module load gcc/4.8.5
-module load openmpi/1.10.2
-source activate pmsco
+module load openmpi/3.1.3
+source activate pmsco3

 echo '================================================================================'
 echo "=== Running $0 at the following time and place:"
--- a/bin/qpmsco.ra-git.sh
+++ b/bin/qpmsco.ra-git.sh
@ -0,0 +1,145 @@
+#!/bin/sh
+#
+# submission script for PMSCO calculations on the Ra cluster
+#
+# this version clones the current git repository at HEAD to the work directory.
+# thus, version conflicts between jobs are avoided.
+#
+
+if [ $# -lt 1 ]; then
+  echo "Usage: $0 [NOSUB] GIT_TAG DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT [ARGS [ARGS [...]]]"
+  echo ""
+  echo "       NOSUB (optional): do not submit the script to the queue. default: submit."
+  echo "       GIT_TAG: git tag or branch name of the code. HEAD for current code."
+  echo "       DESTDIR: destination directory. must exist. a sub-dir \$JOBNAME is created."
+  echo "       JOBNAME (text): name of job. use only alphanumeric characters, no spaces."
+  echo "       NODES (integer): number of computing nodes. (1 node = 24 or 32 processors)."
+  echo "          do not specify more than 2."
+  echo "       TASKS_PER_NODE (integer): 1...24, or 32."
+  echo "          24 or 32 for full-node allocation."
+  echo "          1...23 for shared node allocation."
+  echo "       WALLTIME:HOURS (integer): requested wall time."
+  echo "          1...24 for day partition"
+  echo "          24...192 for week partition"
+  echo "          1...192 for shared partition"
+  echo "       PROJECT: python module (file path) that declares the project and starts the calculation."
+  echo "       ARGS (optional): any number of further PMSCO or project arguments (except time)."
+  echo ""
+  echo "the job script is written to \$DESTDIR/\$JOBNAME which is also the destination of calculation output."
+  exit 1
+fi
+
+# location of the pmsco package is derived from the path of this script
+SCRIPTDIR="$(dirname $(readlink -f $0))"
+SOURCEDIR="$(readlink -f $SCRIPTDIR/..)"
+PMSCO_SOURCE_DIR="$SOURCEDIR"
+
+# read arguments
+if [ "$1" == "NOSUB" ]; then
+  NOSUB="true"
+  shift
+else
+  NOSUB="false"
+fi
+
+if [ "$1" == "HEAD" ]; then
+    BRANCH_ARG=""
+else
+    BRANCH_ARG="-b $1"
+fi
+shift
+
+DEST_DIR="$1"
+shift
+
+PMSCO_JOBNAME=$1
+shift
+
+PMSCO_NODES=$1
+PMSCO_TASKS_PER_NODE=$2
+PMSCO_TASKS=$(expr $PMSCO_NODES \* $PMSCO_TASKS_PER_NODE)
+shift 2
+
+PMSCO_WALLTIME_HR=$1
+PMSCO_WALLTIME_MIN=$(expr $PMSCO_WALLTIME_HR \* 60)
+shift
+
+# select partition
+if [ $PMSCO_WALLTIME_HR -ge 25 ]; then
+    PMSCO_PARTITION="week"
+else
+    PMSCO_PARTITION="day"
+fi
+if [ $PMSCO_TASKS_PER_NODE -lt 24 ]; then
+    PMSCO_PARTITION="shared"
+fi
+
+PMSCO_PROJECT_FILE="$(readlink -f $1)"
+shift
+
+PMSCO_PROJECT_ARGS="$*"
+
+# set up working directory
+cd "$DEST_DIR"
+if [ ! -d "$PMSCO_JOBNAME" ]; then
+    mkdir "$PMSCO_JOBNAME"
+fi
+cd "$PMSCO_JOBNAME"
+WORKDIR="$(pwd)"
+PMSCO_WORK_DIR="$WORKDIR"
+
+# copy code
+PMSCO_SOURCE_REPO="file://$PMSCO_SOURCE_DIR"
+echo "$PMSCO_SOURCE_REPO"
+
+cd "$PMSCO_WORK_DIR"
+git clone $BRANCH_ARG --single-branch --depth 1 $PMSCO_SOURCE_REPO pmsco || exit
+cd pmsco
+PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1) || exit
+cd "$WORKDIR"
+echo "$PMSCO_REV" > revision.txt
+
+# generate job script from template
+sed -e "s:_PMSCO_WORK_DIR:$PMSCO_WORK_DIR:g" \
+    -e "s:_PMSCO_JOBNAME:$PMSCO_JOBNAME:g" \
+    -e "s:_PMSCO_NODES:$PMSCO_NODES:g" \
+    -e "s:_PMSCO_WALLTIME_HR:$PMSCO_WALLTIME_HR:g" \
+    -e "s:_PMSCO_PROJECT_FILE:$PMSCO_PROJECT_FILE:g" \
+    -e "s:_PMSCO_PROJECT_ARGS:$PMSCO_PROJECT_ARGS:g" \
+    "$SCRIPTDIR/pmsco.ra-git.template" > $PMSCO_JOBNAME.job
+
+chmod u+x "$PMSCO_JOBNAME.job" || exit
+
+# request nodes and tasks
+#
+# The option --ntasks-per-node is meant to be used with the --nodes option.
+# (For the --ntasks option, the default is one task per node, use the --cpus-per-task option to change this default.)
+#
+# sbatch options
+# --cores-per-socket=16
+#   32 cores per node
+# --partition=[shared|day|week]
+# --time=8-00:00:00
+#   override default time limit (2 days in long queue)
+#   time formats: "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes", "days-hours:minutes:seconds"
+# --mail-type=ALL
+# --test-only
+#   check script but do not submit
+#
+SLURM_ARGS="--nodes=$PMSCO_NODES --ntasks-per-node=$PMSCO_TASKS_PER_NODE"
+
+if [ $PMSCO_TASKS_PER_NODE -gt 24 ]; then
+    SLURM_ARGS="--cores-per-socket=16 $SLURM_ARGS"
+fi
+
+SLURM_ARGS="--partition=$PMSCO_PARTITION $SLURM_ARGS"
+
+SLURM_ARGS="--time=$PMSCO_WALLTIME_HR:00:00 $SLURM_ARGS"
+
+CMD="sbatch $SLURM_ARGS $PMSCO_JOBNAME.job"
+echo $CMD
+if [ "$NOSUB" != "true" ]; then
+  $CMD
+fi
+
+exit 0
--- a/bin/qpmsco.ra.sh
+++ b/bin/qpmsco.ra.sh
@ -1,6 +1,12 @@
 #!/bin/sh
 #
 # submission script for PMSCO calculations on the Ra cluster
+#
+# CAUTION: the job will execute the pmsco code which is present in the directory tree
+#          of this script _at the time of job execution_, not submission!
+#          before changing the code, make sure that all pending jobs have started execution,
+#          otherwise you will experience version conflicts.
+#          it's better to use the qpmsco.ra-git.sh script which clones the code.

 if [ $# -lt 1 ]; then
  echo "Usage: $0 [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]"
@ -87,9 +93,9 @@ PMSCO_WORK_DIR="$WORKDIR"

 # provide revision information, requires git repository
 cd "$SOURCEDIR"
-PMSCO_REV=$(git log --pretty=format:"Data revision %h, %ai" -1)
+PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1)
 if [ $? -ne 0 ]; then
-   PMSCO_REV="Data revision unknown, "$(date +"%F %T %z")
+   PMSCO_REV="revision unknown, "$(date +"%F %T %z")
 fi
 cd "$WORKDIR"
 echo "$PMSCO_REV" > revision.txt
--- a/bin/qpmsco.sge
+++ b/bin/qpmsco.sge
@ -86,9 +86,9 @@ PHD_WORK_DIR="$WORKDIR"

 # provide revision information, requires git repository
 cd "$SOURCEDIR"
-PHD_REV=$(git log --pretty=format:"Data revision %h, %ad" --date=iso -1)
+PHD_REV=$(git log --pretty=format:"%h, %ad" --date=iso -1)
 if [ $? -ne 0 ]; then
-   PHD_REV="Data revision unknown, "$(date +"%F %T %z")
+   PHD_REV="revision unknown, "$(date +"%F %T %z")
 fi
 cd "$WORKDIR"
 echo "$PHD_REV" > revision.txt
--- a/docs/config.dox
+++ b/docs/config.dox
@ -763,6 +763,7 @@ src/introduction.dox \
 src/concepts.dox \
 src/concepts-tasks.dox \
 src/concepts-emitter.dox \
+src/concepts-atomscat.dox \
 src/installation.dox \
 src/execution.dox \
 src/commandline.dox \
--- a/docs/src/commandline.dox
+++ b/docs/src/commandline.dox
@ -21,9 +21,6 @@ Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name to your project module.
 Common args and project args are described below.

-Note: In contrast to earlier versions, the project module is not executed directly any more.
-Rather, it is loaded by the main pmsco module as a 'plug-in'.
-

 \subsection sec_common_args Common Arguments

@ -43,15 +40,14 @@ The following table is ordered by importance.
 | --log-level | DEBUG, INFO, WARNING (default), ERROR, CRITICAL | Minimum level of messages that should be added to the log. |
 | --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
 | --log-disable | | Disable logging. By default, logging is on. |
-| --pop-size | integer | Population size (number of particles) in swarm optimization mode. The default value is the greater of 4 or two times the number of calculation processes. |
+| --pop-size | integer | Population size (number of particles) in swarm and genetic optimization mode. The default value is the greater of 4 or the number of parallel calculation processes. |
 | --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
 | --table-file | file system path | Name of the model table file in table scan mode. |
-| -c, --code | edac (default) | Scattering code. At the moment, only edac is supported. |


 \subsubsection sec_file_categories File Categories

-The following category names can be used with the @c --keep-files option.
+The following category names can be used with the `--keep-files` option.
 Multiple names can be specified and must be separated by spaces.

 | Category | Description | Default Action |
@ -59,7 +55,7 @@ Multiple names can be specified and must be separated by spaces.
 | all | shortcut to include all categories | |
 | input |      raw input files for calculator, including cluster and phase files in custom format | delete |
 | output |     raw output files from calculator | delete |
-| phase |      phase files in portable format for report |  delete |
+| atomic |     atomic scattering and emission files in portable format | delete |
 | cluster |    cluster files in portable XYZ format for report | keep |
 | debug |      debug files |  delete |
 | model |       output files in ETPAI format: complete simulation  (a_-1_-1_-1_-1) | keep |
@ -67,9 +63,20 @@ Multiple names can be specified and must be separated by spaces.
 | symmetry |   output files in ETPAI format: symmetry (a_b_c_-1_-1) |  delete |
 | emitter |    output files in ETPAI format: emitter (a_b_c_d_-1) |  delete |
 | region |     output files in ETPAI format: region (a_b_c_d_e) |  delete |
-| report|      final report of results |  keep |
+| report|      final report of results | keep always |
 | population |  final state of particle population | keep |
-| rfac |        files related to models which give bad r-factors | delete |
+| rfac |        files related to models which give bad r-factors, see warning below | delete |
+
+\note
+The `report` category is always kept and cannot be turned off.
+The `model` category is always kept in single calculation mode.
+
+\warning
+If you want to specify `rfac` with the `--keep-files` option,
+you have to add the file categories that you want to keep, e.g.,
+`--keep-files rfac cluster model scan population`
+(to return the default categories for all calculated models).
+Do not specify `rfac` alone as this will effectively not return any file.


 \subsection sec_project_args Project Arguments
@ -125,4 +132,4 @@ The job script is written to @c $DESTDIR/$JOBNAME which is also the destination
 | MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
 | ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |

-*/
+*/
--- a/docs/src/concepts-atomscat.dox
+++ b/docs/src/concepts-atomscat.dox
@ -0,0 +1,114 @@
+/*! @page pag_concepts_atomscat Atomic scattering
+
+\section sec_atomscat Atomic scattering
+
+\subsection sec_atomscat_intro Introduction
+
+The process of calculating atomic scattering factors (phase shifts) can be customized in several ways.
+
+1. Internal processing.
+   Some multiple scattering programs, like EDAC, contain a built-in facility to calculate phase shifts.
+   This is the simplest option and the default behaviour.
+2. Automatic calculation in a separate program.
+   PMSCO has an interface to run the PHAGEN program from
+   the [MsSpec-1.0 package](https://ipr.univ-rennes1.fr/msspec) to calculate scattering factors.
+   Note that the PHAGEN code is not included in the public distribution of PMSCO.
+3. Manual calculation.
+   Scattering files created manually using an external program can be used by providing the file names.
+   The files must have the format required by the multiple scattering code,
+   and they must be linked to the corresponding atoms of the cluster.
+
+In the case of automatic calculation, the project code can optionally hook into the process
+and modify clusters before and after scattering factors are calculated.
+For instance, it may provide an extended cluster in order to reduce boundary effects,
+or it may modify the assignment of scattering files to cluster atoms
+so that the scattering factors of selected atom classes are used
+(cf. section \ref sec_atomscat_atomclass).
+
+
+\subsection sec_atomscat_usage Usage
+
+\subsubsection sec_atomscat_internal Internal processing
+
+This is the default behaviour selected in the inherited pmsco.project.Project class.
+Make sure not to override the `atomic_scattering_factory` attribute.
+Its default value is pmsco.calculators.calculator.InternalAtomicCalculator.
+
+\subsubsection sec_atomscat_external Automatic calculation in a separate program
+
+To select the atomic scattering calculator,
+assign its interface class to the project's `atomic_scattering_factory` attribute.
+For example, to use PHAGEN, add the following code to your project's `__init__` constructor:
+
+@code{.py}
+    from pmsco.calculators.phagen import PhagenCalculator
+    self.atomic_scattering_factory = PhagenCalculator
+@endcode
+
+\subsubsection sec_atomscat_manual Manual calculation
+
+If you want to keep the scattering factors constant during an optimization,
+you should run PMSCO in _single_ mode and provide the model parameters and cluster
+that will return the desired scattering files.
+In the `create_params` method of your project,
+you should then set the `phase_files` attribute,
+which is a dictionary that maps atom classes to the names of the scattering files.
+Unless you set specific values in the cluster object, the atom class defaults to the element number.
+The file names should include a path relative to the working directory.
+
+
+\subsection sec_atomscat_implement Implementation
+
+\subsubsection sec_atomscat_atomclass Atom classes
+
+Atomic scattering programs classify atoms based on chemical element, charge state and symmetry of the local environment.
+This means that two atoms of the same chemical element may have different scattering factors.
+For example, if you have EDAC output the cluster after calculation of the muffin tin potential,
+you will find that the chemical element number has been replaced by an arbitrary integer.
+
+By default, PMSCO will do the linking of atom classes and scattering files transparently.
+However, if you want to reduce the number of atom classes,
+or if you have the scattering factors calculated on a reference cluster,
+you will have to provide project code to do the assignment.
+This is described further below.
+
+
+\subsubsection sec_atomscat_calculator Atomic scattering calculator
+
+The project selects the atomic scattering calculation mode by specifying its `atomic_scattering_factory` attribute.
+This is the name of a class that inherits from @ref pmsco.calculators.calculator.AtomicCalculator.
+
+The following calculators are currently implemented:
+
+| Class | Description |
+| --- | --- |
+| pmsco.calculators.calculator.InternalAtomicCalculator | Calculate the atomic scattering factors in the multiple-scattering program. |
+| pmsco.calculators.phagen.PhagenCalculator | Calculate the atomic scattering factors in the PHAGEN program. |
+
+An atomic calculator class essentially defines a `run` method that operates on a cluster and scattering parameters object.
+It generates the necessary scattering files, updates the cluster with the new atom classes
+and updates the parameters with the file names of the scattering files.
+Note that the scattering files have to be in the correct format for the multiple scattering calculator.
+
+
+\subsubsection sec_atomscat_hooks Project hooks
+
+Before and after calculation of the scattering factors,
+the project's `before_atomic_scattering` and `after_atomic_scattering` methods are called
+with the cluster and input parameters.
+
+The _before_ method provides the cluster to be used for atomic scattering calculations.
+It may,
+1. just return the original cluster,
+2. modify the provided cluster to include additional atoms or modify the charge state of the emitter,
+3. create a completely different cluster,
+4. return None to suppress the atomic scattering calculation.
+The method is called once at the beginning of the PMSCO job with model -1,
+where it may return the global reference cluster.
+Later on it is called once for each calculation task with the specific task index.
+
+Similarly, the _after_ method collects the results and updates the `phase_files` dictionary of the input parameters.
+It is free to consolidate atom classes and remove unwanted atoms.
+However, it must make sure that for each atom class in the cluster,
+there is a corresponding link to a scattering file.
+*/
--- a/docs/src/installation.dox
+++ b/docs/src/installation.dox
@ -39,8 +39,8 @@ The code depends on the following libraries:
 - BLAS
 - LAPACK
 - Python 2.7 or 3.6
- Numpy >= 1.11
- Python packages from PyPI listed in the requirements.txt file
+- Numpy >= 1.13
+- Python packages listed in the requirements.txt file

 Most of these requirements are available from the Linux distribution.
 For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
@ -50,11 +50,11 @@ and it's difficult to switch between different Python versions.
 On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
 Details are explained in the PEARL Wiki.

-PMSCO runs under Python 2.7 or Python 3.6 or higher.
-Since Python 2 is being deprecated, the code has been ported to Python 3.6.
-Compatibility with Python 2.7 is maintained by using
-the [future package](http://python-future.org/compatible_idioms.html).
-New code should be written according to their guidelines.
+PMSCO runs under Python 2.7 or Python 3.6.
+Since Python 2 is being deprecated, Python 3.6 is recommended.
+Compatibility with Python 2.7 is currently maintained by using
+the [future package](http://python-future.org/compatible_idioms.html)
+but may be dropped at any time.


 \subsection sec_install_instructions Instructions
@ -86,7 +86,6 @@ nano \
 openmpi-bin \
 openmpi-common \
 sqlite3 \
-swig \
 wget
@endcode

@ -102,11 +101,11 @@ Install Miniconda according to their [instructions](https://conda.io/docs/user-g
 then configure the Python environment:

@code{.sh}
-conda create -q --yes -n pmsco python=2.7
+conda create -q --yes -n pmsco python=3.6
 source activate pmsco
 conda install -q --yes -n pmsco \
    pip \
-    numpy \
+    "numpy>=1.13" \
    scipy \
    ipython \
    mpi4py \
@ -114,7 +113,9 @@ conda install -q --yes -n pmsco \
    nose \
    mock \
    future \
-    statsmodels
+    statsmodels \
+    swig \
+    gitpython
 pip install periodictable attrdict fasteners
@endcode

--- a/docs/src/introduction.dox
+++ b/docs/src/introduction.dox
@ -9,13 +9,15 @@ The actual scattering calculation is done by code developed by other parties.
 While the scattering program typically calculates a diffraction pattern based on a set of static parameters and a specific coordinate file in a single process,
 PMSCO wraps around that program to facilitate parameter handling, cluster building, structural optimization and parallel processing.

-In the current version, the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code
-developed by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley (1999) is used for scattering calculations.
-Other code can be integrated as well.
-Initially, support for the MSC program by Kaduwela, Friedman, and Fadley was planned but is currently not maintained.
-PMSCO is written in Python 2.7.
-EDAC is written in C++, MSC in Fortran.
-PMSCO interacts with the calculation programs through Python wrappers for C++ or Fortran.
+In the current version, PMSCO can make use of the following programs.
+Other programs may be integrated as well.
+
+- [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/)
+  by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley,
+  [Phys. Rev. B 63 (2001) 075404](http://dx.doi.org/10.1103/PhysRevB.63.075404)
+- PHAGEN from the [MsSpec package](https://ipr.univ-rennes1.fr/msspec)
+  by C. R. Natoli and D. Sébilleau,
+  [Comp. Phys. Comm. 182 (2011) 2567](http://dx.doi.org/10.1016/j.cpc.2011.07.012)


 \section sec_intro_highlights Highlights
@ -63,11 +65,11 @@ An open distribution of PMSCO is available under the [Apache License, Version 2.
 - Please acknowledge the use of the code.
 - Please share your development of the code with the original author.

-Due to different copyright, the MSC and EDAC programs are not contained in the public software repository.
+Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
 These programs may not be used without an explicit agreement by the respective original authors.

 \author    Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
 \version   This documentation is compiled from version $(REVISION).
-\copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch)
+\copyright 2015-2019 by [Paul Scherrer Institut](http://www.psi.ch)
 \copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
 */
--- a/docs/src/optimizers.dox
+++ b/docs/src/optimizers.dox
@ -30,6 +30,55 @@ The domain parameters have the following meanings:
 | step | Not used. |


+\subsubsection sec_opt_seed Seeding a population
+
+By default, one particle is initialized with the start value declared in the parameter domain,
+and the others are set to random values within the domain.
+You may initialize more particles of the population with specific values by providing a seed file.
+
+The seed file must have a format similar to the result `.dat` files
+with a header line specifying the column names and data rows containing the values for each particle.
+A good practice is to use a previous `.dat` file and remove unwanted rows.
+To continue an interrupted optimization,
+the `.dat` file from the previous optimization can be used as is.
+
+The seeding procedure can be tweaked by several optimizer parameters (see above).
+PMSCO normally loads the first rows up to population size - 1 or up to the `seed_limit` parameter,
+whichever is lower.
+If an `_rfac` column is present, the file is first sorted by R-factor and only the best models are loaded.
+Models that resulted in an R-factor above the `rfac_limit` parameter are always ignored.
+
+During the optimization process, all models loaded from the seed file are normally re-calculated.
+This may waste CPU time if the calculation is run under the same conditions
+and would result in exactly the same R-factor,
+as is the case if the seed is used to continue a previous optimization, for example.
+In these situations, the `recalc_seed` parameter can be set to False,
+and PMSCO will use the R-factor value from the seed file rather than calculating the model again.
+
+
+\subsubsection sec_opt_patch Patching a running optimization
+
+While an optimization process is running, the user can manually patch the population with arbitrary values,
+for instance, to kick the population out of a local optimum or to drive it to a less sampled parameter region.
+To patch a running population, prepare a population file named `pmsco_patch.pop` and copy it to the work directory.
+
+The file must have a format similar to the result `.dat` files
+with a header line specifying the column names and data rows containing the values.
+It should contain as many rows as particles to be patched but not more than the size of the population.
+The columns must include a `_particle` column which specifies the particle to patch
+as well as the model parameters to be changed.
+Parameters that should remain unaffected can be left out,
+extra columns including `_gen`, `_rfac` etc. are ignored.
+
+PMSCO checks the file for syntax errors and ignores it if errors are present.
+Parameter values that lie outside the domain boundary are ignored.
+Successful or failed patching is logged at warning level.
+The patch file is re-applied whenever its time stamp has changed.
+
+\attention Do not edit the patch file in the working directory
+to prevent it from being read in an unfinished state or multiple times.
+
+
 \subsection sec_opt_genetic Genetic optimization

 The genetic algorithm evolves a population of individuals 
@ -73,11 +122,14 @@ The domain parameters have the following meanings:
 | max | Upper limit of the parameter range. |
 | step | Standard deviation of the Gaussian distribution of weak mutations. The step should not be much lower than the parameter range divided by the population size and not greater than one third of the parameter range. |

+The population of the genetic optimizer can be seeded and patched in the same way as the particle swarm,
+cf. sections @ref sec_opt_seed and @ref sec_opt_swarm.
+

 \subsection sec_opt_grid Grid search

 The grid search algorithm samples the parameter space at equidistant steps.
-The order of calculations is randomized so that early results represent various parts of the parameter space.
+The order of calculations is randomized so that distant parts of the parameter space are sampled at an early stage.

 | Parameter | Description |
 | --- | --- |
--- a/docs/src/uml/project-classes.puml
+++ b/docs/src/uml/project-classes.puml
@ -25,7 +25,7 @@ class Scan {
    thetas
    phis
    alphas
-    set_scan()
+    import_scan_file()
 }

 class Domain {
--- a/extras/singularity/singularity_python2
+++ b/extras/singularity/singularity_python2
@ -64,7 +64,6 @@ try agent forwarding (-A option to ssh).
        openmpi-bin \
        openmpi-common \
        sqlite3 \
-        swig \
        wget
    apt-get clean

@ -76,7 +75,7 @@ try agent forwarding (-A option to ssh).
    . /usr/local/miniconda3/bin/activate pmsco
    conda install -q --yes -n pmsco \
        pip \
-        numpy \
+        "numpy>=1.13" \
        scipy \
        ipython \
        mpi4py \
@ -84,7 +83,8 @@ try agent forwarding (-A option to ssh).
        nose \
        mock \
        future \
-        statsmodels
+        statsmodels \
+        swig
    conda clean --all -y
    /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners
    
--- a/extras/singularity/singularity_python3
+++ b/extras/singularity/singularity_python3
@ -63,7 +63,6 @@ try agent forwarding (-A option to ssh).
        openmpi-bin \
        openmpi-common \
        sqlite3 \
-        swig \
        wget
    apt-get clean

@ -75,7 +74,7 @@ try agent forwarding (-A option to ssh).
    . /usr/local/miniconda3/bin/activate pmsco
    conda install -q --yes -n pmsco \
        pip \
-        numpy \
+        "numpy>=1.13" \
        scipy \
        ipython \
        mpi4py \
@ -83,7 +82,8 @@ try agent forwarding (-A option to ssh).
        nose \
        mock \
        future \
-        statsmodels
+        statsmodels \
+        swig
    conda clean --all -y
    /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners

--- a/4
+++ b/4
@ -35,7 +35,7 @@ SHELL=/bin/sh
 # make all
 #

-.PHONY: all bin docs clean edac loess msc mufpot
+.PHONY: all bin docs clean edac loess msc mufpot phagen

 PMSCO_DIR = pmsco
 DOCS_DIR = docs
@ -44,7 +44,7 @@ all: edac loess docs

 bin: edac loess

-edac loess msc mufpot:
+edac loess msc mufpot phagen:
 	$(MAKE) -C $(PMSCO_DIR)

 docs:
--- a/pmsco/calculators/calculator.py
+++ b/pmsco/calculators/calculator.py
@ -11,7 +11,7 @@ TestCalcInterface is provided for testing the PMSCO code quickly without calling

@author Matthias Muntwiler

-@copyright (c) 2015-18 by Paul Scherrer Institut @n
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
@ -56,11 +56,25 @@ class Calculator(object):
            @arg the first element is the name of the main ETPI or ETPAI result file to be further processed.
            @arg the second element is a dictionary that lists the names of all created data files with their category.
                 the dictionary key is the file name,
-                 the value is the file category (cluster, phase, etc.).
+                 the value is the file category (cluster, atomic, etc.).
        """
        return None, None


+class AtomicCalculator(Calculator):
+    """
+    abstract interface class to the atomic scattering calculation program.
+    """
+    pass
+
+
+class InternalAtomicCalculator(AtomicCalculator):
+    """
+    dummy atomic scattering class if scattering factors are calculated internally by the multiple scattering calculator.
+    """
+    pass
+
+
 class TestCalculator(Calculator):
    """
    interface class producing random data for testing the MSCO code without calling an external program.
--- a/pmsco/calculators/edac.py
+++ b/pmsco/calculators/edac.py
@ -55,7 +55,10 @@ class EdacCalculator(calculator.Calculator):

        @param filepath: (str) name and path of the file to be created.

+        @return dictionary of created files {filename: category}
        """
+        files = {}
+
        with open(filepath, "w") as f:
            f.write("verbose off\n")
            f.write("cluster input {0}\n".format(params.cluster_file))
@ -146,9 +149,16 @@ class EdacCalculator(calculator.Calculator):
            scatterers = ["scatterer {at} {fi}\n".format(at=at, fi=fi)
                          for (at, fi) in params.phase_files.items()
                          if os.path.isfile(fi)]
-            if scatterers:
+            rme = ["rmat {fi}\n".format(fi=fi)
+                   for (at, fi) in params.rme_files.items()
+                   if at == params.emitters[0][3] and os.path.isfile(fi)] or \
+                  ["rmat inline 1 regular1 {l0} {pv} {pd} {mv} {md}\n".format(l0=params.l_init,
+                   pv=params.rme_plus_value, pd=params.rme_plus_shift,
+                   mv=params.rme_minus_value, md=params.rme_minus_shift)]
+            if scatterers and rme:
                for scat in scatterers:
                    f.write(scat)
+                f.write(rme[0])
            else:
                f.write("muffin-tin\n")

@ -162,16 +172,27 @@ class EdacCalculator(calculator.Calculator):
            f.write("orders {0:d} ".format(len(params.orders)))
            f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
            f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
-            # f.write("cluster output l(A) out.clu")
-            # problems:
-            # - muffin-tin relabels atoms
-            # - there can be multiple atom types for the same chemical element
-            # - we have to compare coordinates to find the mapping between input and output cluster
-            # f.write("scan scatterer i phase-shifts i.pha")
-            # f.write("scan scatterer i potential i.pot")
+
+            # scattering factor output (see project.Params.phase_output_classes)
+            if params.phase_output_classes is not None:
+                fn = "{0}.clu".format(params.output_file)
+                f.write("cluster output l(A) {fn}\n".format(fn=fn))
+                files[fn] = "output"
+                try:
+                    cls = (cl for cl in params.phase_output_classes)
+                except TypeError:
+                    cls = range(params.phase_output_classes)
+                for cl in cls:
+                    fn = "{of}.{cl}.scat".format(cl=cl, of=params.output_file)
+                    f.write("scan scatterer {cl} phase-shifts {fn}\n".format(cl=cl, fn=fn))
+                    files[fn] = "output"
+
            f.write("scan pd {0}\n".format(params.output_file))
+            files[params.output_file] = "output"
            f.write("end\n")

+        return files
+
    def run(self, params, cluster, scan, output_file):
        """
        run EDAC with the given parameters and cluster.
@ -205,13 +226,13 @@ class EdacCalculator(calculator.Calculator):
        params.cluster_file = clu_filename
        params.output_file = out_filename
        params.data_file = dat_filename
-        params.emitters = cluster.get_emitters()
+        params.emitters = cluster.get_emitters(['x', 'y', 'z', 'c'])

        # save parameter files
        logger.debug("writing cluster file %s", clu_filename)
        cluster.save_to_file(clu_filename, fmt=mc.FMT_EDAC)
        logger.debug("writing input file %s", par_filename)
-        self.write_input_file(params, scan, par_filename)
+        files = self.write_input_file(params, scan, par_filename)

        # run EDAC
        logger.info("calling EDAC with input file %s", par_filename)
@ -244,6 +265,9 @@ class EdacCalculator(calculator.Calculator):
        logger.debug("save result to file %s", etpi_filename)
        md.save_data(etpi_filename, result_etpi)

-        files = {clu_filename: 'input', par_filename: 'input', dat_filename: 'output',
-                 etpi_filename: 'region'}
+        files[clu_filename] = 'input'
+        files[par_filename] = 'input'
+        files[dat_filename] = 'output'
+        files[etpi_filename] = 'region'
+
        return etpi_filename, files
--- a/pmsco/calculators/phagen/__init__.py
+++ b/pmsco/calculators/phagen/__init__.py
--- a/pmsco/calculators/phagen/makefile
+++ b/pmsco/calculators/phagen/makefile
@ -0,0 +1,43 @@
+SHELL=/bin/sh
+
+# makefile for PHAGEN program and module
+#
+# the PHAGEN source code is not included in the public distribution.
+# please obtain the PHAGEN code from the original author,
+# and copy it to this directory before compilation.
+#
+# see the top-level makefile for additional information.
+#
+# targets:
+#   phagen.exe                   stand-alone program compiled with $(FC).
+#   phagen$(PYTHON_EXT_SUFFIX)   python extension module built with $(F2PY).
+#   phagen.pyf                   f2py signature file, exposes the libmain routine only.
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
+.PHONY: all clean phagen
+
+# all tool and option variables can be overridden from the environment or the command line.
+FC?=gfortran
+FCOPTS?=
+F2PY?=f2py
+F2PYOPTS?=
+CC?=gcc
+CCOPTS?=
+SWIG?=swig
+SWIGOPTS?=
+PYTHON?=python
+PYTHONOPTS?=
+PYTHONINC?=
+PYTHON_CONFIG = ${PYTHON}-config
+PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
+PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix)
+
+all: phagen
+
+# the module target must carry the extension suffix.
+# with an empty suffix it would collapse into the phony target "phagen" and depend on itself.
+phagen: phagen.exe phagen$(PYTHON_EXT_SUFFIX)
+
+phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc
+	$(FC) $(FCOPTS) -o phagen.exe phagen_scf.f
+
+# order-only prerequisite: an existing signature file is not regenerated when the source changes.
+phagen.pyf: | phagen_scf.f
+	$(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain
+
+phagen$(PYTHON_EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc
+	$(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f
+
+clean:
+	rm -f *.so *.o *.exe
--- a/pmsco/calculators/phagen/phagen_scf.f.patch
+++ b/pmsco/calculators/phagen/phagen_scf.f.patch
@ -0,0 +1,102 @@
+--- phagen_scf.orig.f	2019-06-05 16:45:52.977855859 +0200
+++ phagen_scf.f	2019-05-09 16:32:35.790286429 +0200
+@@ -174,6 +174,99 @@
+  1100 format(//,1x,' ** phagen terminated normally ** ',//)
+       end
+ 
+
+c-----------------------------------------------------------------------
+      subroutine libmain(infile,outfile,etcfile)
+c      main calculation routine
+c      entry point for external callers
+c
+c      infile: name of parameter input file
+c
+c      outfile: base name of output files
+c        output files with endings .list, .clu, .pha, .tl, .rad
+c        will be created
+c-----------------------------------------------------------------------
+      implicit real*8 (a-h,o-z)
+c
+      include 'msxas3.inc'
+      include 'msxasc3.inc'
+
+      character*60 infile,outfile,etcfile
+      character*70 listfile,clufile,tlfile,radfile,phafile
+
+c
+c.. constants
+      antoau  = 0.52917715d0
+      pi      = 3.141592653589793d0
+      ev      = 13.6058d0
+      zero    = 0.d0
+c.. threshold for linearity
+      thresh  = 1.d-4
+c.. fortran io units
+      idat = 5
+      iwr = 6
+      iphas = 30
+      iedl0 = 31
+      iwf = 32
+      iof = 17
+
+      iii=LnBlnk(outfile)+1
+      listfile=outfile
+      listfile(iii:)='.list'
+      clufile=outfile
+      clufile(iii:)='.clu'
+      phafile=outfile
+      phafile(iii:)='.pha'
+      tlfile=outfile
+      tlfile(iii:)='.tl'
+      radfile=outfile
+      radfile(iii:)='.rad'
+
+      open(idat,file=infile,form='formatted',status='old')
+      open(iwr,file=listfile,form='formatted',status='unknown')
+      open(10,file=clufile,form='formatted',status='unknown')
+      open(35,file=tlfile,form='formatted',status='unknown')
+      open(55,file=radfile,form='formatted',status='unknown')
+      open(iphas,file=phafile,form='formatted',status='unknown')
+
+      open(iedl0,form='unformatted',status='scratch')
+      open(iof,form='unformatted',status='scratch')
+      open(unit=21,form='unformatted',status='scratch')
+      open(60,form='formatted',status='scratch')
+      open(50,form='formatted',status='scratch')
+      open(unit=13,form='formatted',status='scratch')
+      open(unit=14,form='formatted',status='scratch')
+      open(unit=11,status='scratch')
+      open(unit=iwf,status='scratch')
+      open(unit=33,status='scratch')
+      open(unit=66,status='scratch')
+
+      call inctrl
+      call intit(iof)
+      call incoor
+      call calphas
+
+      close(idat)
+      close(iwr)
+      close(10)
+      close(35)
+      close(55)
+      close(iphas)
+      close(iedl0)
+      close(iof)
+      close(60)
+      close(50)
+      close(13)
+      close(14)
+      close(11)
+      close(iwf)
+      close(33)
+      close(66)
+      close(21)
+
+      endsubroutine
+
+
+       subroutine inctrl
+       implicit real*8 (a-h,o-z)
+       include 'msxas3.inc'
--- a/pmsco/calculators/phagen/runner.py
+++ b/pmsco/calculators/phagen/runner.py
@ -0,0 +1,153 @@
+"""
+@package pmsco.calculators.phagen.runner
+Natoli/Sebilleau PHAGEN interface
+
+this module runs the PHAGEN program to calculate scattering factors and radial matrix element.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import os
+import shutil
+import tempfile
+
+from pmsco.calculators.calculator import AtomicCalculator
+from pmsco.calculators.phagen.phagen import libmain
+from pmsco.calculators.phagen.translator import Translator
+import pmsco.cluster
+
+logger = logging.getLogger(__name__)
+
+
+class PhagenCalculator(AtomicCalculator):
+    """
+    use the PHAGEN program to calculate scattering factors and radial matrix element.
+
+    this produces scatterer, radial matrix element and cluster files for EDAC.
+    """
+
+    def run(self, params, cluster, scan, output_file):
+        """
+        create the input file, run PHAGEN, and translate the output to EDAC format.
+
+        the following files are created in the job work directory:
+        - scattering factor files in EDAC format.
+          their names are `output_file + "_{atomclass}.scat"`.
+        - radial matrix element file in EDAC format.
+          its name is `output_file + ".rme"`.
+        - cluster file in PMSCO format.
+          its name is `output_file + ".clu"`.
+
+        the cluster and params objects are updated and linked to the scattering files
+        so that they can be passed to EDAC without further modification.
+        the radial matrix element is currently not used.
+
+        note that the scattering files are numbered according to the atomic environment and not chemical element.
+        this means that the updated cluster (cluster object or ".clu" file)
+        must be used in the scattering calculation.
+        atomic index is not preserved - atoms in the input and output clusters can only be related by coordinate!
+
+        because PHAGEN generates a lot of files with hard-coded names,
+        the function creates a temporary directory for PHAGEN and deletes it before returning.
+
+        @param params: pmsco.project.Params object.
+            the phase_files attribute is updated with the paths of the scattering files.
+
+        @param cluster: pmsco.cluster.Cluster object.
+            the cluster is updated with the one returned from PHAGEN.
+            the atom classes are linked to the scattering files.
+
+        @param scan: pmsco.project.Scan object.
+            the scan object is used to determine the kinetic energy range.
+
+        @param output_file: base path and name of the output files.
+
+        @return (None, dict) where dict is a list of output files with their category.
+            the category is "atomic" for all output files.
+        """
+        transl = Translator()
+        transl.params.set_params(params)
+        transl.params.set_cluster(cluster)
+        transl.params.set_scan(scan)
+        phagen_cluster = pmsco.cluster.Cluster()
+
+        files = {}
+        prev_wd = os.getcwd()
+        try:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                os.chdir(temp_dir)
+                os.mkdir("div")
+                os.mkdir("div/wf")
+                os.mkdir("plot")
+                os.mkdir("data")
+
+                # prepare input for phagen
+                infile = "phagen.in"
+                outfile = "phagen.out"
+
+                try:
+                    transl.write_input(infile)
+                    report_infile = os.path.join(prev_wd, output_file + ".phagen.in")
+                    shutil.copy(infile, report_infile)
+                    files[report_infile] = "input"
+                except IOError:
+                    logger.warning("error writing phagen input file {fi}.".format(fi=infile))
+
+                # call phagen
+                libmain(infile, outfile)
+
+                # collect results
+                try:
+                    phafile = outfile + ".pha"
+                    transl.parse_phagen_phase(phafile)
+                    report_phafile = os.path.join(prev_wd, output_file + ".phagen.pha")
+                    shutil.copy(phafile, report_phafile)
+                    files[report_phafile] = "output"
+                except IOError:
+                    logger.error("error loading phagen phase file {fi}".format(fi=phafile))
+
+                try:
+                    radfile = outfile + ".rad"
+                    transl.parse_radial_file(radfile)
+                    report_radfile = os.path.join(prev_wd, output_file + ".phagen.rad")
+                    shutil.copy(radfile, report_radfile)
+                    files[report_radfile] = "output"
+                except IOError:
+                    logger.error("error loading phagen radial file {fi}".format(fi=radfile))
+
+                try:
+                    clufile = outfile + ".clu"
+                    phagen_cluster.load_from_file(clufile, pmsco.cluster.FMT_PHAGEN_OUT)
+                except IOError:
+                    logger.error("error loading phagen cluster file {fi}".format(fi=clufile))
+
+        finally:
+            os.chdir(prev_wd)
+
+        # write edac files
+        scatfile = output_file + "_{}.scat"
+        scatfiles = transl.write_edac_scattering(scatfile)
+        params.phase_files = {c: scatfiles[c] for c in scatfiles}
+        files.update({scatfiles[c]: "atomic" for c in scatfiles})
+
+        rmefile = output_file + ".rme"
+        transl.write_edac_emission(rmefile)
+        files[rmefile] = "atomic"
+
+        cluster.update_atoms(phagen_cluster, {'c'})
+        clufile = output_file + ".pmsco.clu"
+        cluster.save_to_file(clufile, pmsco.cluster.FMT_PMSCO)
+        files[clufile] = "cluster"
+
+        return None, files
--- a/pmsco/calculators/phagen/translator.py
+++ b/pmsco/calculators/phagen/translator.py
@ -0,0 +1,411 @@
+"""
+@package pmsco.calculators.phagen.translator
+Natoli/Sebilleau PHAGEN interface
+
+this module provides conversion between input/output files of PHAGEN and EDAC.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from pmsco.compat import open
+
+## rydberg energy in electron volts
+ERYDBERG = 13.6056923
+
+
+def state_to_edge(state):
+    """
+    translate spectroscopic notation to edge notation.
+
+    @param state: spectroscopic notation: "1s", "2s", "2p1/2", etc.
+    @return: edge notation: "k", "l1", "l2", etc.
+        note: if the j-value is not given, the lower j edge is returned.
+    """
+    jshells = ['s', 'p1/2', 'p3/2', 'd3/2', 'd5/2', 'f5/2', 'f7/2']
+    lshells = [s[0] for s in jshells]
+    shell = int(state[0])
+    try:
+        subshell = jshells.index(state[1:]) + 1
+    except ValueError:
+        subshell = lshells.index(state[1]) + 1
+    except IndexError:
+        subshell = 1
+    edge = "klmnop"[shell-1]
+    if shell > 1:
+        edge += str(subshell)
+    return edge
+
+
+class TranslationParams(object):
+    """
+    project parameters needed for translation.
+
+    energy unit is eV.
+    """
+    def __init__(self):
+        self.initial_state = "1s"
+        self.binding_energy = 0.
+        self.cluster = None
+        self.kinetic_energies = np.empty(0, dtype=np.float)
+
+    @property
+    def l_init(self):
+        return "spdf".index(self.initial_state[1])
+
+    @property
+    def edge(self):
+        return state_to_edge(self.initial_state)
+
+    def set_params(self, params):
+        """
+        set the translation parameters.
+
+        @param params: a pmsco.project.Params object or
+                       a dictionary containing some or all public fields of this class.
+        @return: None
+        """
+        try:
+            self.initial_state = params.initial_state
+            self.binding_energy = params.binding_energy
+        except AttributeError:
+            for key in params:
+                self.__setattr__(key, params[key])
+
+    def set_scan(self, scan):
+        """
+        set the scan parameters.
+
+        @param scan: a pmsco.project.Scan object
+        @return: None
+        """
+        try:
+            energies = scan.energies
+        except AttributeError:
+            try:
+                energies = scan['e']
+            except KeyError:
+                energies = scan
+        if not isinstance(energies, np.ndarray):
+            energies = np.array(energies)
+        self.kinetic_energies = np.resize(self.kinetic_energies, energies.shape)
+        self.kinetic_energies = energies
+
+    def set_cluster(self, cluster):
+        """
+        set the initial cluster.
+
+        @param cluster: a pmsco.cluster.Cluster object
+        @return: None
+        """
+        self.cluster = cluster
+
+
+class Translator(object):
+    """
+    data conversion to/from phagen input/output files.
+
+    usage:
+    1. set the translation parameters self.params.
+    2. call write_input_file to create the phagen input files.
+    3. call phagen on the input file.
+    4. call parse_phagen_phase.
+    5. call parse_radial_file.
+    6. call write_edac_scattering to produce the EDAC scattering matrix files.
+    7. call write_edac_emission to produce the EDAC emission matrix file.
+    """
+    def __init__(self):
+        """
+        initialize the object instance.
+        """
+        self.params = TranslationParams()
+        dt = [('e', 'f4'), ('a', 'i4'), ('l', 'i4'), ('t', 'c16')]
+        self.scattering = np.empty(0, dtype=dt)
+        dt = [('e', 'f4'), ('dw', 'c16'), ('up', 'c16')]
+        self.emission = np.empty(0, dtype=dt)
+
+    def write_cluster(self, f):
+        """
+        write the cluster section of the PHAGEN input file.
+
+        requires a valid pmsco.cluster.Cluster in self.params.cluster.
+
+        @param f: file or output stream (an object with a write method)
+
+        @return: None
+        """
+        for atom in self.params.cluster.data:
+            d = {k: atom[k] for k in atom.dtype.names}
+            f.write("{s} {t} {x} {y} {z}\n".format(**d))
+        f.write("-1 -1 0. 0. 0.\n")
+
+    def write_ionicity(self, f):
+        """
+        write the ionicity section of the PHAGEN input file.
+
+        ionicity is read from the 'q' column of the cluster.
+        all atoms of a chemical element must have the same charge state
+        because ionicity has to be specified per element.
+        this function writes the average of all charge states of an element.
+
+        @param f: file or output stream (an object with a write method)
+
+        @return: None
+        """
+        data = self.params.cluster.data
+        elements = np.unique(data['t'])
+        for element in elements:
+            idx = np.where(data['t'] == element)
+            charge = np.mean(data['q'][idx])
+            f.write("{t} {q}\n".format(t=element, q=charge))
+
+        f.write("-1\n")
+
+    def write_input(self, f):
+        """
+        write the PHAGEN input file.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @return: None
+        """
+        phagen_params = {}
+        phagen_params['emin'] = self.params.kinetic_energies.min() / ERYDBERG
+        phagen_params['emax'] = self.params.kinetic_energies.max() / ERYDBERG
+        phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / \
+                                 (self.params.kinetic_energies.shape[0] - 1)
+        if phagen_params['delta'] < 0.0001:
+            phagen_params['delta'] = 0.1
+        phagen_params['edge'] = state_to_edge(self.params.initial_state)  # possibly not used
+        phagen_params['edge1'] = 'm4'  # auger not supported
+        phagen_params['edge2'] = 'm4'  # auger not supported
+        phagen_params['cip'] = self.params.binding_energy / ERYDBERG
+        if phagen_params['cip'] < 0.001:
+            raise ValueError("binding energy parameter is zero.")
+
+        if np.sum(np.abs(self.params.cluster.data['q']) >= 0.001) > 0:
+            phagen_params['ionzst'] = 'ionic'
+        else:
+            phagen_params['ionzst'] = 'neutral'
+
+        if hasattr(f, "write"):
+            f.write("&job\n")
+            f.write("calctype='xpd',\n")
+            f.write("coor='angs',\n")
+            f.write("cip={cip},\n".format(**phagen_params))
+            f.write("edge='{edge}',\n".format(**phagen_params))
+            f.write("edge1='{edge1}',\n".format(**phagen_params))
+            f.write("edge2='{edge1}',\n".format(**phagen_params))
+            f.write("gamma=0.03,\n")
+            f.write("lmax_mode=2,\n")
+            f.write("lmaxt=50,\n")
+            f.write("emin={emin},\n".format(**phagen_params))
+            f.write("emax={emax},\n".format(**phagen_params))
+            f.write("delta={delta},\n".format(**phagen_params))
+            f.write("potgen='in',\n")
+            f.write("potype='hedin',\n")
+            f.write("norman='stdcrm',\n")
+            f.write("ovlpfac=0.0,\n")
+            f.write("ionzst='{ionzst}',\n".format(**phagen_params))
+            f.write("charelx='ex',\n")
+            f.write("l2h=4\n")
+            f.write("&end\n")
+            f.write("comment 1\n")
+            f.write("comment 2\n")
+            f.write("\n")
+
+            self.write_cluster(f)
+            self.write_ionicity(f)
+        else:
+            with open(f, "w") as fi:
+                self.write_input(fi)
+
+    def parse_phagen_phase(self, f):
+        """
+        parse the phase output file from PHAGEN.
+
+        the phase file is written to div/phases.dat.
+        it contains the following columns:
+
+        @arg e energy (Ry)
+        @arg x1 unknown 1
+        @arg x2 unknown 2
+        @arg na atom index (1-based)
+        @arg nl angular momentum quantum number l
+        @arg tr real part of the scattering matrix element
+        @arg ti imaginary part of the scattering matrix element
+        @arg ph phase shift
+
+        the data is translated into the self.scattering array.
+
+        @arg e energy (eV)
+        @arg a atom index (1-based)
+        @arg l angular momentum quantum number l
+        @arg t complex scattering matrix element
+
+        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
+
+        @return: None
+        """
+        dt = [('e', 'f4'), ('x1', 'f4'), ('x2', 'f4'), ('na', 'i4'), ('nl', 'i4'),
+              ('tr', 'f8'), ('ti', 'f8'), ('ph', 'f4')]
+        data = np.genfromtxt(f, dtype=dt)
+
+        self.scattering = np.resize(self.scattering, data.shape)
+        scat = self.scattering
+        scat['e'] = data['e'] * ERYDBERG
+        scat['a'] = data['na']
+        scat['l'] = data['nl']
+        scat['t'] = data['tr'] + 1j * data['ti']
+
+    def write_edac_scattering(self, filename_format, phases=False):
+        """
+        write scatterer files for EDAC.
+
+        produces one file for each atom class in self.scattering.
+
+        @param filename_format: file name including a placeholder {} for the atom class.
+
+        @param phases: write phase files instead of t-matrix files.
+
+        @return: dictionary that maps atom classes to file names
+        """
+        if phases:
+            write = self.write_edac_phase_file
+        else:
+            write = self.write_edac_scattering_file
+        scat = self.scattering
+        atoms = np.unique(scat['a'])
+        files = {}
+        for atom in atoms:
+            f = filename_format.format(atom)
+            sel = scat['a'] == atom
+            idx = np.where(sel)
+            atom_scat = scat[idx]
+            write(f, atom_scat)
+            files[atom] = f
+
+        return files
+
+    def write_edac_scattering_file(self, f, scat):
+        """
+        write a scatterer file for EDAC.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @param scat: a slice of the self.scattering array belonging to the same atom class.
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            energies = np.unique(scat['e'])
+            ne = energies.shape[0]
+            lmax = scat['l'].max()
+            if ne == 1:
+                f.write("1 {lmax} regular tl\n".format(lmax=lmax))
+            else:
+                f.write("{nk} E(eV) {lmax} regular tl\n".format(nk=ne, lmax=lmax))
+            for energy in energies:
+                sel = scat['e'] == energy
+                idx = np.where(sel)
+                energy_scat = scat[idx]
+                if ne > 1:
+                    f.write("{0:.3f} ".format(energy))
+                for item in energy_scat:
+                    f.write(" {0:.6f} {1:.6f}".format(item['t'].real, item['t'].imag))
+                for i in range(len(energy_scat), lmax + 1):
+                    f.write(" 0 0")
+                f.write("\n")
+        else:
+            with open(f, "w") as fi:
+                self.write_edac_scattering_file(fi, scat)
+
+    def write_edac_phase_file(self, f, scat):
+        """
+        write a phase file for EDAC.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @param scat: a slice of the self.scattering array belonging to the same atom class.
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            energies = np.unique(scat['e'])
+            ne = energies.shape[0]
+            lmax = scat['l'].max()
+            if ne == 1:
+                f.write("1 {lmax} regular real\n".format(lmax=lmax))
+            else:
+                f.write("{nk} E(eV) {lmax} regular real\n".format(nk=ne, lmax=lmax))
+            for energy in energies:
+                sel = scat['e'] == energy
+                idx = np.where(sel)
+                energy_scat = scat[idx]
+                if ne > 1:
+                    f.write("{0:.3f} ".format(energy))
+                for item in energy_scat:
+                    f.write(" {0:.6f}".format(np.angle(item['t'])))
+                for i in range(len(energy_scat), lmax + 1):
+                    f.write(" 0")
+                f.write("\n")
+        else:
+            with open(f, "w") as fi:
+                self.write_edac_phase_file(fi, scat)
+
+    def parse_radial_file(self, f):
+        """
+        parse the radial matrix element output file from phagen.
+
+        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
+
+        @return: None
+        """
+        dt = [('ar', 'f8'), ('ai', 'f8'), ('br', 'f8'), ('bi', 'f8')]
+        data = np.genfromtxt(f, dtype=dt)
+
+        self.emission = np.resize(self.emission, data.shape)
+        emission = self.emission
+        emission['dw'] = data['ar'] + 1j * data['ai']
+        emission['up'] = data['br'] + 1j * data['bi']
+
+    def write_edac_emission(self, f):
+        """
+        write the radial photoemission matrix element in EDAC format.
+
+        requires self.emission, self.params.kinetic_energies and self.params.initial_state.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            l0 = self.params.l_init
+            energies = self.params.kinetic_energies
+            emission = self.emission
+            emission['e'] = energies
+            ne = energies.shape[0]
+            if ne == 1:
+                f.write("1 regular2 {l0}\n".format(l0=l0))
+            else:
+                f.write("{nk} E(eV) regular2 {l0}\n".format(nk=ne, l0=l0))
+            for item in emission:
+                if ne > 1:
+                    f.write("{0:.3f} ".format(item['e']))
+                f.write(" {0:.6f} {1:.6f}".format(item['up'].real, item['up'].imag))
+                f.write(" {0:.6f} {1:.6f}".format(item['dw'].real, item['dw'].imag))
+                f.write("\n")
+        else:
+            with open(f, "w") as of:
+                self.write_edac_emission(of)
--- a/pmsco/cluster.py
+++ b/pmsco/cluster.py
@ -1,12 +1,15 @@
+#!/usr/bin/env python
 """
@package pmsco.cluster
-cluster tools for MSC and EDAC
+cluster building and handling

 the Cluster class is provided to facilitate the construction and import/export of clusters.
 a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
-the class can import from/export to EDAC, MSC, and XYZ cluster files.
+the class can import from/export to various file formats.
 XYZ allows for export to 3D visualizers, e.g. Avogadro.

+the module has a command line interface to convert cluster files.
+
@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
 pip install --user periodictable
@ -14,7 +17,11 @@ pip install --user periodictable

@author Matthias Muntwiler

-@copyright (c) 2015-18 by Paul Scherrer Institut
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
 """

 from __future__ import absolute_import
@ -34,6 +41,12 @@ FMT_MSC = 1
 FMT_EDAC = 2
 ## XYZ file format identifier
 FMT_XYZ = 3
+## PHAGEN output file format identifier
+FMT_PHAGEN_OUT = 4
+## PHAGEN input file format identifier
+FMT_PHAGEN_IN = 5
+## native file format identifier
+FMT_PMSCO = 6

 # python version dependent type of chemical symbol
 if sys.version_info[0] >= 3:
@ -43,11 +56,14 @@ else:

 ## numpy.array datatype of Cluster.data array
 DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
-                          ('e', 'u1')]
+                          ('e', 'u1'), ('q', 'f4'), ('c', 'i4')]
 ## file format of internal Cluster.data array
-FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
+FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
 ## field (column) names of internal Cluster.data array
-FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']
+FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
+## column names for export
+NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
+                          'e': 'emitter', 'q': 'charge'}

 ## numpy.array datatype of cluster for MSC cluster file input/output
 DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
@ -57,11 +73,11 @@ FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
 FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

 ## numpy.array datatype of cluster for EDAC cluster file input/output
-DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
+DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
 ## file format of EDAC cluster file
 FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
 ## field (column) names of EDAC cluster file
-FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']
+FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']

 ## numpy.array datatype of cluster for XYZ file input/output
 DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
@ -70,6 +86,44 @@ FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
 ## field (column) names of XYZ cluster file
 FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']

+## numpy.array datatype of cluster for PHAGEN output file input/output
+DTYPE_CLUSTER_PHAGEN_OUT = [('i', 'i4'), ('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('c', 'i4')]
+## file format of PHAGEN cluster output file
+FMT_CLUSTER_PHAGEN_OUT = ["%5u", "%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%5u"]
+## field (column) names of PHAGEN cluster output file
+FIELDS_CLUSTER_PHAGEN_OUT = ['i', 's', 't', 'x', 'y', 'z', 'c']
+
+## numpy.array datatype of cluster for PHAGEN input file input/output
+DTYPE_CLUSTER_PHAGEN_IN = [('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('q', 'f4')]
+## file format of PHAGEN input file, cluster section
+FMT_CLUSTER_PHAGEN_IN = ["%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%7.3f"]
+## field (column) names of PHAGEN input file, cluster section
+FIELDS_CLUSTER_PHAGEN_IN = ['s', 't', 'x', 'y', 'z', 'q']
+
+# NOTE(review): the three lookup tables below are keyed by the FMT_ constants
+# but have no FMT_PMSCO entry. a lookup with FMT_PMSCO raises KeyError - confirm that this is intended.
+# NOTE(review): FIELDS_CLUSTER_INTERNAL and FMT_CLUSTER_INTERNAL list the columns as i, t, s, c, x, y, z, e, q
+# whereas DTYPE_CLUSTER_INTERNAL declares them as i, t, s, x, y, z, e, q, c.
+# a file written in field order cannot be parsed positionally with that dtype - verify the FMT_PMSCO round trip.
+
+## dictionary of supported cluster data types
+CLUSTER_DTYPES = {FMT_DEFAULT: DTYPE_CLUSTER_INTERNAL,
+                  FMT_MSC: DTYPE_CLUSTER_MSC,
+                  FMT_EDAC: DTYPE_CLUSTER_EDAC,
+                  FMT_XYZ: DTYPE_CLUSTER_XYZ,
+                  FMT_PHAGEN_OUT: DTYPE_CLUSTER_PHAGEN_OUT,
+                  FMT_PHAGEN_IN: DTYPE_CLUSTER_PHAGEN_IN}
+
+## dictionary of supported cluster file formats
+CLUSTER_FMTS = {FMT_DEFAULT: FMT_CLUSTER_INTERNAL,
+                FMT_MSC: FMT_CLUSTER_MSC,
+                FMT_EDAC: FMT_CLUSTER_EDAC,
+                FMT_XYZ: FMT_CLUSTER_XYZ,
+                FMT_PHAGEN_OUT: FMT_CLUSTER_PHAGEN_OUT,
+                FMT_PHAGEN_IN: FMT_CLUSTER_PHAGEN_IN}
+
+## dictionary of supported cluster field names
+CLUSTER_FIELDS = {FMT_DEFAULT: FIELDS_CLUSTER_INTERNAL,
+                  FMT_MSC: FIELDS_CLUSTER_MSC,
+                  FMT_EDAC: FIELDS_CLUSTER_EDAC,
+                  FMT_XYZ: FIELDS_CLUSTER_XYZ,
+                  FMT_PHAGEN_OUT: FIELDS_CLUSTER_PHAGEN_OUT,
+                  FMT_PHAGEN_IN: FIELDS_CLUSTER_PHAGEN_IN}
+
+

 class Cluster(object):
    """
@ -84,6 +138,8 @@ class Cluster(object):
    - t coordinate of the atom position
    - z coordinate of the atom position
    - emitter flag
+    - charge/ionicity
+    - scatterer class
    
    the class also defines methods that add or manipulate atoms of the cluster.
    see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
@ -126,6 +182,8 @@ class Cluster(object):
    #       @arg @c 'y' (float32) t coordinate of the atom position
    #       @arg @c 'z' (float32) z coordinate of the atom position
    #       @arg @c 'e' (uint8)   1 = emitter, 0 = regular atom
+    #       @arg @c 'q' (float32) charge/ionicity
+    #       @arg @c 'c' (int) scatterer class

    ##  @var comment (str)
    #   one-line comment that can be included in some cluster files
@ -152,6 +210,9 @@ class Cluster(object):
        @param cluster: (Cluster) other Cluster object.
        """
        self.data = cluster.data.copy()
+        self.rmax = cluster.rmax
+        self.dtype = cluster.dtype
+        self.comment = cluster.comment

    def set_rmax(self, r):
        """
@ -166,7 +227,7 @@ class Cluster(object):
        """
        self.rmax = r

-    def build_element(self, index, element_number, x, y, z, emitter):
+    def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0):
        """
        build a tuple in the format of the internal data array.
        
@ -177,12 +238,16 @@ class Cluster(object):
        @param x, y, z: (float) atom coordinates in the cluster
        
        @param emitter: (int or bool) True = emitter, False = scatterer
+
+        @param charge: (float) ionicity. default = 0
+
+        @param scatterer: (int) scatterer class. default = 0.
        """
        symbol = pt.elements[element_number].symbol
-        element = (index, element_number, symbol, x, y, z, int(emitter))
+        element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer)
        return element

-    def add_atom(self, atomtype, v_pos, is_emitter):
+    def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
        """
        add a single atom to the cluster.
        
@ -191,11 +256,15 @@ class Cluster(object):
        @param v_pos: (numpy.ndarray, shape = (3)) position vector
        
        @param is_emitter: (int or bool) True = emitter, False = scatterer
+
+        @param charge: (float) ionicity. default = 0
+
+        @return array index of added atom
        """
        n0 = self.data.shape[0] + 1
-        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
-        self.data = np.append(self.data, np.array(element,
-            dtype=self.data.dtype))
+        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter, charge)
+        self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))
+        return n0 - 1

    def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
        """
@ -290,19 +359,21 @@ class Cluster(object):
        source = cluster.data.copy()

        if check_rmax and source.shape[0] > 0:
-            source_xyz = source[['x', 'y', 'z']].copy()
-            source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
+            source_xyz = cluster.get_positions()
            b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
            idx = np.where(b_rmax)
            source = source[idx]
        data = np.append(data, source)

        if check_unique and data.shape[0] > 0:
-            data_xyz = data[['x', 'y', 'z']].copy()
-            data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
-            tol_xyz = np.round(data_xyz / tol)
-            uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
-            _, idx = np.unique(uni_xyz, return_index=True)
+            data_xyz = np.empty((data.shape[0], 3))
+            data_xyz[:, 0] = data['x']
+            data_xyz[:, 1] = data['y']
+            data_xyz[:, 2] = data['z']
+            tol *= 2
+            uni_xyz = np.round(data_xyz / tol)
+            # this requires numpy 1.13 or later
+            _, idx = np.unique(uni_xyz, return_index=True, axis=0)
            data = data[np.sort(idx)]

        self.data = data
@ -322,8 +393,10 @@ class Cluster(object):
            the returned coordinates may not be identical to any atom coordinate of a layer
            but deviate up to the given tolerance.
        """
-        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
-        z2 = np.round(self_z.copy() / tol)
+        tol *= 2
+        self_z = np.empty(self.data.shape, np.float32)
+        self_z[:] = self.data['z']
+        z2 = np.round(self_z / tol)
        layers = np.unique(z2) * tol
        return layers

@ -338,7 +411,8 @@ class Cluster(object):
            by default (element = 0), all atoms are moved.
        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
-        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
+        self_z = np.empty(self.data.shape, np.float32)
+        self_z[:] = self.data['z']
        b_z = self_z <= z_cut
        b_all = b_z

@ -434,12 +508,18 @@ class Cluster(object):
        """
        find all atoms which occupy a given position.

-        @param pos: (numpy.array, shape = (3)) position vector.
+        @param pos: position vector.
+            this can be a numpy.ndarray with shape (3)
+            or any type where pos[0] represents the x-coordinate, pos[1] y, and pos[2] z.
        
        @param tol: (float) matching tolerance per coordinate.

        @return numpy.array of indices which match v_pos.
        """
+        if isinstance(pos, np.ndarray):
+            assert pos.shape == (3,)
+        else:
+            pos = np.array((pos[0], pos[1], pos[2]))
        b2 = np.abs(pos - self.get_positions()) < tol
        b1 = np.all(b2, axis=1)
        idx = np.where(b1)
@ -463,8 +543,9 @@ class Cluster(object):
        @return numpy.array of indices which match v_pos.
        """
        pos_xy = pos[0:2]
-        self_xy = self.data[['x', 'y']].copy()
-        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
+        self_xy = np.empty((self.data.shape[0], 2), np.float32)
+        self_xy[:, 0] = self.data['x']
+        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy

        pos_z = pos[2]
@ -497,8 +578,9 @@ class Cluster(object):

        @return: None
        """
-        self_xy = self.data[['x', 'y']].copy()
-        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
+        self_xy = np.empty((self.data.shape[0], 2), np.float32)
+        self_xy[:, 0] = self.data['x']
+        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy

        self_z = self.data['z']
@ -545,8 +627,7 @@ class Cluster(object):

        @return: None
        """
-        self_xyz = self.data[['x', 'y', 'z']].copy()
-        self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
+        self_xyz = self.get_positions()
        b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
        idx = np.where(b_xyz)
        self.data = self.data[idx]
@ -562,7 +643,8 @@ class Cluster(object):

        @return: None
        """
-        coord = self.data[axis].view(np.float32).reshape(self.data.shape)
+        coord = np.empty(self.data.shape, np.float32)
+        coord[:] = self.data[axis]
        sel = np.abs(coord - center) <= depth / 2
        idx = np.where(sel)
        self.data = self.data[idx]
@ -617,15 +699,17 @@ class Cluster(object):

    def get_positions(self):
        """
-        get an array of the atom coordinates.
+        get the atom coordinates in a two-dimensional array.

        the returned array is an independent copy of the original data.
        changes will not affect the original cluster.
        
        @return numpy.ndarray, shape = (N,3)
        """
-        pos = self.data[['x', 'y', 'z']].copy()
-        pos = pos.view((pos.dtype[0], len(pos.dtype.names)))
+        pos = np.empty((self.data.shape[0], 3), np.float32)
+        pos[:, 0] = self.data['x']
+        pos[:, 1] = self.data['y']
+        pos[:, 2] = self.data['z']
        return pos

    def set_positions(self, positions):
@ -689,14 +773,16 @@ class Cluster(object):
        rec = self.data[index]
        return rec['s']

-    def get_emitters(self):
+    def get_emitters(self, fields):
        """
        get a list of all emitters.
-        
-        @return list of tuples (x, y, z, atomtype)
+
+        @param fields: list of field (column) names to return
+
+        @return list of tuples. each tuple contains the values of the requested fields.
        """
        idx = self.data['e'] != 0
-        ems = self.data[['x', 'y', 'z', 't']][idx]
+        ems = self.data[fields][idx]
        return [tuple(em) for em in ems]

    def get_emitter_count(self):
@ -711,10 +797,22 @@ class Cluster(object):
    def load_from_file(self, f, fmt=FMT_DEFAULT):
        """
        load a cluster from a file created by the scattering program.
+
+        the file formats differ in the columns that they contain.
+        only the 'x', 'y', 'z' coordinates are common to all formats.
+        at least one of the 's' and 't' columns must be present.
+        missing columns are initialized as follows.
+
+        @arg 'i': reset to a 1-based sequential index (@ref update_index).
+        @arg 's': derived from the 't' column (@ref update_symbols).
+        @arg 't': derived from the 's' column (@ref update_atomtypes).
+        @arg 'e': set to 0.
+        @arg 'c': set equal to the 't' column (@ref init_atomclasses).
+        @arg 'q': set to 0.
        
-        @param f (string/handle): path name or open file handle of the cluster file.
+        @param f: path name or open file handle of the cluster file.
        
-        @param fmt (int): file format.
+        @param fmt: file format.
            must be one of the FMT_ constants.
            if FMT_DEFAULT, self.file_format is used.
        
@ -735,12 +833,25 @@ class Cluster(object):
            dtype = DTYPE_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            sh = 2
+        elif fmt == FMT_PHAGEN_OUT:
+            dtype = DTYPE_CLUSTER_PHAGEN_OUT
+            fields = FIELDS_CLUSTER_PHAGEN_OUT
+            sh = 1
+        elif fmt == FMT_PHAGEN_IN:
+            dtype = DTYPE_CLUSTER_PHAGEN_IN
+            fields = FIELDS_CLUSTER_PHAGEN_IN
+            sh = 0
+        elif fmt == FMT_PMSCO:
+            dtype = DTYPE_CLUSTER_INTERNAL
+            fields = FIELDS_CLUSTER_INTERNAL
+            sh = 1
        else:
-            dtype = DTYPE_CLUSTER_XYZ
-            fields = FIELDS_CLUSTER_XYZ
-            sh = 2
+            raise ValueError("unknown file format {}".format(fmt))

        data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
+        if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
+            data = data[:-1]
+
        self.data = np.empty(data.shape, dtype=self.dtype)
        self.data['x'] = data['x']
        self.data['y'] = data['y']
@ -753,14 +864,23 @@ class Cluster(object):
            self.data['t'] = data['t']
        if 's' in fields:
            self.data['s'] = data['s']
-        else:
+        elif 't' in fields:
            self.update_symbols()
        if 't' not in fields:
-            self.update_atomtypes()
+            if 's' in fields:
+                self.update_atomtypes()
        if 'e' in fields:
            self.data['e'] = data['e']
        else:
            self.data['e'] = 0
+        if 'c' in fields:
+            self.data['c'] = data['c']
+        else:
+            self.data['c'] = 0
+        if 'q' in fields:
+            self.data['q'] = data['q']
+        else:
+            self.data['q'] = 0.

        pos = self.get_positions()
        # note: np.linalg.norm does not accept axis argument in version 1.7
@ -788,6 +908,35 @@ class Cluster(object):
        for atom in self.data:
            atom['t'] = pt.elements.symbol(atom['s'].strip()).number

+    def init_atomclasses(self, field_or_value='t', default_only=False):
+        """
+        fill the atom class column 'c' from another column or with a constant.
+
+        atom classes identify the atomic scattering potential or scattering factors
+        to be used in the multiple scattering program.
+
+        if the scattering factors are calculated in the PMSCO process (by EDAC or PHAGEN),
+        the atom classes must be set equal to the element type
+        or left at the default value 0 in which case PMSCO sets the correct values.
+
+        if the scattering factors are loaded from existing files,
+        the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary.
+        in this case the meaning of the class value is up to the project,
+        and the class must be set either by the cluster generator
+        or the project's after_atomic_scattering hook.
+
+        @param field_or_value: a string is taken as the name of a cluster data field, e.g. 't',
+            whose values are copied. any other value is assigned to all atoms as is.
+
+        @param default_only: if True, leave the classes untouched
+            unless all of them are at their default value (0).
+
+        @return None
+        """
+        # the sum of absolute values is zero only if every class is still 0.
+        if default_only and np.sum(np.abs(self.data['c'])) != 0:
+            return
+
+        if isinstance(field_or_value, str):
+            classes = self.data[field_or_value]
+        else:
+            classes = field_or_value
+        self.data['c'] = classes
+
    def update_index(self):
        """
        update the index column.
@ -795,10 +944,44 @@ class Cluster(object):
        if you have modified the order or number of elements in the self.data array directly,
        you may need to re-index the atoms if your code uses functions that rely on the index. 
        
-        @return: None 
+        @return None
        """
        self.data['i'] = np.arange(1, self.data.shape[0] + 1)

+    def update_atoms(self, clu, fields):
+        """
+        update atom properties from another cluster.
+
+        this method copies selected fields from another cluster.
+        the other cluster must contain the same atoms (same coordinates) in a possibly random order.
+        the atoms of this and the other cluster are matched up by sorting them by coordinate.
+
+        atomic scattering calculators often change the order of atoms in a cluster based on symmetry,
+        and return atom classes versus atomic coordinates.
+        this method allows to import the atom classes into the original cluster.
+
+        the method checks that the other cluster contains the same number of atoms.
+        it does not check that the clusters contain the same atomic positions.
+        linear translations are acceptable.
+
+        @param clu: cluster.Cluster object
+
+        @param fields: subset of field names out of FIELDS_CLUSTER_INTERNAL.
+            'i', 'x', 'y', 'z' are ignored.
+            the set can be specified in any type that converts into a set of strings.
+
+        @return: None
+
+        @raise AssertionError if the clusters do not contain the same number of atoms
+        """
+        # raise explicitly rather than with an assert statement
+        # so that the check is not stripped when python runs with -O.
+        if self.data.shape != clu.data.shape:
+            raise AssertionError("clusters differ in size: {0} versus {1}".format(
+                self.data.shape, clu.data.shape))
+
+        fields = set(fields) - {'i', 'x', 'y', 'z'}
+        # sorting both clusters by the same coordinate keys lines up corresponding atoms.
+        common_order = ('z', 'y', 'x')
+        index_self = np.argsort(self.data, order=common_order)
+        index_other = np.argsort(clu.data, order=common_order)
+        for field in fields:
+            self.data[field][index_self] = clu.data[field][index_other]
+
    def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
        """
        save the cluster to a file which can be read by the scattering program.
@ -846,10 +1029,21 @@ class Cluster(object):
            file_format = FMT_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
+        elif fmt == FMT_PHAGEN_IN:
+            file_format = FMT_CLUSTER_PHAGEN_IN
+            fields = FIELDS_CLUSTER_PHAGEN_IN
+            header = None
+        elif fmt == FMT_PHAGEN_OUT:
+            file_format = FMT_CLUSTER_PHAGEN_OUT
+            fields = FIELDS_CLUSTER_PHAGEN_OUT
+            header = ""
+        elif fmt == FMT_PMSCO:
+            file_format = FMT_CLUSTER_INTERNAL
+            fields = FIELDS_CLUSTER_INTERNAL
+            names = NAMES_CLUSTER_INTERNAL
+            header = "# " + " ".join([names[field] for field in fields])
        else:
-            file_format = FMT_CLUSTER_XYZ
-            fields = FIELDS_CLUSTER_XYZ
-            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
+            raise ValueError("unknown file format {}".format(fmt))

        data = data[fields]
        np.savetxt(f, data, fmt=file_format, header=header, comments="")
@ -996,3 +1190,67 @@ class LegacyClusterGenerator(ClusterGenerator):
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)
+
+
+def parse_cli():
+    """
+    parse the command line of the cluster converter.
+
+    four positional arguments are expected:
+    input format, input file, output format, output file.
+    the formats are restricted to a fixed list of format names.
+
+    @return: Namespace object created by the argument parser.
+    """
+    import argparse
+
+    formats = ["PMSCO", "MSC", "EDAC", "XYZ", "PHAGEN_OUT", "PHAGEN_IN"]
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="""
+            cluster conversion
+            """)
+    cli.add_argument('input_format', choices=formats, help="format of input file")
+    cli.add_argument('input_file', help="path and name of input file")
+    cli.add_argument('output_format', choices=formats, help="format of output file")
+    cli.add_argument('output_file', help="path and name of output file")
+
+    return cli.parse_args()
+
+
+def convert_cli(args):
+    """
+    convert a cluster file from one format into another.
+
+    this function is part of the command line interface.
+    the format names given on the command line are mapped
+    to the module-level FMT_ constants of the same name.
+
+    @param args: command line arguments.
+        the function reads input_format, input_file, output_format and output_file.
+
+    @return: None
+    """
+    cluster = Cluster()
+    cluster.file_format = FMT_PMSCO
+    constants = globals()
+    source_format = constants["FMT_" + args.input_format.upper()]
+    target_format = constants["FMT_" + args.output_format.upper()]
+    cluster.load_from_file(args.input_file, source_format)
+    cluster.save_to_file(args.output_file, target_format)
+
+
+def main_cli():
+    """
+    command line interface to convert cluster files.
+
+    parses the command line and passes the result to the converter.
+
+    see @ref convert_cli.
+
+    @return: None
+    """
+    convert_cli(parse_cli())
+
+
+if __name__ == '__main__':
+    # run the cluster file converter when the module is executed as a script.
+    main_cli()
+    sys.exit(0)
--- a/pmsco/database.py
+++ b/pmsco/database.py
@ -342,6 +342,53 @@ class ResultsDatabase(object):
        where param_id = :param_id and model_id = :model_id 
        """

+    # Tags table: one row per tag key. keys are unique and compared case-insensitively.
+    sql_create_tags = """CREATE TABLE IF NOT EXISTS `Tags` (
+        `id` INTEGER PRIMARY KEY,
+        `key` TEXT NOT NULL UNIQUE COLLATE NOCASE
+        )"""
+    # add a new tag key.
+    sql_insert_tag = "insert into Tags(key) values (:key)"
+    # look up the key of a tag id.
+    sql_select_tag = "select key from Tags where id=:id"
+    # look up a tag by key.
+    sql_select_tag_key = "select id, key from Tags where key=:key"
+    # distinct tag keys and ids that are attached to any job of a project, ordered by key.
+    sql_select_tag_project = """select distinct key, tag_id from Jobs
+        join JobTags on Jobs.id = JobTags.job_id
+        join Tags on Tags.id = JobTags.tag_id
+        where Jobs.project_id = :project_id
+        order by key collate nocase"""
+    # distinct tag keys and ids that are attached to one job, ordered by key.
+    sql_select_tag_job = """select distinct key, tag_id from JobTags
+        join Tags on Tags.id = JobTags.tag_id
+        where JobTags.job_id = :job_id
+        order by key collate nocase"""
+
+    # JobTags table: value of a tag for a job.
+    # rows reference Tags and Jobs and are declared to cascade on delete of either parent.
+    # NOTE(review): the table has no unique constraint on (tag_id, job_id),
+    # so duplicates are prevented only by the code that inserts rows.
+    sql_create_jobtags = """CREATE TABLE IF NOT EXISTS `JobTags` (
+        `id` INTEGER PRIMARY KEY,
+        `tag_id` INTEGER NOT NULL,
+        `job_id` INTEGER NOT NULL,
+        `value` TEXT COLLATE NOCASE,
+        FOREIGN KEY(tag_id) REFERENCES Tags(id) ON DELETE CASCADE,
+        FOREIGN KEY(job_id) REFERENCES Jobs(id) ON DELETE CASCADE
+        )"""
+    # index on (tag_id, job_id) of the JobTags table.
+    sql_index_jobtags = """create index if not exists 
+        `index_jobtags` ON `JobTags` 
+        (`tag_id`, `job_id`)"""
+    sql_drop_index_jobtags = "drop index if exists index_jobtags"
+    # attach a tag value to a job.
+    sql_insert_jobtag = """
+        insert into JobTags(tag_id, job_id, value)
+        values (:tag_id, :job_id, :value)
+        """
+    # change the value of an existing JobTags row, addressed by its primary key.
+    sql_update_jobtag = """
+        update JobTags set value=:value where id=:jobtag_id
+        """
+    # all tag keys and values of one job.
+    sql_select_jobtag_job = """
+        select key, value from JobTags
+        join Tags on JobTags.tag_id = Tags.id
+        where job_id = :job_id
+        """
+    # the JobTags row (id, key, value) of one tag and one job.
+    sql_select_jobtag = """
+        select JobTags.id as id, key, value from JobTags
+        join Tags on JobTags.tag_id = Tags.id
+        where tag_id = :tag_id and job_id = :job_id 
+        """
+
    # @var _conn (sqlite3.Connection).
    # connection interface to the database.
    #
@ -391,6 +438,7 @@ class ResultsDatabase(object):
        self.project_id = 0
        self.job_id = 0
        self._model_params = {}
+        self._tags = {}
        self._lock_filename = ""
        self._lock = None

@ -484,9 +532,12 @@ class ResultsDatabase(object):
            self._conn.execute(self.sql_create_results)
            self._conn.execute(self.sql_create_params)
            self._conn.execute(self.sql_create_paramvalues)
+            self._conn.execute(self.sql_create_tags)
+            self._conn.execute(self.sql_create_jobtags)
            self._conn.execute(self.sql_index_results_tasks)
            self._conn.execute(self.sql_index_results_models)
            self._conn.execute(self.sql_index_paramvalues)
+            self._conn.execute(self.sql_index_jobtags)
            self._conn.execute(self.sql_index_models)

    def register_project(self, name, code):
@ -583,6 +634,46 @@ class ResultsDatabase(object):
            param_dict = {'job_id': job_id}
            self._conn.execute(self.sql_delete_job, param_dict)

+    def _query_job_name(self, job_name, project_id=0):
+        """
+        (internal) query a job by name
+
+        this is the internal analog of @ref query_job_name
+        which asserts an acquired lock and open connection.
+
+        @param job_name: name of the job
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @return: id value of the job in the database
+
+        @raise sqlite3.DatabaseError if the job can't be found.
+        """
+        # imported locally so that the method does not depend on the module's import list.
+        import sqlite3
+
+        if project_id == 0:
+            project_id = self.project_id
+        param_dict = {'project_id': project_id, 'name': job_name}
+        c = self._conn.execute(self.sql_select_job_name, param_dict)
+        v = c.fetchone()
+        # fetchone returns None if the query has no result.
+        # raise the documented error rather than a TypeError from indexing None.
+        if v is None:
+            raise sqlite3.DatabaseError(
+                "job '{0}' not found in project {1}".format(job_name, project_id))
+        return v[0]
+
+    def query_job_name(self, job_name, project_id=0):
+        """
+        look up the database id of a job by its name.
+
+        the connection is checked and the lookup runs while holding the lock
+        and inside the connection context.
+
+        @param job_name: name of the job
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @return: id value of the job in the database
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            return self._query_job_name(job_name, project_id=project_id)
+
    def register_param(self, key):
        """
        register a parameter key with the database.
@ -681,6 +772,165 @@ class ResultsDatabase(object):

        return params

+    def register_tag(self, key):
+        """
+        register a tag with the database.
+
+        tags are a way of structuring a job description.
+        they can be used to, for instance, distinguish calculations made with different clusters,
+        different experimental data, etc.
+        a job tag has a key and a value, and is associated to a job.
+        the use of tags is up to the user. pmsco does not change or read them.
+
+        each tag name must be registered once before a value can be written to the database.
+        see the class description for an explanation.
+
+        the registration runs while holding the lock and inside the connection context.
+
+        @param key: key (name) of the tag.
+
+        @return: id value of the tag in the database.
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            tag_id = self._register_tag(key)
+        return tag_id
+
+    def _register_tag(self, key):
+        """
+        register a tag with the database without committing the transaction.
+
+        an existing key is reused, an unknown key is inserted into the Tags table.
+        in either case the id is stored in the local registry self._tags under the given key.
+
+        @note this method does not lock the database file and does not commit.
+            to lock the database and commit the transaction, call the public method register_tag().
+
+        @param key: key (name) of the tag.
+
+        @return: id value of the tag in the database.
+        """
+        existing = self._conn.execute(self.sql_select_tag_key, {'key': key}).fetchone()
+        # the insert statement is executed only if the lookup found nothing.
+        tag_id = existing[0] if existing else self._conn.execute(self.sql_insert_tag, {'key': key}).lastrowid
+        self._tags[key] = tag_id
+        return tag_id
+
+    def register_tags(self, tags):
+        """
+        register several tag keys with the database in one locked transaction.
+
+        each tag name must be registered once before a value can be written to the database.
+        see the class description for an explanation.
+
+        @param tags: sequence of tag keys, or dictionary of tags.
+            the argument is iterated, so only the keys of a dictionary are used.
+        @return: None
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            for tag_key in tags:
+                self._register_tag(tag_key)
+
+    def query_tags(self, project_id=0, job_id=0, update_registry=False):
+        """
+        query the tag keys that are used in a project or in a job.
+
+        optionally, the local registry can be updated with the results of the query.
+        this should be done if the database is read only and the client does not know the tag names.
+        see the class description for a description of the registry.
+
+        @note this method returns the tags that are used with jobs in the database.
+            if you have registered additional tags but not attached them to jobs,
+            this method will _not_ list them.
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @param job_id: job identifier.
+            by default, all jobs of the selected project are included in the query.
+            if a job is specified, the project_id parameter is ignored.
+
+        @param update_registry: update the local tags registry (self._tags)
+            with the query results.
+
+        @return: dictionary of tags.
+            the keys are the tag names, the values are the tag ids in the database.
+        """
+        if project_id == 0:
+            project_id = self.project_id
+
+        # a specific job takes precedence over the project scope.
+        if job_id != 0:
+            statement, arguments = self.sql_select_tag_job, {'job_id': job_id}
+        else:
+            statement, arguments = self.sql_select_tag_project, {'project_id': project_id}
+
+        self.check_connection()
+        with self._lock, self._conn:
+            cursor = self._conn.execute(statement, arguments)
+            found = {row['key']: row['tag_id'] for row in cursor}
+
+        if update_registry:
+            self._tags.update(found)
+
+        return found
+
+    def query_job_tags(self, job_id):
+        """
+        query the tags (keys and values) that are attached to a job.
+
+        @param job_id: job identifier.
+
+        @return: dictionary of tags.
+            the keys are the tag names, the values are the tag values.
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            cursor = self._conn.execute(self.sql_select_jobtag_job, {'job_id': job_id})
+            job_tags = {row['key']: row['value'] for row in cursor}
+
+        return job_tags
+
+    def insert_jobtags(self, job_id, tags):
+        """
+        add or update job tags in the database.
+
+        the method updates the JobTags table.
+        for each tag, an existing entry of the same job and tag key is updated,
+        otherwise a new entry is inserted.
+
+        @param job_id: (int) primary key of the job entry in the Jobs table.
+            the entry must exist.
+
+        @param tags: (dict) dictionary containing the tags.
+            keys are matched or added to the Tags table,
+            values are added to the JobTags table and linked to the job and tag key.
+
+        @return: None
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            for key, value in tags.items():
+                try:
+                    tag_id = self._tags[key]
+                except KeyError:
+                    # not in the local registry: look the key up in the database or create it.
+                    tag_id = self._register_tag(key)
+
+                # the local registry may be empty although the database already holds the key
+                # and a value for this job (e.g. in a new session).
+                # check for an existing entry in every case to avoid duplicate rows.
+                jobtag_entry = {'tag_id': tag_id, 'job_id': job_id, 'value': value}
+                c = self._conn.execute(self.sql_select_jobtag, jobtag_entry)
+                v = c.fetchone()
+
+                if v is not None:
+                    jobtag_entry['jobtag_id'] = v[0]
+                    self._conn.execute(self.sql_update_jobtag, jobtag_entry)
+                else:
+                    self._conn.execute(self.sql_insert_jobtag, jobtag_entry)
+
    def create_models_view(self, job_id=0, temporary=False):
        """
        create a flat (pivot) view of model parameters of the current project or job.
@ -878,7 +1128,7 @@ class ResultsDatabase(object):
            results = c.fetchall()

            names = [desc[0] for desc in c.description]
-            dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names)])
+            dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names, key=str.lower)])
            out_array = np.zeros((count,), dtype=dt)
            for idx, row in enumerate(results):
                for name in names:
@ -942,6 +1192,70 @@ class ResultsDatabase(object):

        return out_array

+    def query_best_models_per_jobs(self, job_ids=None, task_level='model'):
+        """
+        return the best model (by rfac) of each selected job
+
+        the query gathers the R-factors of the selected jobs at the selected task levels
+        and, for each job, returns the (database) model id where the lowest R-factor is reported
+        among the gathered results.
+
+        this can be useful if you want to compile a report of the best model per job.
+
+        @param job_ids: iterable of job ids to include in the query.
+            the job ids must belong to the current project and be convertible to int.
+            if empty or non-specified, all jobs of the current project are included.
+
+        @param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
+            deepest task_level to include in the query.
+            results on deeper levels are not considered.
+            e.g. if you pass 'scan', R-factors of individual scans are included in the query.
+            note that including deeper levels will not increase the number of results returned.
+
+        @return sequence of model_id.
+            the number of results corresponds to the number of jobs in the filter scope.
+            to find out details of the models, execute another query that filters on these model ids.
+
+        the method produces an SQL query similar to (the project id is a bound parameter):
+        @code{.sql}
+        select Models.id from Models
+        join Results on Models.id = Results.model_id
+        join Jobs on Models.job_id = Jobs.id
+        where project_id = :project_id
+        and scan = -1
+        and Models.job_id in (1,2,3)
+        group by Models.job_id
+        having min(rfac)
+        order by rfac, job_id, model, scan, sym, emit, region
+        @endcode
+        """
+        try:
+            level = dispatch.CALC_LEVELS.index(task_level) + 1
+        except ValueError:
+            level = task_level + 1
+        try:
+            level_name = dispatch.CALC_LEVELS[level]
+        except IndexError:
+            level_name = dispatch.CALC_LEVELS[4]
+
+        # int() ensures that only integer literals reach the statement text (works for generators too).
+        job_list = [] if job_ids is None else [str(int(job_id)) for job_id in job_ids]
+        sql = "select Models.id from Models "
+        sql += "join Results on Models.id = Results.model_id "
+        sql += "join Jobs on Models.job_id = Jobs.id "
+        sql += "where project_id = :project_id "
+        sql += "and {0} = -1 ".format(level_name)
+        if job_list:
+            sql += "and Models.job_id in ({0}) ".format(",".join(job_list))
+        sql += "group by Models.job_id "
+        sql += "having min(rfac) "
+        sql += "order by rfac, job_id, model, scan, sym, emit, region "
+        self.check_connection()
+        with self._lock, self._conn:
+            c = self._conn.execute(sql, {'project_id': self.project_id})
+            models = [row['id'] for row in c]
+        return models
+
    def query_tasks(self, job_id=0):
        """
        query the task index used in a calculation job.
@ -1213,13 +1527,18 @@ class ResultsDatabase(object):

        data = np.genfromtxt(filename, names=True)
        self.register_params(data.dtype.names)
-        unique_models, unique_index = np.unique(data['_model'], True)
+        try:
+            unique_models, unique_index = np.unique(data['_model'], True)
+        except ValueError:
+            unique_models = np.array([0])
+            unique_index = np.array([0])
        unique_data = data[unique_index]
        model_ids = {}

        def model_entry_generator():
            for result in unique_data:
                model_entry = {'job_id': job_id,
+                               'model': unique_models[0],
                               'gen': None,
                               'particle': None}
                model_entry.update(special_params(result))
@ -1227,7 +1546,11 @@ class ResultsDatabase(object):

        def result_entry_generator():
            for result in data:
-                result_entry = {'model_id': model_ids[result['_model']],
+                try:
+                    model = result['_model']
+                except ValueError:
+                    model = unique_models[0]
+                result_entry = {'model_id': model_ids[model],
                                'scan': -1,
                                'sym': -1,
                                'emit': -1,
@ -1238,8 +1561,12 @@ class ResultsDatabase(object):

        def param_entry_generator():
            for result in unique_data:
+                try:
+                    model = result['_model']
+                except ValueError:
+                    model = unique_models[0]
                for key, value in regular_params(result).items():
-                    param_entry = {'model_id': model_ids[result['_model']],
+                    param_entry = {'model_id': model_ids[model],
                                   'param_id': self._model_params[key],
                                   'value': value}
                    yield param_entry
--- a/pmsco/dispatch.py
+++ b/pmsco/dispatch.py
@ -227,7 +227,7 @@ class CalculationTask(object):
    #  files generated by the task and their category
    #
    #  dictionary key is the file name,
-    #  value is the file category, e.g. 'cluster', 'phase', etc.
+    #  value is the file category, e.g. 'cluster', 'atomic', etc.
    #
    #  this information is used to automatically clean up unnecessary data files.

@ -374,7 +374,7 @@ class CalculationTask(object):
        this information is used to automatically clean up unnecessary data files.

        @param name: file name (optionally including a path).
-        @param category: file category, e.g. 'cluster', 'phase', etc.
+        @param category: file category, e.g. 'cluster', 'atomic', etc.
        @return: None
        """
        self.files[name] = category
@ -521,7 +521,8 @@ class MscoProcess(object):
    def __init__(self, comm):
        self._comm = comm
        self._project = None
-        self._calculator = None
+        self._atomic_scattering = None
+        self._multiple_scattering = None
        self._running = False
        self._finishing = False
        self.stop_signal = False
@ -529,7 +530,8 @@ class MscoProcess(object):

    def setup(self, project):
        self._project = project
-        self._calculator = project.calculator_class()
+        self._atomic_scattering = project.atomic_scattering_factory()
+        self._multiple_scattering = project.multiple_scattering_factory()
        self._running = False
        self._finishing = False
        self.stop_signal = False
@ -596,19 +598,18 @@ class MscoProcess(object):
        scan = self._define_scan(task)
        output_file = task.format_filename(ext="")

-        # check parameters and call the msc program
-        if clu.get_atom_count() < 2:
+        # check parameters and call the calculators
+        if clu.get_atom_count() >= 1:
+            self._calc_atomic(task, par, clu, scan, output_file)
+        else:
            logger.error("empty cluster in calculation %s", s_id)
            task.result_valid = False
-        elif clu.get_emitter_count() < 1:
+
+        if clu.get_emitter_count() >= 1:
+            self._calc_multiple(task, par, clu, scan, output_file)
+        else:
            logger.error("no emitters in cluster of calculation %s.", s_id)
            task.result_valid = False
-        else:
-            task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
-            (root, ext) = os.path.splitext(task.result_filename)
-            task.file_ext = ext
-            task.result_valid = True
-            task.files.update(files)

        task.time = datetime.datetime.now() - start_time

@ -666,6 +667,8 @@ class MscoProcess(object):
        """
        nem = self._project.cluster_generator.count_emitters(task.model, task.id)
        clu = self._project.cluster_generator.create_cluster(task.model, task.id)
+        # overwrite atom classes only if they are at their default value
+        clu.init_atomclasses(field_or_value='t', default_only=True)

        if task.id.region == 0:
            file_index = task.id._replace(region=-1)
@ -696,6 +699,59 @@ class MscoProcess(object):

        return par

+    def _calc_atomic(self, task, par, clu, scan, output_file):
+        """
+        run the atomic scattering stage on copies of par and clu and merge the outcome back.
+
+        sequence: `before_atomic_scattering` project hook, atomic scattering calculator,
+        `after_atomic_scattering` project hook.
+        par and clu are modified only if the first hook returns a cluster,
+        the calculator reports files (which are added to task.files) and the second hook returns a cluster.
+
+        @param task: CalculationTask with all attributes set for the calculation.
+
+        @param par: pmsco.project.Params object for the calculator.
+            its phase_files attribute is updated with the created scattering files.
+            the radial matrix elements are not changed (but may be in a future version).
+
+        @param clu: pmsco.cluster.Cluster object for the calculator.
+            the cluster is overwritten with the one returned by the `after_atomic_scattering` hook,
+            so that atom classes match the phase_files.
+
+        @return: None
+        """
+        work_par = copy.deepcopy(par)
+        work_clu = copy.deepcopy(clu)
+        work_par, work_clu = self._project.before_atomic_scattering(task, work_par, work_clu)
+        if work_clu is None:
+            return
+
+        _filename, files = self._atomic_scattering.run(work_par, work_clu, scan, output_file)
+        if not files:
+            return
+        task.files.update(files)
+        work_par, work_clu = self._project.after_atomic_scattering(task, work_par, work_clu)
+        if work_clu is not None:
+            par.phase_files = work_par.phase_files
+            clu.copy_from(work_clu)
+
+    def _calc_multiple(self, task, par, clu, scan, output_file):
+        """
+        run the multiple scattering calculator and store its outcome in the task.
+
+        @param task: CalculationTask with all attributes set for the calculation.
+        @param par: pmsco.project.Params object for the calculator.
+        @param clu: pmsco.cluster.Cluster object for the calculator.
+        @return: None
+        """
+        task.result_filename, created = self._multiple_scattering.run(par, clu, scan, output_file)
+        # an empty result name leaves file_ext and result_valid untouched.
+        if task.result_filename:
+            task.file_ext = os.path.splitext(task.result_filename)[1]
+            task.result_valid = True
+        if created:
+            task.files.update(created)
+

 class MscoMaster(MscoProcess):
    """
@ -1025,19 +1081,19 @@ class MscoMaster(MscoProcess):
        @return: self._finishing
        """
        if not self._finishing and (self._model_done and not self._pending_tasks and not self._running_tasks):
-            logger.info("finish: model handler is done")
+            logger.warning("finish: model handler is done")
            self._finishing = True
        if not self._finishing and (self._calculations >= self.max_calculations):
            logger.warning("finish: max. calculations (%u) exeeded", self.max_calculations)
            self._finishing = True
        if not self._finishing and self.stop_signal:
-            logger.info("finish: stop signal received")
+            logger.warning("finish: stop signal received")
            self._finishing = True
        if not self._finishing and (datetime.datetime.now() > self.datetime_limit):
            logger.warning("finish: time limit exceeded")
            self._finishing = True
        if not self._finishing and os.path.isfile("finish_pmsco"):
-            logger.info("finish: finish_pmsco file detected")
+            logger.warning("finish: finish_pmsco file detected")
            self._finishing = True

        if self._finishing and not self._running_slaves and not self._running_tasks:
--- a/pmsco/files.py
+++ b/pmsco/files.py
@ -27,9 +27,9 @@ logger = logging.getLogger(__name__)
 #
 # each string of this set marks a category of files.
 #
-# @arg @c 'input' :     raw input files for calculator, including cluster and phase files in custom format
+# @arg @c 'input' :     raw input files for calculator, including cluster and atomic files in custom format
 # @arg @c 'output' :    raw output files from calculator
-# @arg @c 'phase' :     phase files in portable format for report
+# @arg @c 'atomic' :    atomic scattering (phase, emission) files in portable format
 # @arg @c 'cluster' :   cluster files in portable XYZ format for report
 # @arg @c 'log' :       log files
 # @arg @c 'debug' :     debug files
@ -47,7 +47,7 @@ logger = logging.getLogger(__name__)
 # the string is used only to specify whether bad models should be deleted or not.
 # if so, all files related to bad models are deleted, regardless of their static category.
 #
-FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
+FILE_CATEGORIES = {'cluster', 'atomic', 'input', 'output',
                   'report', 'region', 'emitter', 'scan', 'symmetry', 'model',
                   'log', 'debug', 'population', 'rfac'}

--- a/pmsco/graphics/scan.py
+++ b/pmsco/graphics/scan.py
@ -36,7 +36,7 @@ except ImportError:
    logger.warning("error importing matplotlib. graphics rendering disabled.")


-def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
+def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False, ref_data=None):
    """
    produce a graphics file from a one-dimensional scan file.

@ -47,14 +47,22 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):

    @param filename: path and name of the scan file.
        this is used to derive the output file path by adding the extension of the graphics file format.
+
    @param data: numpy-structured array of EI, ETPI or ETPAI data.
+
    @param scan_mode: list containing the field name of the scanning axis of the data array.
        it must contain one element exactly.
+
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
+
    @param is_modf: whether data contains a modulation function (True) or intensity (False, default).
        this parameter is used to set axis labels.

+    @param ref_data: numpy-structured array of EI, ETPI or ETPAI data.
+        this is reference data (e.g. experimental data) that should be plotted with the main dataset.
+        both datasets will be plotted on the same axis and should have similar data range.
+
    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

@ -66,6 +74,8 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
    canvas(fig)

    ax = fig.add_subplot(111)
+    if ref_data is not None:
+        ax.plot(ref_data[scan_mode[0]], ref_data['i'], 'k.')
    ax.plot(data[scan_mode[0]], data['i'])

    ax.set_xlabel(scan_mode[0])
@ -225,7 +235,7 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
    return out_filename


-def render_scan(filename, data=None):
+def render_scan(filename, data=None, ref_data=None):
    """
    produce a graphics file from a scan file.

@ -248,6 +258,11 @@ def render_scan(filename, data=None):
    @param data: numpy-structured array of ETPI or ETPAI data.
        if this argument is omitted, the data is loaded from the file referenced by the filename argument.

+    @param ref_data: numpy-structured array of ETPI or ETPAI data.
+        this is reference data (e.g. experimental data) that should be plotted with the main dataset.
+        this is supported for 1d scans only.
+        both datasets will be plotted on the same axis and should have similar data range.
+
    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.
    """
@ -258,7 +273,7 @@ def render_scan(filename, data=None):

    try:
        if len(scan_mode) == 1:
-            out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf)
+            out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf, ref_data=ref_data)
        elif len(scan_mode) == 2 and 'e' in scan_mode:
            out_filename = render_ea_scan(filename, data, scan_mode, is_modf=is_modf)
        elif len(scan_mode) == 2 and 't' in scan_mode and 'p' in scan_mode:
--- a/pmsco/handlers.py
+++ b/pmsco/handlers.py
@ -344,7 +344,6 @@ class SingleModelHandler(ModelHandler):
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

-        assert not math.isnan(task.rfac)
        self.result = task.model.copy()
        self.result['_rfac'] = task.rfac

@ -629,7 +628,8 @@ class SymmetryHandler(TaskHandler):
                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
-                graph_file = mgs.render_scan(parent_task.modf_filename)
+                graph_file = mgs.render_scan(parent_task.modf_filename,
+                                             ref_data=self._project.scans[parent_task.id.scan].modulation)
                self._project.files.add_file(graph_file, parent_task.id.model, 'scan')

            del self._pending_ids_per_parent[parent_task.id]
@ -752,7 +752,8 @@ class EmitterHandler(TaskHandler):
                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
-                graph_file = mgs.render_scan(parent_task.modf_filename)
+                graph_file = mgs.render_scan(parent_task.modf_filename,
+                                             ref_data=self._project.scans[parent_task.id.scan].modulation)
                self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')

            del self._pending_ids_per_parent[parent_task.id]
--- a/pmsco/igor.py
+++ b/pmsco/igor.py
@ -0,0 +1,143 @@
+"""
+@package pmsco.igor
+data exchange with wavemetrics igor pro.
+
+this module provides functions for loading/saving pmsco data in igor pro.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2019 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from pmsco.compat import open
+
+
+def _escape_igor_string(s):
+    # backslashes are doubled before quotes are escaped,
+    # otherwise the backslash inserted for a quote would be doubled as well.
+    return s.replace('\\', '\\\\').replace('"', '\\"')
+
+
+def namefix_double(name):
+    """
+    make a one-character wave name two characters long by repeating it
+
+    names of any other length are returned as they are.
+
+    @param name: (str) proposed wave name
+
+    @return: corrected name
+    """
+    return name if len(name) != 1 else name + name
+
+
+def namefix_etpais(name):
+    """
+    expand a one-letter name to its two-letter wave name
+
+    the mapping is e -> en, t -> th, p -> ph, i -> in, m -> mo, s -> si.
+    any other name is returned unchanged.
+
+    @param name: (str) proposed wave name
+
+    @return: corrected name
+    """
+    two_letter = {'e': 'en', 't': 'th', 'p': 'ph',
+                  'i': 'in', 'm': 'mo', 's': 'si'}
+    # names without an entry in the table fall through as they are.
+    return two_letter.get(name, name)
+
+
+class IgorExport(object):
+    """
+    writer of pmsco data in Igor text (ITX) format.
+
+    usage:
+    1) create an object instance.
+    2) set @ref data.
+    3) set optional attributes: @ref prefix and @ref namefix.
+    4) call @ref export.
+    """
+
+    def __init__(self):
+        super(IgorExport, self).__init__()
+        self.namefix = namefix_double
+        self.prefix = ""
+        self.data = None
+
+    def set_data(self, data):
+        """
+        assign the array that @ref export writes to the file.
+
+        this must (currently) be a one-dimensional structured array,
+        its field names become the wave names.
+
+        @param data: numpy.ndarray
+        @return: None
+        """
+        self.data = data
+
+    def export(self, filename):
+        """
+        open filename for writing and write the header and the data section to it.
+        """
+        with open(filename, 'w') as out:
+            self._write_header(out)
+            self._write_data(out)
+
+    def _fix_name(self, name):
+        """
+        derive the final wave name from a field name.
+
+        @ref namefix is applied first (unless it is None), then @ref prefix is prepended.
+
+        @param name: (str) proposed wave name
+
+        @return: corrected name
+        """
+        fixed = name if self.namefix is None else self.namefix(name)
+        # the prefix is added last, i.e. namefix sees the bare field name.
+        return self.prefix + fixed
+
+    def _write_header(self, f):
+        """
+        write the 'IGOR' keyword line followed by the 'X // pmsco data export' line.
+
+        @param f: open file or stream
+
+        @return: None
+        """
+        f.write('IGOR\n')
+        f.write('X // pmsco data export\n')
+
+    def _write_data(self, f):
+        """
+        write one Waves block that holds all fields of @ref data.
+
+        @param f: open file or stream
+
+        @return: None
+        """
+        assert isinstance(self.data, np.ndarray)
+        assert len(self.data.shape) == 1
+        assert len(self.data.dtype.names[0]) >= 1
+
+        table = self.data
+        dims = ",".join([str(extent) for extent in table.shape])
+        waves = ", ".join([self._fix_name(field) for field in table.dtype.names])
+        declaration = 'Waves/O/D/N=({shape}) {names}\n'.format(shape=dims, names=waves)
+
+        f.write(declaration)
+        f.write('BEGIN\n')
+        np.savetxt(f, table, fmt='%g')
+        f.write('END\n')
--- a/pmsco/makefile
+++ b/pmsco/makefile
@ -1,15 +1,16 @@
 SHELL=/bin/sh

-# makefile for EDAC, MSC, and MUFPOT programs and modules
+# makefile for external programs and modules
 #
 # see the top-level makefile for additional information.

-.PHONY: all clean edac loess msc mufpot
+.PHONY: all clean edac loess msc mufpot phagen

 EDAC_DIR = edac
 MSC_DIR = msc
 MUFPOT_DIR = mufpot
 LOESS_DIR = loess
+PHAGEN_DIR = calculators/phagen

 all: edac loess

@ -25,9 +26,13 @@ msc:
 mufpot:
 	$(MAKE) -C $(MUFPOT_DIR)

+phagen:
+	$(MAKE) -C $(PHAGEN_DIR)
+
 clean:
 	$(MAKE) -C $(EDAC_DIR) clean
 	$(MAKE) -C $(LOESS_DIR) clean
 	$(MAKE) -C $(MSC_DIR) clean
 	$(MAKE) -C $(MUFPOT_DIR) clean
+	$(MAKE) -C $(PHAGEN_DIR) clean
 	rm -f *.pyc
--- a/pmsco/optimizers/grid.py
+++ b/pmsco/optimizers/grid.py
@ -344,9 +344,14 @@ class GridSearchHandler(handlers.ModelHandler):
        time_pending += self._model_time
        if time_pending > time_avail:
            self._timeout = True
+            logger.warning("time limit reached")
+
+        if self._invalid_count > self._invalid_limit:
+            self._timeout = True
+            logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)

        model = self._next_model
-        if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
+        if not self._timeout and model < self._pop.model_count:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            pos = self._pop.positions[model]
--- a/pmsco/optimizers/population.py
+++ b/pmsco/optimizers/population.py
@ -491,9 +491,9 @@ class Population(object):
        seed = np.genfromtxt(seed_file, names=True)
        try:
            seed = seed[seed['_rfac'] <= rfac_limit]
-        except KeyError:
-            logger.warning(BMsg("missing _rfac column in seed file {hf}. ignoring seed file.", hf=seed_file))
-            return 0
+        except ValueError:
+            recalc_seed = True
+            logger.warning(BMsg("missing _rfac column in seed file {hf}. re-calculating.", hf=seed_file))
        else:
            seed.sort(order='_rfac')
        seed_size = min(seed.shape[0], count_limit)
@ -508,9 +508,12 @@ class Population(object):
        if len(common_fields) < len(dest_fields):
            logger.warning(BMsg("missing columns in seed file {hf}.", hf=seed_file))

-        logger.info(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
+        logger.warning(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))

-        self.pos['_rfac'][first:last] = seed['_rfac']
+        try:
+            self.pos['_rfac'][first:last] = seed['_rfac']
+        except ValueError:
+            self.pos['_rfac'][first:last] = 2.1
        dest_index = np.arange(first, last)
        for name in common_fields:
            sel1 = np.less_equal(self.model_min[name], seed[name])
@ -525,11 +528,11 @@ class Population(object):
        self.pos['_model'][first:last] = np.arange(seed_size) + first
        if recalc_seed:
            self.pos['_rfac'][first:last] = 2.1
-            logger.info("models from seed file are re-calculated.")
+            logger.warning("models from seed file are re-calculated.")
        else:
            sel = self.pos['_rfac'][first:last] <= rfac_limit
            self.pos['_gen'][dest_index[sel]] = -1
-            logger.info(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
+            logger.warning(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))

        return seed_size

@ -579,7 +582,7 @@ class Population(object):
        if len(common_fields) < len(dest_fields):
            logger.warning(BMsg("loaded patch file {pf}. some columns are missing.", pf=patch_file))
        else:
-            logger.info(BMsg("loaded patch file {pf}.", pf=patch_file))
+            logger.warning(BMsg("loaded patch file {pf}.", pf=patch_file))

    def _apply_patch(self):
        """
@ -592,7 +595,7 @@ class Population(object):
        parameter values that lie outside the parameter domain (min/max) are ignored.
        """
        if self.pos_patch is not None:
-            logger.info(BMsg("patching the population with new positions."))
+            logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
            source_fields = set(self.pos_patch.dtype.names)
            dest_fields = set(self.model_start.keys())
            common_fields = source_fields & dest_fields
@ -996,10 +999,10 @@ class Population(object):

        # rewrite model, tolerance and results as two-dimensional array
        if search_array is None:
-            results = self.results[names].copy()
-        else:
-            results = search_array[names].copy()
-        results = results.view((results.dtype[0], len(names)))
+            search_array = self.results
+        results = np.empty((search_array.shape[0], len(names)))
+        for col, name in enumerate(names):
+            results[:, col] = search_array[name]
        model = np.asarray(model_tuple, results.dtype)
        tol = np.asarray([max(abs(self.model_max[name]), abs(self.model_min[name]), precision)
                          for name in names])
@ -1169,8 +1172,9 @@ class PopulationHandler(handlers.ModelHandler):
        """
        initialize the particle swarm and open an output file.

-        the population size is set to project.optimizer_params.['pop_size'] if it is defined and greater than 4.
-        otherwise, it defaults to <code>max(2 * slots, 4)</code>.
+        the population size is set to `project.optimizer_params['pop_size']`
+        if it is defined and greater than 4.
+        otherwise, it defaults to `max(slots, 4)`.

        for good efficiency the population size (number of particles) should be
        greater or equal to the number of available processing slots,
@ -1191,7 +1195,9 @@ class PopulationHandler(handlers.ModelHandler):
        super(PopulationHandler, self).setup(project, slots)

        _min_size = 4
-        self._pop_size = max(project.optimizer_params.get('pop_size', self._slots * 2), _min_size)
+        _def_size = self._slots
+        _req_size = project.optimizer_params.get('pop_size', 0)
+        self._pop_size = _req_size if _req_size >= _min_size else _def_size
        self.setup_population()
        self._invalid_limit = self._pop_size * 10

@ -1228,7 +1234,11 @@ class PopulationHandler(handlers.ModelHandler):
        because the best peer position in the generation may not be known yet.
        the effect can be reduced by making the population larger than the number of processes.

-        @return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
+        the created tasks are returned as the function result and added to self._pending_tasks.
+
+        @return list of generated tasks.
+            empty list if the optimization has converged (see Population.is_converged())
+            or if the time limit is approaching.
        """

        super(PopulationHandler, self).create_tasks(parent_task)
@ -1241,7 +1251,7 @@ class PopulationHandler(handlers.ModelHandler):
        time_pending = self._model_time * len(self._pending_tasks)
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)

-        out_tasks = []
+        new_tasks = []
        if not self._timeout and not self._converged:
            self._check_patch_file()
            self._pop.advance_population()
@ -1250,7 +1260,8 @@ class PopulationHandler(handlers.ModelHandler):
                time_pending += self._model_time
                if time_pending > time_avail:
                    self._timeout = True
-                    logger.info("time limit reached")
+                    logger.warning("time limit reached")
+                    new_tasks = []
                    break

                if pos['_gen'] >= 0:
@ -1258,12 +1269,12 @@ class PopulationHandler(handlers.ModelHandler):
                    new_task.parent_id = parent_id
                    new_task.model = pos
                    new_task.change_id(model=pos['_model'])
+                    new_tasks.append(new_task)

-                    child_id = new_task.id
-                    self._pending_tasks[child_id] = new_task
-                    out_tasks.append(new_task)
+        for task in new_tasks:
+            self._pending_tasks[task.id] = task

-        return out_tasks
+        return new_tasks

    def _check_patch_file(self):
        """
@ -1323,7 +1334,7 @@ class PopulationHandler(handlers.ModelHandler):

        if task.result_valid:
            if self._pop.is_converged() and not self._converged:
-                logger.info("population converged")
+                logger.warning("population converged")
                self._converged = True

            if task.time > self._model_time:
--- a/pmsco/pmsco.py
+++ b/pmsco/pmsco.py
@ -46,8 +46,6 @@ import sys

 from mpi4py import MPI

-import pmsco.calculators.calculator as calculator
-import pmsco.cluster as cluster
 import pmsco.dispatch as dispatch
 import pmsco.files as files
 import pmsco.handlers as handlers
@ -154,12 +152,6 @@ def set_common_args(project, args):
    if args.table_file:
        project.optimizer_params['table_file'] = args.table_file

-    code = args.code.lower()
-    if code in {'edac', 'msc', 'test'}:
-        project.code = code
-    else:
-        logger.error("invalid code argument")
-
    if args.time_limit:
        project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))

@ -178,38 +170,6 @@ def set_common_args(project, args):
        project.keep_best = args.keep_best


-def log_project_args(project):
-    """
-    send some common project arguments to the log.
-
-    @param project: project instance (sub-class of pmsco.project.Project).
-    @return: None
-    """
-    try:
-        logger.info("scattering code: {0}".format(project.code))
-        logger.info("optimization mode: {0}".format(project.mode))
-        try:
-            logger.info("minimum population size: {0}".format(project.optimizer_params['pop_size']))
-        except KeyError:
-            pass
-        try:
-            logger.info("seed file: {0}".format(project.optimizer_params['seed_file']))
-            logger.info("seed limit: {0}".format(project.optimizer_params['seed_limit']))
-        except KeyError:
-            pass
-        try:
-            logger.info("table file: {0}".format(project.optimizer_params['table_file']))
-        except KeyError:
-            pass
-        logger.info("data directory: {0}".format(project.data_dir))
-        logger.info("output file: {0}".format(project.output_file))
-
-        _files_to_keep = files.FILE_CATEGORIES - project.files.categories_to_delete
-        logger.info("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
-    except AttributeError:
-        logger.warning("AttributeError in log_project_args")
-
-
 def run_project(project):
    """
    run a calculation project.
@ -217,7 +177,11 @@ def run_project(project):
    @param project:
    @return:
    """
-    log_project_args(project)
+    # log project arguments only in rank 0
+    mpi_comm = MPI.COMM_WORLD
+    mpi_rank = mpi_comm.Get_rank()
+    if mpi_rank == 0:
+        project.log_project_args()

    optimizer_class = None
    if project.mode == 'single':
@ -240,26 +204,7 @@ def run_project(project):

    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

-    calculator_class = None
-    if project.code == 'edac':
-        logger.debug("importing EDAC interface")
-        from pmsco.calculators import edac
-        project.cluster_format = cluster.FMT_EDAC
-        calculator_class = edac.EdacCalculator
-    elif project.code == 'msc':
-        logger.debug("importing MSC interface")
-        from pmsco.calculators import msc
-        project.cluster_format = cluster.FMT_MSC
-        calculator_class = msc.MscCalculator
-    elif project.code == 'test':
-        logger.debug("importing TEST interface")
-        project.cluster_format = cluster.FMT_EDAC
-        calculator_class = calculator.TestCalculator
-    else:
-        logger.error("invalid code argument")
-    project.calculator_class = calculator_class
-
-    if project and optimizer_class and calculator_class:
+    if project and optimizer_class:
        logger.info("starting calculations")
        try:
            dispatch.run_calculations(project)
@ -286,7 +231,7 @@ class Args(object):
    values as the command line parser.
    """

-    def __init__(self, mode="single", code="edac", output_file="pmsco_data"):
+    def __init__(self, mode="single", output_file="pmsco_data"):
        """
        constructor.
        
@ -299,8 +244,7 @@ class Args(object):
        self.pop_size = 0
        self.seed_file = ""
        self.seed_limit = 0
-        self.code = code
-        self.data_dir = os.getcwd()
+        self.data_dir = ""
        self.output_file = output_file
        self.time_limit = 24.0
        self.keep_files = files.FILE_CATEGORIES_TO_KEEP
@ -319,6 +263,7 @@ def get_cli_parser(default_args=None):
    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        multiple-scattering calculations and optimization

@ -349,7 +294,7 @@ def get_cli_parser(default_args=None):
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
                        help='population size (number of particles) in swarm or genetic optimization mode. ' +
-                        'default is the greater of 4 or two times the number of calculation processes.')
+                        'default is the greater of 4 or the number of calculation processes.')
    parser.add_argument('--seed-file',
                        help='path and name of population seed file. ' +
                        'population data of previous optimizations can be used to seed a new optimization. ' +
@ -357,14 +302,11 @@ def get_cli_parser(default_args=None):
    parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
                        help='maximum number of models to use from the seed file. ' +
                        'the models with the best R-factors are selected.')
-    parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default=default_args.code,
-                        help='scattering code (default: edac)')
    parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file', default=default_args.output_file,
-                        help='base path for intermediate and output files.' +
-                             'default: pmsco_data')
+                        help='base path for intermediate and output files.')
    parser.add_argument('--table-file',
                        help='path and name of population table file for table optimization mode. ' +
                        'the file must have the same structure as the .pop or .dat files.')
@ -375,21 +317,18 @@ def get_cli_parser(default_args=None):
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
                        help='number of best models for which to keep result files '
-                             '(at each node from root down to keep-levels). '
-                             'default 10 (project can define higher default).')
+                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        default=default_args.keep_levels,
                        help='task level down to which result files of best models are kept. '
-                             '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region. '
-                             'default 1 (project can define higher default).')
+                             '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
-                        help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
+                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file', default=default_args.log_file,
                        help='name of the main log file. ' +
-                             'under MPI, the rank of the process is inserted before the extension. ' +
-                             'defaults: output file + log, or pmsco.log.')
+                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level', default=default_args.log_level,
-                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL. default: WARNING.')
+                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                        help="enable logging. by default, logging is on.")
--- a/pmsco/project.py
+++ b/pmsco/project.py
@ -33,12 +33,15 @@ from __future__ import print_function
 import collections
 import copy
 import datetime
+import git
 import logging
 import numpy as np
 import os.path
 import socket
 import sys

+from pmsco.calculators.calculator import InternalAtomicCalculator
+from pmsco.calculators.edac import EdacCalculator
 import pmsco.cluster as mc
 from pmsco.compat import open
 import pmsco.data as md
@ -177,32 +180,89 @@ class Params(object):
    # @arg emission angle window (EDAC)
    # @arg angular_broadening (MSC)

+    ## @var binding_energy (float)
+    # initial state binding energy with respect to the Fermi level in eV
+    #
+
+    ## @var initial_state (str)
+    # initial state
+    #
+    # 1s, 2p, 2p1/2, etc.
+    #
+
    ## @var phase_files (dict)
-    # dictionary of phase files.
+    # dictionary of phase or scattering matrix element files.
    #
    # the keys are atomic numbers, the values file names.
-    # if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
+    # whether the files contain phase shifts or matrix elements depends on the calculator.
+    # EDAC determines the kind of information from the first line in the file.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the scattering matrix is computed by the calculator (if supported).
    #
    # maps to:
    # @arg scatterer (EDAC)
    # @arg atomic_number, phase_file (MSC)

+    ## @var phase_output_classes (int or iterable of int)
+    # atom classes for which to output phase files
+    #
+    # if the atomic scattering factors are calculated internally,
+    # EDAC can export them to scattering files.
+    #
+    # this parameter can be one of
+    # @arg None (default) no phase output,
+    # @arg integer number defining a range 0:N-1 of atom classes,
+    # @arg iterable (e.g., set or sequence) of atom classes to export.
+    #
+    # the problem is that EDAC expects the user to list each atom class to export,
+    # though it is not possible to know how many classes there will be
+    # or which atoms belong to which class before the calculation is actually done.
+    # the number of classes will be between the number of different elements and the number of atoms.
+    #
+    # thus, this parameter should normally be left at its default value
+    # and used only in specific situations that can be processed manually.
+    # if the parameter is non-default, EDAC will also produce a cluster output
+    # that includes a mapping between atomic coordinates and atom classes.
+    #
+    # @note the files generated belong to the category "output".
+    # you need to specify `--keep-files output` to prevent them from getting cleaned up.
+
+    ## @var polarization (str)
+    # photon polarization
+    #
+    # 'H', 'V', 'L', 'R', 'U'
+    #
+
+    ## @var rme_files (dict)
+    # dictionary of radial matrix element files.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the radial matrix defaults to the rme_xxx_xxx attributes.
+    #
+    # in EDAC, RME files or constants are considered only if @ref phase_files are specified.
+    #
+
+    ## @var work_function (float)
+    # work function in eV
+    #
+    # the energy scale of EDAC is referenced to the vacuum level
+    # but data files are referenced to the Fermi level.
+    # the @ref pmsco.calculators.edac module adds the work function to the kinetic energy before it calls EDAC.
+    #
+
    def __init__(self):
        self.title = "default parameters"
        self.comment = "set by project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
-        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
-        # MSC convention: H, V, L, R, U
+        self.binding_energy = 0.0
        self.polarization = "H"
        self.angular_resolution = 1.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
-        # the energy scale of EDAC is referenced to the vacuum level
-        # but data files are referenced to the Fermi level
-        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC
        self.work_function = 0.0
        self.symmetry_range = 360.0
        self.polar_incidence_angle = 60.0
@ -211,6 +271,11 @@ class Params(object):
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        self.phase_files = {}
+        self.rme_files = {}
+        self.rme_minus_value = 0.1
+        self.rme_minus_shift = 0.0
+        self.rme_plus_value = 1.0
+        self.rme_plus_shift = 0.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
@ -221,15 +286,23 @@ class Params(object):
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
-        self.rme_minus_value = 0.5
-        self.rme_minus_shift = 0.0
-        self.rme_plus_value = 0.5
-        self.rme_plus_shift = 0.0
        # used by EDAC only
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        self.lmax = 15
        self.dmax = 5.0
        self.orders = [20]
+        self.phase_output_classes = None
+
+    @property
+    def l_init(self):
+        """
+        orbital angular momentum quantum number l of the initial state.
+
+        the value is derived from the second character of the initial_state attribute,
+        i.e. the sub-shell letter of terms such as "1s", "2p" or "2p1/2".
+
+        @return: (int) 0 for s, 1 for p, 2 for d, 3 for f.
+
+        @raise ValueError if the sub-shell letter is not one of s, p, d, f.
+
+        @raise IndexError if initial_state has fewer than two characters.
+        """
+        subshell = self.initial_state[1]
+        return "spdf".index(subshell)


 class Scan(object):
@ -301,11 +374,45 @@ class Scan(object):
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
-        self.energies = np.zeros((0))
-        self.thetas = np.zeros((0))
-        self.phis = np.zeros((0))
-        self.alphas = np.zeros((0))
-        
+        self.positions = {
+            'e': np.empty(0),
+            't': np.empty(0),
+            'p': np.empty(0),
+            'a': np.empty(0),
+        }
+
+    @property
+    def energies(self):
+        """
+        energy positions of the scan.
+
+        this is entry 'e' of the positions dictionary.
+        """
+        return self.positions['e']
+
+    @energies.setter
+    def energies(self, value):
+        # the value is stored as is (no copy, no validation) under key 'e'.
+        self.positions['e'] = value
+
+    @property
+    def thetas(self):
+        """
+        theta positions of the scan.
+
+        this is entry 't' of the positions dictionary.
+        """
+        return self.positions['t']
+
+    @thetas.setter
+    def thetas(self, value):
+        # the value is stored as is (no copy, no validation) under key 't'.
+        self.positions['t'] = value
+
+    @property
+    def phis(self):
+        """
+        phi positions of the scan.
+
+        this is entry 'p' of the positions dictionary.
+        """
+        return self.positions['p']
+
+    @phis.setter
+    def phis(self, value):
+        # the value is stored as is (no copy, no validation) under key 'p'.
+        self.positions['p'] = value
+
+    @property
+    def alphas(self):
+        """
+        alpha positions of the scan.
+
+        this is entry 'a' of the positions dictionary.
+        """
+        return self.positions['a']
+
+    @alphas.setter
+    def alphas(self, value):
+        # the value is stored as is (no copy, no validation) under key 'a'.
+        self.positions['a'] = value
+
    def copy(self):
        """
        create a copy of the scan.
@ -314,9 +421,9 @@ class Scan(object):
        """
        return copy.deepcopy(self)

-    def set_scan(self, filename, emitter, initial_state):
+    def import_scan_file(self, filename, emitter, initial_state):
        """
-        set file name of reference experiment and load it.
+        import the reference experiment.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.
@ -324,9 +431,8 @@ class Scan(object):
        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
-        this version introduces holo scans as an experimental feature.
-        for all other scan types, the scan file must exactly conform with a rectangular scan.
-        the following scans are currently supported:
+        holo scans are transparently mapped to rectangular scans by pmsco.
+        this method accepts the following scans:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
@ -347,43 +453,120 @@ class Scan(object):
        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
-            self.mode, positions = md.detect_scan_mode(self.raw_data)
+            self.mode, self.positions = md.detect_scan_mode(self.raw_data)

-            if 'e' in self.mode:
-                self.energies = positions['e']
-            else:
+            if 'e' not in self.mode:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except ValueError:
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

-            if 't' in self.mode:
-                self.thetas = positions['t']
-            else:
+            if 't' not in self.mode:
                try:
                    self.thetas = np.asarray((self.raw_data['t'][0], ))
                except ValueError:
                    logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
-                    self.thetas = np.zeros((1))
+                    self.thetas = np.zeros(1)

-            if 'p' in self.mode:
-                self.phis = positions['p']
-            else:
+            if 'p' not in self.mode:
                try:
                    self.phis = np.asarray((self.raw_data['p'][0], ))
                except ValueError:
                    logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
-                    self.phis = np.zeros((1))
+                    self.phis = np.zeros(1)

-            if 'a' in self.mode:
-                self.alphas = positions['a']
-            else:
+            if 'a' not in self.mode:
                try:
                    self.alphas = np.asarray((self.raw_data['a'][0], ))
                except ValueError:
                    logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
-                    self.alphas = np.zeros((1))
+                    self.alphas = np.zeros(1)
+
+    def define_scan(self, positions, emitter, initial_state):
+        """
+        set up a cartesian (rectangular grid) scan from explicit scan positions.
+
+        the scan positions are copied from the arguments,
+        and the raw data array is allocated with one record per grid point.
+        all intensity values are initialized to 1.
+        the file name is reset to an empty string.
+
+        the method can create the following scan schemes:
+
+        * intensity vs energy at fixed theta, phi
+        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
+        * intensity vs theta, phi, or alpha
+        * intensity vs theta and phi (rectangular holo scan)
+
+        @param positions: (dictionary of numpy arrays)
+            the dictionary must contain a one-dimensional array for each scan dimension 'e', 't', 'p' and 'a'.
+            these arrays must contain unique, equidistant positions.
+            constant dimensions must contain exactly one value.
+            missing angle dimensions default to 0,
+            a missing energy dimension results in a KeyError.
+
+        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
+
+        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".
+
+        @raise KeyError if positions does not contain the energy dimension 'e'.
+
+        @raise AssertionError if the number of non-constant dimensions is not 1 or 2,
+            or if both theta and alpha are non-constant.
+        """
+        self.filename = ""
+        self.emitter = emitter
+        self.initial_state = initial_state
+        self.mode = []
+        n_points = 1
+
+        dims = ['e', 't', 'p', 'a']
+        # log messages for angle dimensions that may be omitted by the caller.
+        # energy has no entry because it is mandatory.
+        default_messages = {
+            't': "missing theta in define_scan arguments, defaulting to 0.0",
+            'p': "missing phi in define_scan arguments, defaulting to 0.0",
+            'a': "missing alpha in define_scan arguments, defaulting to 0.0",
+        }
+
+        for dim in dims:
+            try:
+                axis = np.copy(positions[dim])
+            except KeyError:
+                if dim not in default_messages:
+                    logger.error("missing energy in define_scan arguments")
+                    raise
+                logger.info(default_messages[dim])
+                self.positions[dim] = np.zeros(1)
+            else:
+                self.positions[dim] = axis
+                # only dimensions with more than one position count as scan dimensions
+                if axis.shape[0] > 1:
+                    self.mode.append(dim)
+                    n_points *= axis.shape[0]
+
+        assert 0 < len(self.mode) <= 2, "unacceptable number of dimensions in define_scan"
+        assert 't' not in self.mode or 'a' not in self.mode, "unacceptable combination of dimensions in define_scan"
+
+        self.dtype = md.DTYPE_ETPAI
+        self.raw_data = np.zeros(n_points, self.dtype)
+        # expand the four axes to the full grid and flatten each coordinate into its column.
+        # NOTE(review): np.meshgrid is used with its default indexing ('xy'),
+        # which swaps the first two axes (e, t) compared to indexing='ij'.
+        # this affects the record order only if both energy and theta vary - confirm that this is intended.
+        grid = np.meshgrid(*[self.positions[dim] for dim in dims])
+        for dim, coords in zip(dims, grid):
+            self.raw_data[dim] = coords.reshape(-1)
+        self.raw_data['i'] = 1


 # noinspection PyMethodMayBeStatic
@ -465,9 +648,8 @@ class Project(object):
    #
    # @arg @c 'pop_size' (int)
    #   population size (number of particles) in the swarm or genetic optimization mode.
-    #   by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
+    #   by default, the population size is set to the number of parallel processes or 4, whichever is greater.
    #   you may want to override the default value in cases where the automatic choice is not appropriate.
-    #   the value can be set by the command line.
    # @arg @c 'seed_file' (string)
    #   name of a file containing the results from previous optimization runs.
    #   this can be used to resume a swarm or genetic optimization where it was interrupted before.
@ -537,9 +719,27 @@ class Project(object):
    # @arg 3 = emitter level: emitter nodes in addition to level 1.
    # @arg 4 = region level: region nodes in addition to level 1.

+    ## @var atomic_scattering_factory
+    # factory function to create an atomic scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.AtomicCalculator.
+    # the name of atomic scattering calculator classes should end in AtomicCalculator.
+
+    ## @var multiple_scattering_factory
+    # factory function to create a multiple scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.Calculator
+    #
+    # example: pmsco.calculators.edac.EdacCalculator
+    #
+
    def __init__(self):
        self.mode = "single"
-        self.code = "edac"
+        self.job_name = ""
+        self.git_hash = ""
+        self.description = ""
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = mc.LegacyClusterGenerator(self)
@ -568,7 +768,8 @@ class Project(object):
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
        }
-        self.calculator_class = None
+        self.atomic_scattering_factory = InternalAtomicCalculator
+        self.multiple_scattering_factory = EdacCalculator
        self._tasks_fields = []
        self._db = database.ResultsDatabase()

@ -608,7 +809,7 @@ class Project(object):
        self.combined_scan = None
        self.combined_modf = None

-    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
+    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
        """
        add the file name of reference experiment and load it.
        
@ -627,6 +828,15 @@ class Project(object):
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.
+            the file is not loaded when the optional positions argument is present,
+            but the filename may serve as basename for output files (e.g. modulation function).
+
+        @param positions: (optional, dictionary of numpy arrays) scan positions.
+            if specified, the file given by filename is _not_ loaded,
+            and the scan positions are initialized from this dictionary.
+            the dictionary keys are the possible scan dimensions: 'e', 't', 'p', 'a'.
+            the arrays are one-dimensional and contain unique, equidistant positions.
+            constant dimensions have shape 1. see @ref Scan.define_scan.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

@ -638,11 +848,13 @@ class Project(object):
        @param modf_model: (dict) model parameters to be passed to the modulation function.

        @return (Scan) the new scan object (which is also a member of self.scans).
-
-        @todo the accepted scanning schemes should be generalized.
        """
        scan = Scan()
-        scan.set_scan(filename, emitter, initial_state)
+        if positions is not None:
+            scan.define_scan(positions, emitter, initial_state)
+            scan.filename = filename
+        else:
+            scan.import_scan_file(filename, emitter, initial_state)
        self.scans.append(scan)

        if modf_model is None:
@ -735,6 +947,41 @@ class Project(object):
        """
        self.timedelta_limit = timedelta

+    def log_project_args(self):
+        """
+        send some common project attributes to the log.
+
+        the attributes are normally logged at WARNING level.
+        optimizer parameters that have an empty or zero value are logged at DEBUG level only.
+        the file categories to keep are listed in sorted order
+        so that the log output is reproducible between runs.
+
+        this method is called by the main pmsco module after creating the project and assigning command line arguments.
+        it may be overridden to add logs of attributes of the sub-class.
+
+        if one of the attributes is missing, a warning is logged and the remaining attributes are skipped.
+
+        @return: None
+        """
+        try:
+            logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
+            logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
+            logger.warning("optimization mode: {0}".format(self.mode))
+
+            for key in sorted(self.optimizer_params):
+                val = self.optimizer_params[key]
+                # unset (falsy) parameters are of minor interest and go to the debug level
+                lev = logging.WARNING if val else logging.DEBUG
+                logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))
+
+            logger.warning("data directory: {0}".format(self.data_dir))
+            logger.warning("output file: {0}".format(self.output_file))
+
+            # the difference of two sets has no defined iteration order - sort it for a stable log message
+            _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
+            logger.warning("intermediate files to keep: {0}".format(", ".join(sorted(_files_to_keep))))
+
+            for idx, scan in enumerate(self.scans):
+                logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
+            for idx, sym in enumerate(self.symmetries):
+                logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
+
+        except AttributeError:
+            logger.warning("AttributeError in log_project_args")
+
    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine results of different symmetry into one result and calculate the modulation function.
@ -937,6 +1184,23 @@ class Project(object):
        else:
            md.save_data(parent_task.modf_filename, modf)

+    def get_git_hash(self):
+        """
+        get the git commit (hash) of the running code (HEAD)
+
+        the method looks for a git repository in the source tree of this module.
+        if successful, it returns the hash string of the HEAD commit.
+
+        the hash is informative metadata only.
+        for this reason, the method returns an empty string rather than raising an exception
+        if the hash cannot be determined.
+
+        @return: hexadecimal hash string.
+            empty string if the file is not in a git repository,
+            if the path of the module does not exist in the file system,
+            or if the repository does not contain a commit yet.
+        """
+        try:
+            repo = git.Repo(__file__, search_parent_directories=True)
+            # head.commit raises ValueError if HEAD points to a branch without commits
+            return repo.head.commit.hexsha
+        except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError, ValueError):
+            return ""
+
    def setup(self, handlers):
        """
        prepare for calculations.
@ -954,11 +1218,13 @@ class Project(object):

        @return: None
        """
+        self.git_hash = self.get_git_hash()
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
+        fields.append("secs")
        fields = ["_" + f for f in fields]
        dom = self.create_domain()
-        model_fields = dom.start.keys()
+        model_fields = list(dom.start.keys())
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields
@ -968,16 +1234,16 @@ class Project(object):
            outfile.write(" ".join(fields))
            outfile.write("\n")

-        # todo : fill in the descriptive fields, change to file-database
+        # todo : change to file-database
        self._db.connect(":memory:")
        project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
        job_id = self._db.register_job(project_id,
-                                       "job-name",
+                                       self.job_name,
                                       self.mode,
                                       socket.gethostname(),
-                                       "git-hash",
+                                       self.git_hash,
                                       datetime.datetime.now(),
-                                       "description")
+                                       self.description)
        self._db.register_params(model_fields)
        self._db.create_models_view()

@ -1012,6 +1278,7 @@ class Project(object):
                values_dict = {"_" + k: v for k, v in values_dict.items()}
                values_dict.update(parent_task.model)
                values_dict['_rfac'] = parent_task.rfac
+                values_dict['_secs'] = parent_task.time.total_seconds()
                values_list = [values_dict[field] for field in self._tasks_fields]
                with open(self.output_file + ".tasks.dat", "a") as outfile:
                    outfile.write(" ".join(format(value) for value in values_list) + "\n")
@ -1258,6 +1525,76 @@ class Project(object):

        return _files

+    def before_atomic_scattering(self, task, par, clu):
+        """
+        project hook before atomic scattering factors are calculated.
+
+        this method derives modified Params and Cluster objects for the atomic scattering calculation
+        from the original objects that will be used in the multiple scattering calculation.
+
+        in the basic version, the method does not change the objects
+        except that it returns None for the root task (reference cluster).
+        subclasses may override it to modify or replace the cluster.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            if the model index is -1, the project can return the global reference cluster
+            (to calculate the fixed scattering factors that will be used for all models)
+            or None if no global scattering factors should be calculated.
+            do not modify this object!
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+            the method can make modifications to this object instance directly.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            the method can make modifications to this object instance directly.
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used for the calculation of atomic scattering factors.
+            these should either be the original function arguments,
+            or copies of the original arguments.
+            if atomic scattering factors should not be calculated, both elements of the tuple should be None.
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
+    def after_atomic_scattering(self, task, par, clu):
+        """
+        project hook after atomic scattering factors are calculated.
+
+        this method cleans up the Params and Cluster objects from the atomic scattering calculation
+        so that they can be used in the multiple scattering calculation.
+
+        in the basic version, the method just passes the input parameters for model tasks
+        and returns None for the root task.
+        subclasses may override it and modify the cluster and/or input parameters
+        so that the desired atomic scattering factors are used.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            if the model index is -1, the project should return the global reference cluster
+            (to calculate the fixed scattering factors that will be used for all models)
+            or None if no global scattering factors should be calculated.
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            do not modify this object, make a copy!
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used in the multiple scattering calculation.
+            these should either be the original function arguments,
+            or copies of the original arguments.
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
    def cleanup(self):
        """
        delete unwanted files at the end of a project.
--- a/projects/demo/fcc.py
+++ b/projects/demo/fcc.py
@ -1,12 +1,10 @@
-#!/usr/bin/env python2
-
 """
@package pmsco.projects.fcc
 scattering calculation project for the (111) surface of an arbitrary face-centered cubic crystal

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

-@copyright (c) 2015 by Paul Scherrer Institut @n
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
@ -171,16 +169,14 @@ class FCC111Project(mp.Project):
            dom.add_param('Zsurf',    1.00,    0.00,  2.00, 0.50)

        return dom
-        
-def create_project(element):
+
+
+def create_project():
    """
    create an FCC111Project calculation project.
-
-    @param element: symbol of the chemical element of the atoms contained in the cluster.
    """

    project = FCC111Project()
-    project.element = element

    project_dir = os.path.dirname(os.path.abspath(__file__))
    project.data_dir = project_dir
@ -188,9 +184,9 @@ def create_project(element):
    # scan dictionary
    # to select any number of scans, add their dictionary keys as scans option on the command line
    project.scan_dict['default'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
-                                  'emitter': "Ni", 'initial_state': "3s"}
+                                    'emitter': "Ni", 'initial_state': "3s"}
    project.scan_dict['holo'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
-                                  'emitter': "Ni", 'initial_state': "3s"}
+                                 'emitter': "Ni", 'initial_state': "3s"}
    project.scan_dict['alpha'] = {'filename': os.path.join(project_dir, "demo_alpha_scan.etp"),
                                  'emitter': "Ni", 'initial_state': "3s"}

@ -224,6 +220,7 @@ def set_project_args(project, project_args):

    try:
        if project_args.element:
+            project.element = project_args.element
            for scan in project.scans:
                scan.emitter = project_args.element
            logger.warning(BMsg("override emitters to {0}", project.emitter))
@ -258,5 +255,5 @@ def parse_project_args(_args):
    parser.add_argument('--energy', type=float,
                        help="kinetic energy of photoelectron (override scan file)")

-    parsed_args = parser.parse_known_args(_args)
+    parsed_args = parser.parse_args(_args)
    return parsed_args
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,6 @@
 attrdict
 fasteners
-numpy
+numpy >= 1.13
 periodictable
 statsmodels
 mpi4py
@ -9,3 +9,5 @@ mock
 scipy
 matplotlib
 future
+swig
+gitpython
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@ -10,7 +10,7 @@ to run the tests, change to the directory which contains the tests directory, an

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

-@copyright (c) 2015-17 by Paul Scherrer Institut @n
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
@ -102,16 +102,18 @@ class TestClusterFunctions(unittest.TestCase):
        @return: None
        """
        clu = self.create_cube()
-        xy2 = clu.data[['x', 'y']].copy()
-        xy3 = xy2.view((xy2.dtype[0], len(xy2.dtype.names)))
+        xy3 = np.empty((clu.data.shape[0], 2), np.float32)
+        xy3[:, 0] = clu.data['x']
+        xy3[:, 1] = clu.data['y']
        ctr = np.asarray((1.0, 0.0, 0.0))
        dist = np.linalg.norm(xy3 - ctr[0:2], axis=1)
        self.assertAlmostEqual(1.0, dist[0])
        self.assertAlmostEqual(0.0, dist[1])

        clu.clear()
-        xy2 = clu.data[['x', 'y']].copy()
-        xy3 = xy2.view((xy2.dtype[0], len(xy2.dtype.names)))
+        xy3 = np.empty((clu.data.shape[0], 2), np.float32)
+        xy3[:, 0] = clu.data['x']
+        xy3[:, 1] = clu.data['y']
        ctr = np.asarray((1.0, 0.0, 0.0))
        dist = np.linalg.norm(xy3 - ctr[0:2], axis=1)
        self.assertEqual(0, dist.shape[0])
@ -156,7 +158,7 @@ class TestClusterFunctions(unittest.TestCase):
        clu.set_emitter(idx=0)
        clu.set_emitter(idx=9)
        self.assertEqual(2, clu.get_emitter_count())
-        result = clu.get_emitters()
+        result = clu.get_emitters(['x', 'y', 'z', 't'])
        expect = [(0., 0., 0., 1), (1., 0., 1., 10)]
        self.assertEqual(expect, result)

@ -233,7 +235,7 @@ class TestClusterFunctions(unittest.TestCase):
        emitter = np.array((0.0, 0.0, 0.0))
        clu.add_layer(7, a1, b1, b2)
        pos = clu.find_positions(pos=emitter)
-        self.assertEqual(len(pos), 1)
+        self.assertEqual(1, len(pos))

    def test_add_cluster(self):
        clu1 = mc.Cluster()
@ -244,15 +246,18 @@ class TestClusterFunctions(unittest.TestCase):
        clu1.add_atom(5, np.asarray([0, 0, -2]), 0)

        clu2 = mc.Cluster()
-        clu2.add_atom(3, np.asarray([-0.2, 0, 0]), 0)
-        clu2.add_atom(4, np.asarray([0, -0.2, 0]), 0)
-        clu2.add_atom(5, np.asarray([0, 0.05, -1]), 0)
-        clu2.add_atom(5, np.asarray([0, 0, -1.01]), 0)
-        clu2.add_atom(6, np.asarray([0, 0, -1.99]), 0)
+        clu2.add_atom(3, np.asarray([-0.2, 0, 0]), 0)   # unique
+        clu2.add_atom(4, np.asarray([0, -0.2, 0]), 0)   # unique
+        clu2.add_atom(5, np.asarray([0, 0.05, -1]), 0)  # not unique
+        clu2.add_atom(5, np.asarray([0, 0, -1.09]), 0)  # just within tolerance of uniqueness
+        clu2.add_atom(6, np.asarray([0, 0, -1.99]), 0)  # not unique
+        clu2.add_atom(7, np.asarray([0, 0, -1.10]), 0)  # just out of tolerance of uniqueness

        clu1.set_rmax(1.5)
        clu1.add_cluster(clu2, check_rmax=True, check_unique=True, tol=0.1)
-        self.assertEqual(clu1.get_atom_count(), 5+2)
+        self.assertEqual(5+3, clu1.get_atom_count())
+        self.assertEqual(7, clu1.data['t'][-1])
+        self.assertEqual(6, clu2.data.shape[0])

    def test_find_positions(self):
        clu = mc.Cluster()
@ -269,8 +274,14 @@ class TestClusterFunctions(unittest.TestCase):
        clu.add_layer(7, a_N, b1, b2)
        clu.add_layer(5, a_B, b1, b2)
        pos = clu.find_positions(pos=emitter)
-        self.assertEqual(len(pos), 1)
-        self.assertEqual(pos[0], 206)
+        self.assertEqual(1, len(pos))
+        self.assertEqual(206, pos[0])
+
+        # position in the format returned by get_emitters
+        emitter = (emitter[0], emitter[1], emitter[2], 7)
+        pos = clu.find_positions(pos=emitter)
+        self.assertEqual(1, len(pos))
+        self.assertEqual(206, pos[0])

    def test_find_index_cylinder(self):
        clu = self.create_cube()
@ -278,11 +289,11 @@ class TestClusterFunctions(unittest.TestCase):
        rxy = 0.5
        rz = 1.0
        idx = clu.find_index_cylinder(pos, rxy, rz, None)
-        self.assertEqual(len(idx), 2)
-        self.assertEqual(clu.get_atomtype(idx[0]), 8)
-        self.assertEqual(clu.get_atomtype(idx[1]), 20)
+        self.assertEqual(2, len(idx))
+        self.assertEqual(8, clu.get_atomtype(idx[0]))
+        self.assertEqual(20, clu.get_atomtype(idx[1]))
        idx = clu.find_index_cylinder(pos, rxy, rz, 8)
-        self.assertEqual(len(idx), 1)
+        self.assertEqual(1, len(idx))

    def test_trim_cylinder(self):
        clu = mc.Cluster()
@ -296,12 +307,12 @@ class TestClusterFunctions(unittest.TestCase):
        r0 = 2.3
        z0 = 4.2
        clu.trim_cylinder(r0, z0)
-        self.assertEqual(clu.data.dtype, clu.dtype)
-        self.assertEqual(clu.data.shape[0], 21 * 5)
-        self.assertEqual(clu.data[1]['i'], 2)
-        self.assertEqual(clu.data[1]['s'], 'N')
-        self.assertEqual(clu.data[1]['t'], 7)
-        self.assertEqual(clu.get_emitter_count(), 1)
+        self.assertEqual(clu.dtype, clu.data.dtype)
+        self.assertEqual(21 * 5, clu.data.shape[0])
+        self.assertEqual(2, clu.data[1]['i'])
+        self.assertEqual('N', clu.data[1]['s'])
+        self.assertEqual(7, clu.data[1]['t'])
+        self.assertEqual(1, clu.get_emitter_count())
        n_low = np.sum(clu.data['z'] < -z0)
        self.assertEqual(0, n_low)
        n_high = np.sum(clu.data['z'] > z0)
@ -320,12 +331,12 @@ class TestClusterFunctions(unittest.TestCase):
        clu.set_emitter(pos=v_pos)
        r0 = 2.3
        clu.trim_sphere(r0)
-        self.assertEqual(clu.data.dtype, clu.dtype)
-        self.assertEqual(clu.data.shape[0], 39)
-        self.assertEqual(clu.data[1]['i'], 2)
-        self.assertEqual(clu.data[1]['s'], 'N')
-        self.assertEqual(clu.data[1]['t'], 7)
-        self.assertEqual(clu.get_emitter_count(), 1)
+        self.assertEqual(clu.dtype, clu.data.dtype)
+        self.assertEqual(39, clu.data.shape[0])
+        self.assertEqual(2, clu.data[1]['i'])
+        self.assertEqual('N', clu.data[1]['s'])
+        self.assertEqual(7, clu.data[1]['t'])
+        self.assertEqual(1, clu.get_emitter_count())
        n_out = np.sum(clu.data['x']**2 + clu.data['y']**2 + clu.data['z'] > r0**2)
        self.assertEqual(0, n_out)

@ -355,9 +366,9 @@ class TestClusterFunctions(unittest.TestCase):
    def test_trim_slab(self):
        clu = self.create_cube()
        clu.trim_slab('x', 0.5, 1.1)
-        self.assertEqual(clu.data.dtype, clu.dtype)
-        self.assertEqual(clu.data.shape[0], 9 * 2)
-        self.assertEqual(clu.get_emitter_count(), 1)
+        self.assertEqual(clu.dtype, clu.data.dtype)
+        self.assertEqual(9 * 2, clu.data.shape[0])
+        self.assertEqual(1, clu.get_emitter_count())

    def test_save_to_file(self):
        clu = self.create_cube()
@ -367,12 +378,34 @@ class TestClusterFunctions(unittest.TestCase):
        clu.save_to_file(f, mc.FMT_XYZ, "qwerty", emitters_only=True)
        f.seek(0)
        line = f.readline()
-        self.assertEqual(line, b"2\n", b"line 1: " + line)
+        self.assertEqual(b"2\n", line, b"line 1: " + line)
        line = f.readline()
-        self.assertEqual(line, b"qwerty\n", b"line 2: " + line)
+        self.assertEqual(b"qwerty\n", line, b"line 2: " + line)
        line = f.readline()
        self.assertRegexpMatches(line, b"H +[0.]+ +[0.]+ +[0.]+", b"line 3: " + line)
        line = f.readline()
        self.assertRegexpMatches(line, b"Si +[01.-]+ +[01.-]+ +[0.]+", b"line 4: " + line)
        line = f.readline()
-        self.assertEqual(line, b"", b"end of file")
+        self.assertEqual(b"", line, b"end of file")
+
+    def test_update_atoms(self):
+        clu = mc.Cluster()
+        clu.add_atom(1, np.asarray([0, 0, 0]), 1)
+        clu.add_atom(3, np.asarray([0, 1, 0]), 0)
+        clu.add_atom(5, np.asarray([-1, 0, 0]), 0)
+        clu.add_atom(6, np.asarray([0, -1, 0]), 0)
+        clu.add_atom(2, np.asarray([1, 0, 0]), 0)
+        clu.add_atom(4, np.asarray([0, 0, 1]), 0)
+
+        other = mc.Cluster()
+        other.add_atom(1, np.asarray([0, 0, 0]), 1)
+        other.add_atom(5, np.asarray([-1, 0, 0]), 0)
+        other.add_atom(2, np.asarray([1, 0, 0]), 0)
+        other.add_atom(6, np.asarray([0, -1, 0]), 0)
+        other.add_atom(3, np.asarray([0, 1, 0]), 0)
+        other.add_atom(4, np.asarray([0, 0, 1]), 0)
+
+        other.data['c'] = np.asarray((1, 2, 2, 3, 3, 4))
+        clu.update_atoms(other, {'c'})
+        expected = np.asarray((1, 3, 2, 3, 2, 4))
+        np.testing.assert_array_equal(expected, clu.data['c'])
--- a/tests/test_database.py
+++ b/tests/test_database.py
@ -143,7 +143,7 @@ class TestDatabase(unittest.TestCase):
        self.db.insert_model(model5)
        results = self.db.query_project_params(project_id=project1)
        expected = ['parA', 'parB']
-        self.assertEqual(expected, sorted(results.keys()))
+        self.assertEqual(expected, sorted(list(results.keys())))

    def test_insert_model(self):
        self.setup_sample_database()
--- a/tests/test_grid.py
+++ b/tests/test_grid.py
@ -0,0 +1,71 @@
+"""
+@package tests.test_grid
+unit tests for pmsco.optimizers.grid
+
+the purpose of these tests is to help debug the code.
+
+to run the tests, change to the directory which contains the tests directory, and execute =nosetests=.
+
+@pre nose must be installed (python-nose package on Debian).
+
+@author Matthias Muntwiler, matthias.muntwiler@psi.ch
+
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import random
+import unittest
+
+import pmsco.optimizers.grid as mo
+import pmsco.project as mp
+
+
+class TestPopulation(unittest.TestCase):
+    def setUp(self):
+        random.seed(0)
+        self.domain = mp.Domain()
+
+        self.domain.add_param('A', 1.5, 1.0, 2.0, 0.2)
+        self.domain.add_param('B', 2.5, 2.0, 3.0, 0.25)
+        self.domain.add_param('C', 3.5, 3.5, 3.5, 0.0)
+        self.expected_popsize = 30
+        self.expected_names = ('_model', '_rfac', 'A', 'B', 'C')
+
+        self.pop = mo.GridPopulation()
+
+    def tearDown(self):
+        # after each test method
+        self.pop = None
+
+    @classmethod
+    def setup_class(cls):
+        # before any methods in this class
+        pass
+
+    @classmethod
+    def teardown_class(cls):
+        # teardown_class() after any methods in this class
+        pass
+
+    def test_setup(self):
+        self.pop.setup(self.domain)
+        self.assertEqual(self.pop.positions.dtype.names, self.expected_names)
+        self.assertEqual(self.pop.positions.shape, (self.expected_popsize,))
+        self.assertEqual(self.pop.model_count, self.expected_popsize)
+        check = np.arange(self.expected_popsize)
+        np.testing.assert_array_equal(self.pop.positions['_model'], check)
+        check = np.ones(self.expected_popsize) * 2.1
+        np.testing.assert_array_almost_equal(self.pop.positions['_rfac'], check)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/tests/test_project.py
+++ b/tests/test_project.py
@ -20,14 +20,71 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import unittest
+
 import mock
 import numpy as np
+import os
+import unittest
+
 import pmsco.data as data
 import pmsco.dispatch as dispatch
 import pmsco.project as project


+class TestScan(unittest.TestCase):
+    """
+    test case for @ref pmsco.project.Scan class
+
+    """
+    def test_import_scan_file(self):
+        base_dir = os.path.dirname(os.path.abspath(__file__))
+        test_file = os.path.join(base_dir, "..", "projects", "twoatom", "twoatom_energy_alpha.etpai")
+
+        scan = project.Scan()
+        scan.import_scan_file(test_file, "C", "1s")
+
+        mode = ['e', 'a']
+        self.assertEqual(scan.mode, mode)
+
+        ae = np.arange(10, 1005, 5)
+        at = np.asarray([0])
+        ap = np.asarray([0])
+        aa = np.arange(-90, 91, 1)
+
+        np.testing.assert_array_almost_equal(scan.energies, ae)
+        np.testing.assert_array_almost_equal(scan.thetas, at)
+        np.testing.assert_array_almost_equal(scan.phis, ap)
+        np.testing.assert_array_almost_equal(scan.alphas, aa)
+
+    def test_define_scan(self):
+        scan = project.Scan()
+        p0 = np.asarray([20])
+        p1 = np.linspace(1, 4, 4)
+        p2 = np.linspace(11, 13, 3)
+        d = {'t': p1, 'e': p0, 'p': p2}
+        scan.define_scan(d, "C", "1s")
+
+        ae = np.asarray([20])
+        at = np.asarray([1, 2, 3, 4])
+        ap = np.asarray([11, 12, 13])
+        aa = np.asarray([0])
+
+        np.testing.assert_array_almost_equal(scan.energies, ae)
+        np.testing.assert_array_almost_equal(scan.thetas, at)
+        np.testing.assert_array_almost_equal(scan.phis, ap)
+        np.testing.assert_array_almost_equal(scan.alphas, aa)
+
+        re = np.ones(12) * 20
+        rt = np.asarray([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])
+        rp = np.asarray([11, 12, 13, 11, 12, 13, 11, 12, 13, 11, 12, 13])
+        ra = np.ones(12) * 0
+
+        np.testing.assert_array_almost_equal(scan.raw_data['e'], re)
+        np.testing.assert_array_almost_equal(scan.raw_data['t'], rt)
+        np.testing.assert_array_almost_equal(scan.raw_data['p'], rp)
+        np.testing.assert_array_almost_equal(scan.raw_data['a'], ra)
+
+
 class TestProject(unittest.TestCase):
    def setUp(self):
        # before each test method