public distro 2.1.0

2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions
--- a/pmsco/calculators/calculator.py
+++ b/pmsco/calculators/calculator.py
@ -11,7 +11,7 @@ TestCalcInterface is provided for testing the PMSCO code quickly without calling

@author Matthias Muntwiler

-@copyright (c) 2015-18 by Paul Scherrer Institut @n
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
@ -56,11 +56,25 @@ class Calculator(object):
            @arg the first element is the name of the main ETPI or ETPAI result file to be further processed.
            @arg the second element is a dictionary that lists the names of all created data files with their category.
                 the dictionary key is the file name,
-                 the value is the file category (cluster, phase, etc.).
+                 the value is the file category (cluster, atomic, etc.).
        """
        return None, None


+class AtomicCalculator(Calculator):
+    """
+    abstract interface to a program that calculates atomic scattering properties.
+
+    the class does not add any members to Calculator.
+    it serves as the common base type of the atomic scattering calculators.
+    """
+
+
+class InternalAtomicCalculator(AtomicCalculator):
+    """
+    placeholder atomic calculator without functionality of its own.
+
+    to be used if the multiple scattering calculator calculates the scattering factors internally.
+    """
+
+
 class TestCalculator(Calculator):
    """
    interface class producing random data for testing the MSCO code without calling an external program.
--- a/pmsco/calculators/edac.py
+++ b/pmsco/calculators/edac.py
@ -55,7 +55,10 @@ class EdacCalculator(calculator.Calculator):

        @param filepath: (str) name and path of the file to be created.

+        @return dictionary of created files {filename: category}
        """
+        files = {}
+
        with open(filepath, "w") as f:
            f.write("verbose off\n")
            f.write("cluster input {0}\n".format(params.cluster_file))
@ -146,9 +149,16 @@ class EdacCalculator(calculator.Calculator):
            scatterers = ["scatterer {at} {fi}\n".format(at=at, fi=fi)
                          for (at, fi) in params.phase_files.items()
                          if os.path.isfile(fi)]
-            if scatterers:
+            rme = ["rmat {fi}\n".format(fi=fi)
+                   for (at, fi) in params.rme_files.items()
+                   if at == params.emitters[0][3] and os.path.isfile(fi)] or \
+                  ["rmat inline 1 regular1 {l0} {pv} {pd} {mv} {md}\n".format(l0=params.l_init,
+                   pv=params.rme_plus_value, pd=params.rme_plus_shift,
+                   mv=params.rme_minus_value, md=params.rme_minus_shift)]
+            if scatterers and rme:
                for scat in scatterers:
                    f.write(scat)
+                f.write(rme[0])
            else:
                f.write("muffin-tin\n")

@ -162,16 +172,27 @@ class EdacCalculator(calculator.Calculator):
            f.write("orders {0:d} ".format(len(params.orders)))
            f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
            f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
-            # f.write("cluster output l(A) out.clu")
-            # problems:
-            # - muffin-tin relabels atoms
-            # - there can be multiple atom types for the same chemical element
-            # - we have to compare coordinates to find the mapping between input and output cluster
-            # f.write("scan scatterer i phase-shifts i.pha")
-            # f.write("scan scatterer i potential i.pot")
+
+            # scattering factor output (see project.Params.phase_output_classes)
+            if params.phase_output_classes is not None:
+                fn = "{0}.clu".format(params.output_file)
+                f.write("cluster output l(A) {fn}\n".format(fn=fn))
+                files[fn] = "output"
+                try:
+                    cls = (cl for cl in params.phase_output_classes)
+                except TypeError:
+                    cls = range(params.phase_output_classes)
+                for cl in cls:
+                    fn = "{of}.{cl}.scat".format(cl=cl, of=params.output_file)
+                    f.write("scan scatterer {cl} phase-shifts {fn}\n".format(cl=cl, fn=fn))
+                    files[fn] = "output"
+
            f.write("scan pd {0}\n".format(params.output_file))
+            files[params.output_file] = "output"
            f.write("end\n")

+        return files
+
    def run(self, params, cluster, scan, output_file):
        """
        run EDAC with the given parameters and cluster.
@ -205,13 +226,13 @@ class EdacCalculator(calculator.Calculator):
        params.cluster_file = clu_filename
        params.output_file = out_filename
        params.data_file = dat_filename
-        params.emitters = cluster.get_emitters()
+        params.emitters = cluster.get_emitters(['x', 'y', 'z', 'c'])

        # save parameter files
        logger.debug("writing cluster file %s", clu_filename)
        cluster.save_to_file(clu_filename, fmt=mc.FMT_EDAC)
        logger.debug("writing input file %s", par_filename)
-        self.write_input_file(params, scan, par_filename)
+        files = self.write_input_file(params, scan, par_filename)

        # run EDAC
        logger.info("calling EDAC with input file %s", par_filename)
@ -244,6 +265,9 @@ class EdacCalculator(calculator.Calculator):
        logger.debug("save result to file %s", etpi_filename)
        md.save_data(etpi_filename, result_etpi)

-        files = {clu_filename: 'input', par_filename: 'input', dat_filename: 'output',
-                 etpi_filename: 'region'}
+        files[clu_filename] = 'input'
+        files[par_filename] = 'input'
+        files[dat_filename] = 'output'
+        files[etpi_filename] = 'region'
+
        return etpi_filename, files
--- a/pmsco/calculators/phagen/init.py
+++ b/pmsco/calculators/phagen/init.py
--- a/pmsco/calculators/phagen/makefile
+++ b/pmsco/calculators/phagen/makefile
@ -0,0 +1,43 @@
+SHELL=/bin/sh
+
+# makefile for PHAGEN program and module
+#
+# the PHAGEN source code is not included in the public distribution.
+# please obtain the PHAGEN code from the original author,
+# and copy it to this directory before compilation.
+#
+# see the top-level makefile for additional information.
+#
+# targets:
+#   all, phagen   build the stand-alone program (phagen.exe) and the python extension module.
+#   clean         remove compiled files.
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
+.PHONY: all clean phagen
+
+# GNU make predefines FC (as f77), so that "FC?=gfortran" alone never takes effect.
+# replace the built-in default only. values from the environment or command line are kept.
+ifeq ($(origin FC),default)
+FC=gfortran
+endif
+FC?=gfortran
+FCOPTS?=
+F2PY?=f2py
+F2PYOPTS?=
+CC?=gcc
+CCOPTS?=
+SWIG?=swig
+SWIGOPTS?=
+PYTHON?=python
+PYTHONOPTS?=
+PYTHONINC?=
+PYTHON_CONFIG = ${PYTHON}-config
+PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
+PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix)
+# EXT_SUFFIX is the file name suffix of the extension module built by f2py.
+# without a definition, the module target below would coincide with the phony target "phagen".
+# an EXT_SUFFIX passed from the environment or command line takes precedence.
+EXT_SUFFIX ?= $(PYTHON_EXT_SUFFIX)
+
+all: phagen
+
+phagen: phagen.exe phagen$(EXT_SUFFIX)
+
+# stand-alone executable
+phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc
	$(FC) $(FCOPTS) -o phagen.exe phagen_scf.f
+
+# f2py signature file. exposes the libmain subroutine only.
+# the source file is an order-only prerequisite: an existing signature file is not regenerated.
+phagen.pyf: | phagen_scf.f
	$(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain
+
+# python extension module
+phagen$(EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc
	$(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f
+
+clean:
	rm -f *.so *.o *.exe
--- a/pmsco/calculators/phagen/phagen_scf.f.patch
+++ b/pmsco/calculators/phagen/phagen_scf.f.patch
@ -0,0 +1,102 @@
+--- phagen_scf.orig.f	2019-06-05 16:45:52.977855859 +0200
+++ phagen_scf.f	2019-05-09 16:32:35.790286429 +0200
+@@ -174,6 +174,99 @@
+  1100 format(//,1x,' ** phagen terminated normally ** ',//)
+       end
+ 
+
+c-----------------------------------------------------------------------
+      subroutine libmain(infile,outfile,etcfile)
+c      main calculation routine
+c      entry point for external callers
+c
+c      infile: name of parameter input file
+c
+c      outfile: base name of output files
+c        output files with endings .list, .clu, .pha, .tl, .rad
+c        will be created
+c
+c      etcfile: declared as an argument but not referenced
+c        in the body of this routine
+c-----------------------------------------------------------------------
+      implicit real*8 (a-h,o-z)
+c
+      include 'msxas3.inc'
+      include 'msxasc3.inc'
+
+      character*60 infile,outfile,etcfile
+      character*70 listfile,clufile,tlfile,radfile,phafile
+
+c
+c.. constants
+      antoau  = 0.52917715d0
+      pi      = 3.141592653589793d0
+      ev      = 13.6058d0
+      zero    = 0.d0
+c.. threshold for linearity
+      thresh  = 1.d-4
+c.. fortran io units
+      idat = 5
+      iwr = 6
+      iphas = 30
+      iedl0 = 31
+      iwf = 32
+      iof = 17
+
+c.. build the names of the output files:
+c   the ending is written into a copy of outfile from position iii.
+c   (assumes that LnBlnk returns the position of the last
+c   non-blank character of its argument - to be confirmed)
+      iii=LnBlnk(outfile)+1
+      listfile=outfile
+      listfile(iii:)='.list'
+      clufile=outfile
+      clufile(iii:)='.clu'
+      phafile=outfile
+      phafile(iii:)='.pha'
+      tlfile=outfile
+      tlfile(iii:)='.tl'
+      radfile=outfile
+      radfile(iii:)='.rad'
+
+c.. open the parameter input file and the named output files.
+c   note that units 5 (idat) and 6 (iwr) are connected to files here.
+      open(idat,file=infile,form='formatted',status='old')
+      open(iwr,file=listfile,form='formatted',status='unknown')
+      open(10,file=clufile,form='formatted',status='unknown')
+      open(35,file=tlfile,form='formatted',status='unknown')
+      open(55,file=radfile,form='formatted',status='unknown')
+      open(iphas,file=phafile,form='formatted',status='unknown')
+
+c.. open scratch files
+      open(iedl0,form='unformatted',status='scratch')
+      open(iof,form='unformatted',status='scratch')
+      open(unit=21,form='unformatted',status='scratch')
+      open(60,form='formatted',status='scratch')
+      open(50,form='formatted',status='scratch')
+      open(unit=13,form='formatted',status='scratch')
+      open(unit=14,form='formatted',status='scratch')
+      open(unit=11,status='scratch')
+      open(unit=iwf,status='scratch')
+      open(unit=33,status='scratch')
+      open(unit=66,status='scratch')
+
+c.. run the calculation
+      call inctrl
+      call intit(iof)
+      call incoor
+      call calphas
+
+c.. close all units that were opened above
+      close(idat)
+      close(iwr)
+      close(10)
+      close(35)
+      close(55)
+      close(iphas)
+      close(iedl0)
+      close(iof)
+      close(60)
+      close(50)
+      close(13)
+      close(14)
+      close(11)
+      close(iwf)
+      close(33)
+      close(66)
+      close(21)
+
+      endsubroutine
+
+
+       subroutine inctrl
+       implicit real*8 (a-h,o-z)
+       include 'msxas3.inc'
--- a/pmsco/calculators/phagen/runner.py
+++ b/pmsco/calculators/phagen/runner.py
@ -0,0 +1,153 @@
+"""
+@package pmsco.calculators.phagen.runner
+Natoli/Sebilleau PHAGEN interface
+
+this module runs the PHAGEN program to calculate scattering factors and radial matrix element.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+import os
+import shutil
+import tempfile
+
+from pmsco.calculators.calculator import AtomicCalculator
+from pmsco.calculators.phagen.phagen import libmain
+from pmsco.calculators.phagen.translator import Translator
+import pmsco.cluster
+
+logger = logging.getLogger(__name__)
+
+
+class PhagenCalculator(AtomicCalculator):
+    """
+    use the PHAGEN program to calculate scattering factors and radial matrix element.
+
+    this produces scatterer, radial matrix element and cluster files for EDAC.
+    """
+
+    def run(self, params, cluster, scan, output_file):
+        """
+        create the input file, run PHAGEN, and translate the output to EDAC format.
+
+        the following files are created in the job work directory:
+        - scattering factor files in EDAC format.
+          their names are `output_file + "_{atomclass}.scat"`.
+        - radial matrix element file in EDAC format.
+          its name is `output_file + ".rme"`.
+        - cluster file in PMSCO format.
+          its name is `output_file + ".pmsco.clu"`.
+        - copies of the PHAGEN input, phase and radial files for reporting.
+          their names are `output_file + ".phagen.in"`, `".phagen.pha"` and `".phagen.rad"`.
+
+        the cluster and params objects are updated and linked to the scattering files
+        so that they can be passed to EDAC without further modification.
+        the radial matrix element file is written but not linked to the params object by this method.
+
+        note that the scattering files are numbered according to the atomic environment and not chemical element.
+        this means that the updated cluster (cluster object or ".clu" file)
+        must be used in the scattering calculation.
+        atomic index is not preserved - atoms in the input and output clusters can only be related by coordinate!
+
+        because PHAGEN generates a lot of files with hard-coded names,
+        the function creates a temporary directory for PHAGEN and deletes it before returning.
+        the process working directory is changed to the temporary directory while PHAGEN runs
+        and restored before the temporary directory is removed.
+
+        errors (IOError) while writing the input file or reading the PHAGEN results are logged and not raised.
+
+        @param params: pmsco.project.Params object.
+            the phase_files attribute is updated with the paths of the scattering files.
+
+        @param cluster: pmsco.cluster.Cluster object.
+            the cluster is updated with the one returned from PHAGEN.
+            the atom classes are linked to the scattering files.
+
+        @param scan: pmsco.project.Scan object.
+            the scan object is used to determine the kinetic energy range.
+
+        @param output_file: base path and name of the output files.
+
+        @return (None, dict) where dict maps the names of the created files to their category.
+            the category is "input" for the copy of the PHAGEN input file,
+            "output" for the copies of the PHAGEN phase and radial files,
+            "atomic" for the EDAC scattering and radial matrix element files,
+            and "cluster" for the cluster file.
+        """
+        transl = Translator()
+        transl.params.set_params(params)
+        transl.params.set_cluster(cluster)
+        transl.params.set_scan(scan)
+        phagen_cluster = pmsco.cluster.Cluster()
+
+        files = {}
+        prev_wd = os.getcwd()
+        with tempfile.TemporaryDirectory() as temp_dir:
+            os.chdir(temp_dir)
+            try:
+                os.mkdir("div")
+                os.mkdir("div/wf")
+                os.mkdir("plot")
+                os.mkdir("data")
+
+                # file names inside the temporary directory
+                infile = "phagen.in"
+                outfile = "phagen.out"
+                # libmain declares a third file name argument which its body does not reference.
+                etcfile = "phagen.etc"
+                phafile = outfile + ".pha"
+                radfile = outfile + ".rad"
+                clufile = outfile + ".clu"
+
+                # prepare input for phagen
+                try:
+                    transl.write_input(infile)
+                    report_infile = os.path.join(prev_wd, output_file + ".phagen.in")
+                    shutil.copy(infile, report_infile)
+                    files[report_infile] = "input"
+                except IOError:
+                    logger.warning("error writing phagen input file %s.", infile)
+
+                # call phagen
+                libmain(infile, outfile, etcfile)
+
+                # collect results
+                try:
+                    transl.parse_phagen_phase(phafile)
+                    report_phafile = os.path.join(prev_wd, output_file + ".phagen.pha")
+                    shutil.copy(phafile, report_phafile)
+                    files[report_phafile] = "output"
+                except IOError:
+                    logger.error("error loading phagen phase file %s", phafile)
+
+                try:
+                    transl.parse_radial_file(radfile)
+                    report_radfile = os.path.join(prev_wd, output_file + ".phagen.rad")
+                    shutil.copy(radfile, report_radfile)
+                    files[report_radfile] = "output"
+                except IOError:
+                    logger.error("error loading phagen radial file %s", radfile)
+
+                try:
+                    phagen_cluster.load_from_file(clufile, pmsco.cluster.FMT_PHAGEN_OUT)
+                except IOError:
+                    logger.error("error loading phagen cluster file %s", clufile)
+            finally:
+                # leave the temporary directory before it is removed.
+                # some platforms cannot delete the current working directory.
+                os.chdir(prev_wd)
+
+        # write edac files
+        scatfile = output_file + "_{}.scat"
+        scatfiles = transl.write_edac_scattering(scatfile)
+        params.phase_files = dict(scatfiles)
+        files.update({filename: "atomic" for filename in scatfiles.values()})
+
+        rmefile = output_file + ".rme"
+        transl.write_edac_emission(rmefile)
+        files[rmefile] = "atomic"
+
+        # copy the atom classes assigned by phagen into the caller's cluster
+        cluster.update_atoms(phagen_cluster, {'c'})
+        clufile = output_file + ".pmsco.clu"
+        cluster.save_to_file(clufile, pmsco.cluster.FMT_PMSCO)
+        files[clufile] = "cluster"
+
+        return None, files
--- a/pmsco/calculators/phagen/translator.py
+++ b/pmsco/calculators/phagen/translator.py
@ -0,0 +1,411 @@
+"""
+@package pmsco.calculators.phagen.translator
+Natoli/Sebilleau PHAGEN interface
+
+this module provides conversion between input/output files of PHAGEN and EDAC.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from pmsco.compat import open
+
+## rydberg energy in electron volts
+ERYDBERG = 13.6056923
+
+
+def state_to_edge(state):
+    """
+    translate spectroscopic notation to edge notation.
+
+    @param state: spectroscopic notation: "1s", "2s", "2p1/2", etc.
+        the first character is the principal quantum number.
+        the remainder names the sub-shell and may be omitted.
+    @return: edge notation: "k", "l1", "l2", etc.
+        note: if the j-value is not given, the lower j edge is returned.
+        if the sub-shell is not given at all, the first sub-shell is returned.
+    @raise ValueError if the principal quantum number is not an integer or the sub-shell letter is unknown.
+    """
+    jshells = ['s', 'p1/2', 'p3/2', 'd3/2', 'd5/2', 'f5/2', 'f7/2']
+    shell = int(state[0])
+    subshell_name = state[1:]
+    if not subshell_name:
+        # the sub-shell is missing. this case must be tested up front
+        # because an IndexError raised inside an except clause is not caught by a sibling except clause.
+        subshell = 1
+    elif subshell_name in jshells:
+        subshell = jshells.index(subshell_name) + 1
+    else:
+        # only the orbital letter is significant: select the lower j edge.
+        lshells = [s[0] for s in jshells]
+        subshell = lshells.index(subshell_name[0]) + 1
+    edge = "klmnop"[shell-1]
+    if shell > 1:
+        edge += str(subshell)
+    return edge
+
+
+class TranslationParams(object):
+    """
+    project parameters needed for translation.
+
+    energy unit is eV.
+    """
+    def __init__(self):
+        """
+        initialize the parameters with defaults: 1s initial state, zero binding energy, no cluster, no energies.
+        """
+        self.initial_state = "1s"
+        self.binding_energy = 0.
+        self.cluster = None
+        # the builtin float replaces the np.float alias which is not available in current numpy versions.
+        self.kinetic_energies = np.empty(0, dtype=float)
+
+    @property
+    def l_init(self):
+        """
+        angular momentum quantum number of the initial state (0 for s, 1 for p, 2 for d, 3 for f).
+
+        the value is derived from the second character of initial_state.
+        """
+        return "spdf".index(self.initial_state[1])
+
+    @property
+    def edge(self):
+        """
+        edge notation of the initial state, see state_to_edge.
+        """
+        return state_to_edge(self.initial_state)
+
+    def set_params(self, params):
+        """
+        set the translation parameters.
+
+        @param params: a pmsco.project.Params object or
+                       a dictionary containing some or all public fields of this class.
+        @return: None
+        """
+        try:
+            self.initial_state = params.initial_state
+            self.binding_energy = params.binding_energy
+        except AttributeError:
+            # not a Params object: treat it as a mapping of attribute names to values.
+            for key in params:
+                setattr(self, key, params[key])
+
+    def set_scan(self, scan):
+        """
+        set the scan parameters.
+
+        the kinetic energies are taken from, in this order of precedence,
+        the `energies` attribute, the 'e' item, or the argument itself.
+
+        @param scan: a pmsco.project.Scan object,
+                     a dictionary or structured array containing an 'e' item,
+                     or a plain sequence or array of energies.
+        @return: None
+        """
+        try:
+            energies = scan.energies
+        except AttributeError:
+            try:
+                energies = scan['e']
+            except (KeyError, IndexError, TypeError, ValueError):
+                # lists, tuples and arrays raise various exceptions if indexed by a string.
+                energies = scan
+        if not isinstance(energies, np.ndarray):
+            energies = np.array(energies)
+        self.kinetic_energies = energies
+
+    def set_cluster(self, cluster):
+        """
+        set the initial cluster.
+
+        @param cluster: a pmsco.cluster.Cluster object
+        @return: None
+        """
+        self.cluster = cluster
+
+
+class Translator(object):
+    """
+    data conversion to/from phagen input/output files.
+
+    usage:
+    1. set the translation parameters self.params.
+    2. call write_input to create the phagen input file.
+    3. call phagen on the input file.
+    4. call parse_phagen_phase.
+    5. call parse_radial_file.
+    6. call write_edac_scattering to produce the EDAC scattering matrix files.
+    7. call write_edac_emission to produce the EDAC emission matrix file.
+    """
+    def __init__(self):
+        """
+        initialize the object instance.
+
+        the scattering and emission arrays are created empty.
+        they are filled by parse_phagen_phase and parse_radial_file, respectively.
+        """
+        self.params = TranslationParams()
+        dt = [('e', 'f4'), ('a', 'i4'), ('l', 'i4'), ('t', 'c16')]
+        self.scattering = np.empty(0, dtype=dt)
+        dt = [('e', 'f4'), ('dw', 'c16'), ('up', 'c16')]
+        self.emission = np.empty(0, dtype=dt)
+
+    def write_cluster(self, f):
+        """
+        write the cluster section of the PHAGEN input file.
+
+        requires a valid pmsco.cluster.Cluster in self.params.cluster.
+        each atom is written as one line containing the 's', 't', 'x', 'y' and 'z' fields.
+        the section is terminated by a line starting with -1.
+
+        @param f: file or output stream (an object with a write method)
+
+        @return: None
+        """
+        for atom in self.params.cluster.data:
+            d = {k: atom[k] for k in atom.dtype.names}
+            f.write("{s} {t} {x} {y} {z}\n".format(**d))
+        f.write("-1 -1 0. 0. 0.\n")
+
+    def write_ionicity(self, f):
+        """
+        write the ionicity section of the PHAGEN input file.
+
+        ionicity is read from the 'q' column of the cluster.
+        all atoms of a chemical element must have the same charge state
+        because ionicity has to be specified per element.
+        this function writes the average of all charge states of an element.
+
+        @param f: file or output stream (an object with a write method)
+
+        @return: None
+        """
+        data = self.params.cluster.data
+        for element in np.unique(data['t']):
+            charge = np.mean(data['q'][data['t'] == element])
+            f.write("{t} {q}\n".format(t=element, q=charge))
+
+        f.write("-1\n")
+
+    def _job_params(self):
+        """
+        calculate the variable parameters of the job section of the PHAGEN input file.
+
+        energies are converted from eV to Rydberg.
+
+        @return: dictionary with the keys emin, emax, delta, edge, edge1, edge2, cip and ionzst.
+
+        @raise ValueError if the binding energy is (close to) zero.
+        """
+        energies = self.params.kinetic_energies
+        phagen_params = {}
+        phagen_params['emin'] = energies.min() / ERYDBERG
+        phagen_params['emax'] = energies.max() / ERYDBERG
+        # a scan of a single energy has no intervals. avoid the division by zero.
+        n_intervals = energies.shape[0] - 1
+        if n_intervals > 0:
+            phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / n_intervals
+        else:
+            phagen_params['delta'] = 0.
+        if phagen_params['delta'] < 0.0001:
+            phagen_params['delta'] = 0.1
+        phagen_params['edge'] = state_to_edge(self.params.initial_state)  # possibly not used
+        phagen_params['edge1'] = 'm4'  # auger not supported
+        phagen_params['edge2'] = 'm4'  # auger not supported
+        phagen_params['cip'] = self.params.binding_energy / ERYDBERG
+        if phagen_params['cip'] < 0.001:
+            raise ValueError("binding energy parameter is zero.")
+
+        # the cluster is ionic if at least one atom carries a significant charge.
+        if np.sum(np.abs(self.params.cluster.data['q']) >= 0.001) > 0:
+            phagen_params['ionzst'] = 'ionic'
+        else:
+            phagen_params['ionzst'] = 'neutral'
+
+        return phagen_params
+
+    def write_input(self, f):
+        """
+        write the PHAGEN input file.
+
+        the file consists of the job section, two comment lines, the cluster section and the ionicity section.
+        requires valid kinetic energies, binding energy, initial state and cluster in self.params.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @return: None
+
+        @raise ValueError if the binding energy is (close to) zero.
+            in this case, no file is created.
+        """
+        # the parameters are checked before a file is opened.
+        phagen_params = self._job_params()
+
+        if hasattr(f, "write"):
+            job_lines = (
+                "&job\n",
+                "calctype='xpd',\n",
+                "coor='angs',\n",
+                "cip={cip},\n",
+                "edge='{edge}',\n",
+                "edge1='{edge1}',\n",
+                "edge2='{edge2}',\n",
+                "gamma=0.03,\n",
+                "lmax_mode=2,\n",
+                "lmaxt=50,\n",
+                "emin={emin},\n",
+                "emax={emax},\n",
+                "delta={delta},\n",
+                "potgen='in',\n",
+                "potype='hedin',\n",
+                "norman='stdcrm',\n",
+                "ovlpfac=0.0,\n",
+                "ionzst='{ionzst}',\n",
+                "charelx='ex',\n",
+                "l2h=4\n",
+                "&end\n",
+                "comment 1\n",
+                "comment 2\n",
+                "\n",
+            )
+            f.write("".join(job_lines).format(**phagen_params))
+
+            self.write_cluster(f)
+            self.write_ionicity(f)
+        else:
+            with open(f, "w") as fi:
+                self.write_input(fi)
+
+    def parse_phagen_phase(self, f):
+        """
+        parse the phase output file from PHAGEN.
+
+        the phase file is written to div/phases.dat.
+        it contains the following columns:
+
+        @arg e energy (Ry)
+        @arg x1 unknown 1
+        @arg x2 unknown 2
+        @arg na atom index (1-based)
+        @arg nl angular momentum quantum number l
+        @arg tr real part of the scattering matrix element
+        @arg ti imaginary part of the scattering matrix element
+        @arg ph phase shift
+
+        the data is translated into the self.scattering array.
+
+        @arg e energy (eV)
+        @arg a atom index (1-based)
+        @arg l angular momentum quantum number l
+        @arg t complex scattering matrix element
+
+        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
+
+        @return: None
+        """
+        dt = [('e', 'f4'), ('x1', 'f4'), ('x2', 'f4'), ('na', 'i4'), ('nl', 'i4'),
+              ('tr', 'f8'), ('ti', 'f8'), ('ph', 'f4')]
+        # genfromtxt returns a 0-dimensional array if the file contains just one row.
+        data = np.atleast_1d(np.genfromtxt(f, dtype=dt))
+
+        self.scattering = np.resize(self.scattering, data.shape)
+        scat = self.scattering
+        scat['e'] = data['e'] * ERYDBERG
+        scat['a'] = data['na']
+        scat['l'] = data['nl']
+        scat['t'] = data['tr'] + 1j * data['ti']
+
+    def write_edac_scattering(self, filename_format, phases=False):
+        """
+        write scatterer files for EDAC.
+
+        produces one file for each atom class in self.scattering.
+
+        @param filename_format: file name including a placeholder {} for the atom class.
+
+        @param phases: write phase files instead of t-matrix files.
+
+        @return: dictionary that maps atom classes to file names
+        """
+        if phases:
+            write = self.write_edac_phase_file
+        else:
+            write = self.write_edac_scattering_file
+        scat = self.scattering
+        files = {}
+        for atom in np.unique(scat['a']):
+            f = filename_format.format(atom)
+            write(f, scat[scat['a'] == atom])
+            files[atom] = f
+
+        return files
+
+    def _write_edac_table(self, f, scat, kind, format_item, padding):
+        """
+        write the scattering data of one atom class to an EDAC scatterer stream.
+
+        common implementation of write_edac_scattering_file and write_edac_phase_file.
+        the output contains a header line and one line per energy.
+        the energy value is written only if there is more than one energy.
+        lines which contain fewer than lmax + 1 items are padded.
+
+        @param f: output stream (an object with a write method).
+
+        @param scat: a slice of the self.scattering array belonging to the same atom class.
+
+        @param kind: (str) data type keyword of the header line.
+
+        @param format_item: function that converts a complex matrix element to a string.
+
+        @param padding: (str) text that is written in place of a missing item.
+
+        @return: None
+        """
+        energies = np.unique(scat['e'])
+        ne = energies.shape[0]
+        lmax = scat['l'].max()
+        if ne == 1:
+            f.write("1 {lmax} regular {kind}\n".format(lmax=lmax, kind=kind))
+        else:
+            f.write("{nk} E(eV) {lmax} regular {kind}\n".format(nk=ne, lmax=lmax, kind=kind))
+        for energy in energies:
+            energy_scat = scat[scat['e'] == energy]
+            if ne > 1:
+                f.write("{0:.3f} ".format(energy))
+            for item in energy_scat:
+                f.write(format_item(item['t']))
+            for _ in range(len(energy_scat), lmax + 1):
+                f.write(padding)
+            f.write("\n")
+
+    def write_edac_scattering_file(self, f, scat):
+        """
+        write a scatterer file for EDAC.
+
+        each matrix element is written as a pair of real and imaginary part.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @param scat: a slice of the self.scattering array belonging to the same atom class.
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            self._write_edac_table(f, scat, "tl",
+                                   lambda t: " {0:.6f} {1:.6f}".format(t.real, t.imag),
+                                   " 0 0")
+        else:
+            with open(f, "w") as fi:
+                self.write_edac_scattering_file(fi, scat)
+
+    def write_edac_phase_file(self, f, scat):
+        """
+        write a phase file for EDAC.
+
+        each matrix element is written as its complex argument (numpy.angle).
+
+        @param f: file path or output stream (an object with a write method).
+
+        @param scat: a slice of the self.scattering array belonging to the same atom class.
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            self._write_edac_table(f, scat, "real",
+                                   lambda t: " {0:.6f}".format(np.angle(t)),
+                                   " 0")
+        else:
+            with open(f, "w") as fi:
+                self.write_edac_phase_file(fi, scat)
+
+    def parse_radial_file(self, f):
+        """
+        parse the radial matrix element output file from phagen.
+
+        the four columns of the file are read as real and imaginary parts of two complex numbers per row.
+        the first number is stored in the 'dw' field, the second one in the 'up' field of self.emission.
+        the 'e' field is not set by this method.
+
+        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
+
+        @return: None
+        """
+        dt = [('ar', 'f8'), ('ai', 'f8'), ('br', 'f8'), ('bi', 'f8')]
+        # genfromtxt returns a 0-dimensional array if the file contains just one row.
+        data = np.atleast_1d(np.genfromtxt(f, dtype=dt))
+
+        self.emission = np.resize(self.emission, data.shape)
+        emission = self.emission
+        emission['dw'] = data['ar'] + 1j * data['ai']
+        emission['up'] = data['br'] + 1j * data['bi']
+
+    def write_edac_emission(self, f):
+        """
+        write the radial photoemission matrix element in EDAC format.
+
+        requires self.emission, self.params.kinetic_energies and self.params.initial_state.
+        the 'e' field of self.emission is overwritten with the kinetic energies.
+        the energy value is written only if there is more than one energy.
+
+        @param f: file path or output stream (an object with a write method).
+
+        @return: None
+        """
+        if hasattr(f, "write"):
+            l0 = self.params.l_init
+            energies = self.params.kinetic_energies
+            emission = self.emission
+            emission['e'] = energies
+            ne = energies.shape[0]
+            if ne == 1:
+                f.write("1 regular2 {l0}\n".format(l0=l0))
+            else:
+                f.write("{nk} E(eV) regular2 {l0}\n".format(nk=ne, l0=l0))
+            for item in emission:
+                if ne > 1:
+                    f.write("{0:.3f} ".format(item['e']))
+                f.write(" {0:.6f} {1:.6f}".format(item['up'].real, item['up'].imag))
+                f.write(" {0:.6f} {1:.6f}".format(item['dw'].real, item['dw'].imag))
+                f.write("\n")
+        else:
+            with open(f, "w") as of:
+                self.write_edac_emission(of)
--- a/pmsco/cluster.py
+++ b/pmsco/cluster.py
@ -1,12 +1,15 @@
+#!/usr/bin/env python
 """
@package pmsco.cluster
-cluster tools for MSC and EDAC
+cluster building and handling

 the Cluster class is provided to facilitate the construction and import/export of clusters.
 a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
-the class can import from/export to EDAC, MSC, and XYZ cluster files.
+the class can import from/export to various file formats.
 XYZ allows for export to 3D visualizers, e.g. Avogadro.

+the module has a command line interface to convert cluster files.
+
@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
 pip install --user periodictable
@ -14,7 +17,11 @@ pip install --user periodictable

@author Matthias Muntwiler

-@copyright (c) 2015-18 by Paul Scherrer Institut
+@copyright (c) 2015-19 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
 """

 from __future__ import absolute_import
@ -34,6 +41,12 @@ FMT_MSC = 1
 FMT_EDAC = 2
 ## XYZ file format identifier
 FMT_XYZ = 3
+## PHAGEN output file format identifier
+FMT_PHAGEN_OUT = 4
+## PHAGEN input file format identifier
+FMT_PHAGEN_IN = 5
+## native file format identifier
+FMT_PMSCO = 6

 # python version dependent type of chemical symbol
 if sys.version_info[0] >= 3:
@ -43,11 +56,14 @@ else:

 ## numpy.array datatype of Cluster.data array
 DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
-                          ('e', 'u1')]
+                          ('e', 'u1'), ('q', 'f4'), ('c', 'i4')]
 ## file format of internal Cluster.data array
-FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
+FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
 ## field (column) names of internal Cluster.data array
-FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']
+FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
+## column names for export
+NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
+                          'e': 'emitter', 'q': 'charge'}

 ## numpy.array datatype of cluster for MSC cluster file input/output
 DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
@ -57,11 +73,11 @@ FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
 FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

 ## numpy.array datatype of cluster for EDAC cluster file input/output
-DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
+DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
 ## file format of EDAC cluster file
 FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
 ## field (column) names of EDAC cluster file
-FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']
+FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']

 ## numpy.array datatype of cluster for XYZ file input/output
 DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
@ -70,6 +86,44 @@ FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
 ## field (column) names of XYZ cluster file
 FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']

+## numpy.array datatype of cluster for PHAGEN output file input/output
+DTYPE_CLUSTER_PHAGEN_OUT = [('i', 'i4'), ('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('c', 'i4')]
+## file format of PHAGEN cluster output file
+FMT_CLUSTER_PHAGEN_OUT = ["%5u", "%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%5u"]
+## field (column) names of PHAGEN cluster output file
+FIELDS_CLUSTER_PHAGEN_OUT = ['i', 's', 't', 'x', 'y', 'z', 'c']
+
+## numpy.array datatype of cluster for PHAGEN input file input/output
+DTYPE_CLUSTER_PHAGEN_IN = [('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('q', 'f4')]
+## file format of PHAGEN input file, cluster section
+FMT_CLUSTER_PHAGEN_IN = ["%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%7.3f"]
+## field (column) names of PHAGEN input file, cluster section
+FIELDS_CLUSTER_PHAGEN_IN = ['s', 't', 'x', 'y', 'z', 'q']
+
+## dictionary of supported cluster data types
+# maps a FMT_ file format identifier to the numpy dtype description of that format.
+# NOTE(review): FMT_PMSCO has no entry in this and the following two dictionaries -
+# confirm whether it should map to the internal format like FMT_DEFAULT.
+CLUSTER_DTYPES = {FMT_DEFAULT: DTYPE_CLUSTER_INTERNAL,
+                  FMT_MSC: DTYPE_CLUSTER_MSC,
+                  FMT_EDAC: DTYPE_CLUSTER_EDAC,
+                  FMT_XYZ: DTYPE_CLUSTER_XYZ,
+                  FMT_PHAGEN_OUT: DTYPE_CLUSTER_PHAGEN_OUT,
+                  FMT_PHAGEN_IN: DTYPE_CLUSTER_PHAGEN_IN}
+
+## dictionary of supported cluster file formats
+# maps a FMT_ file format identifier to the list of per-column format strings.
+CLUSTER_FMTS = {FMT_DEFAULT: FMT_CLUSTER_INTERNAL,
+                FMT_MSC: FMT_CLUSTER_MSC,
+                FMT_EDAC: FMT_CLUSTER_EDAC,
+                FMT_XYZ: FMT_CLUSTER_XYZ,
+                FMT_PHAGEN_OUT: FMT_CLUSTER_PHAGEN_OUT,
+                FMT_PHAGEN_IN: FMT_CLUSTER_PHAGEN_IN}
+
+## dictionary of supported cluster field names
+# maps a FMT_ file format identifier to the list of field (column) names in file order.
+CLUSTER_FIELDS = {FMT_DEFAULT: FIELDS_CLUSTER_INTERNAL,
+                  FMT_MSC: FIELDS_CLUSTER_MSC,
+                  FMT_EDAC: FIELDS_CLUSTER_EDAC,
+                  FMT_XYZ: FIELDS_CLUSTER_XYZ,
+                  FMT_PHAGEN_OUT: FIELDS_CLUSTER_PHAGEN_OUT,
+                  FMT_PHAGEN_IN: FIELDS_CLUSTER_PHAGEN_IN}
+

 class Cluster(object):
    """
@ -84,6 +138,8 @@ class Cluster(object):
    - t coordinate of the atom position
    - z coordinate of the atom position
    - emitter flag
+    - charge/ionicity
+    - scatterer class
    
    the class also defines methods that add or manipulate atoms of the cluster.
    see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
@ -126,6 +182,8 @@ class Cluster(object):
    #       @arg @c 'y' (float32) t coordinate of the atom position
    #       @arg @c 'z' (float32) z coordinate of the atom position
    #       @arg @c 'e' (uint8)   1 = emitter, 0 = regular atom
+    #       @arg @c 'q' (float32) charge/ionicity
+    #       @arg @c 'c' (int) scatterer class

    ##  @var comment (str)
    #   one-line comment that can be included in some cluster files
@ -152,6 +210,9 @@ class Cluster(object):
        @param cluster: (Cluster) other Cluster object.
        """
        self.data = cluster.data.copy()
+        self.rmax = cluster.rmax
+        self.dtype = cluster.dtype
+        self.comment = cluster.comment

    def set_rmax(self, r):
        """
@ -166,7 +227,7 @@ class Cluster(object):
        """
        self.rmax = r

-    def build_element(self, index, element_number, x, y, z, emitter):
+    def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0):
        """
        build a tuple in the format of the internal data array.
        
@ -177,12 +238,16 @@ class Cluster(object):
        @param x, y, z: (float) atom coordinates in the cluster
        
        @param emitter: (int or bool) True = emitter, False = scatterer
+
+        @param charge: (float) ionicity. default = 0
+
+        @param scatterer: (int) scatterer class. default = 0.
        """
        symbol = pt.elements[element_number].symbol
-        element = (index, element_number, symbol, x, y, z, int(emitter))
+        element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer)
        return element

-    def add_atom(self, atomtype, v_pos, is_emitter):
+    def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
        """
        add a single atom to the cluster.
        
@ -191,11 +256,15 @@ class Cluster(object):
        @param v_pos: (numpy.ndarray, shape = (3)) position vector
        
        @param is_emitter: (int or bool) True = emitter, False = scatterer
+
+        @param charge: (float) ionicity. default = 0
+
+        @return array index of added atom
        """
        n0 = self.data.shape[0] + 1
-        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
-        self.data = np.append(self.data, np.array(element,
-            dtype=self.data.dtype))
+        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter, charge)
+        self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))
+        return n0 - 1

    def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
        """
@ -290,19 +359,21 @@ class Cluster(object):
        source = cluster.data.copy()

        if check_rmax and source.shape[0] > 0:
-            source_xyz = source[['x', 'y', 'z']].copy()
-            source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
+            source_xyz = cluster.get_positions()
            b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
            idx = np.where(b_rmax)
            source = source[idx]
        data = np.append(data, source)

        if check_unique and data.shape[0] > 0:
-            data_xyz = data[['x', 'y', 'z']].copy()
-            data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
-            tol_xyz = np.round(data_xyz / tol)
-            uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
-            _, idx = np.unique(uni_xyz, return_index=True)
+            data_xyz = np.empty((data.shape[0], 3))
+            data_xyz[:, 0] = data['x']
+            data_xyz[:, 1] = data['y']
+            data_xyz[:, 2] = data['z']
+            tol *= 2
+            uni_xyz = np.round(data_xyz / tol)
+            # this requires numpy 1.13 or later
+            _, idx = np.unique(uni_xyz, return_index=True, axis=0)
            data = data[np.sort(idx)]

        self.data = data
@ -322,8 +393,10 @@ class Cluster(object):
            the returned coordinates may not be identical to any atom coordinate of a layer
            but deviate up to the given tolerance.
        """
-        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
-        z2 = np.round(self_z.copy() / tol)
+        tol *= 2
+        self_z = np.empty(self.data.shape, np.float32)
+        self_z[:] = self.data['z']
+        z2 = np.round(self_z / tol)
        layers = np.unique(z2) * tol
        return layers

@ -338,7 +411,8 @@ class Cluster(object):
            by default (element = 0), all atoms are moved.
        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
-        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
+        self_z = np.empty(self.data.shape, np.float32)
+        self_z[:] = self.data['z']
        b_z = self_z <= z_cut
        b_all = b_z

@ -434,12 +508,18 @@ class Cluster(object):
        """
        find all atoms which occupy a given position.

-        @param pos: (numpy.array, shape = (3)) position vector.
+        @param pos: position vector.
+            this can be a numpy.ndarray with shape (3)
+            or any type where pos[0] represents the x-coordinate, pos[1] y, and pos[2] z.
        
        @param tol: (float) matching tolerance per coordinate.

        @return numpy.array of indices which match v_pos.
        """
+        if isinstance(pos, np.ndarray):
+            assert pos.shape == (3,)
+        else:
+            pos = np.array((pos[0], pos[1], pos[2]))
        b2 = np.abs(pos - self.get_positions()) < tol
        b1 = np.all(b2, axis=1)
        idx = np.where(b1)
@ -463,8 +543,9 @@ class Cluster(object):
        @return numpy.array of indices which match v_pos.
        """
        pos_xy = pos[0:2]
-        self_xy = self.data[['x', 'y']].copy()
-        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
+        self_xy = np.empty((self.data.shape[0], 2), np.float32)
+        self_xy[:, 0] = self.data['x']
+        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy

        pos_z = pos[2]
@ -497,8 +578,9 @@ class Cluster(object):

        @return: None
        """
-        self_xy = self.data[['x', 'y']].copy()
-        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
+        self_xy = np.empty((self.data.shape[0], 2), np.float32)
+        self_xy[:, 0] = self.data['x']
+        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy

        self_z = self.data['z']
@ -545,8 +627,7 @@ class Cluster(object):

        @return: None
        """
-        self_xyz = self.data[['x', 'y', 'z']].copy()
-        self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
+        self_xyz = self.get_positions()
        b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
        idx = np.where(b_xyz)
        self.data = self.data[idx]
@ -562,7 +643,8 @@ class Cluster(object):

        @return: None
        """
-        coord = self.data[axis].view(np.float32).reshape(self.data.shape)
+        coord = np.empty(self.data.shape, np.float32)
+        coord[:] = self.data[axis]
        sel = np.abs(coord - center) <= depth / 2
        idx = np.where(sel)
        self.data = self.data[idx]
@ -617,15 +699,17 @@ class Cluster(object):

    def get_positions(self):
        """
-        get an array of the atom coordinates.
+        get the atom coordinates in a two-dimensional array.

        the returned array is an independent copy of the original data.
        changes will not affect the original cluster.
        
        @return numpy.ndarray, shape = (N,3)
        """
-        pos = self.data[['x', 'y', 'z']].copy()
-        pos = pos.view((pos.dtype[0], len(pos.dtype.names)))
+        pos = np.empty((self.data.shape[0], 3), np.float32)
+        pos[:, 0] = self.data['x']
+        pos[:, 1] = self.data['y']
+        pos[:, 2] = self.data['z']
        return pos

    def set_positions(self, positions):
@ -689,14 +773,16 @@ class Cluster(object):
        rec = self.data[index]
        return rec['s']

-    def get_emitters(self):
+    def get_emitters(self, fields):
        """
        get a list of all emitters.
-        
-        @return list of tuples (x, y, z, atomtype)
+
+        @param fields: list of field (column) names to return
+
+        @return list of tuples. each tuple contains the values of the requested fields.
        """
        idx = self.data['e'] != 0
-        ems = self.data[['x', 'y', 'z', 't']][idx]
+        ems = self.data[fields][idx]
        return [tuple(em) for em in ems]

    def get_emitter_count(self):
@ -711,10 +797,22 @@ class Cluster(object):
    def load_from_file(self, f, fmt=FMT_DEFAULT):
        """
        load a cluster from a file created by the scattering program.
+
+        the file formats differ in the columns that they contain.
+        only the 'x', 'y', 'z' coordinates are common to all formats.
+        at least one of the 's' and 't' columns must be present.
+        missing columns are initialized as follows.
+
+        @arg 'i': reset to a 1-based sequential index (@ref update_index).
+        @arg 's': derived from the 't' column (@ref update_symbols).
+        @arg 't': derived from the 's' column (@ref update_atomtypes).
+        @arg 'e': set to 0.
+        @arg 'c': set equal to the 't' column (@ref init_atomclasses).
+        @arg 'q': set to 0.
        
-        @param f (string/handle): path name or open file handle of the cluster file.
+        @param f: path name or open file handle of the cluster file.
        
-        @param fmt (int): file format.
+        @param fmt: file format.
            must be one of the FMT_ constants.
            if FMT_DEFAULT, self.file_format is used.
        
@ -735,12 +833,25 @@ class Cluster(object):
            dtype = DTYPE_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            sh = 2
+        elif fmt == FMT_PHAGEN_OUT:
+            dtype = DTYPE_CLUSTER_PHAGEN_OUT
+            fields = FIELDS_CLUSTER_PHAGEN_OUT
+            sh = 1
+        elif fmt == FMT_PHAGEN_IN:
+            dtype = DTYPE_CLUSTER_PHAGEN_IN
+            fields = FIELDS_CLUSTER_PHAGEN_IN
+            sh = 0
+        elif fmt == FMT_PMSCO:
+            dtype = DTYPE_CLUSTER_INTERNAL
+            fields = FIELDS_CLUSTER_INTERNAL
+            sh = 1
        else:
-            dtype = DTYPE_CLUSTER_XYZ
-            fields = FIELDS_CLUSTER_XYZ
-            sh = 2
+            raise ValueError("unknown file format {}".format(fmt))

        data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
+        if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
+            data = data[:-1]
+
        self.data = np.empty(data.shape, dtype=self.dtype)
        self.data['x'] = data['x']
        self.data['y'] = data['y']
@ -753,14 +864,23 @@ class Cluster(object):
            self.data['t'] = data['t']
        if 's' in fields:
            self.data['s'] = data['s']
-        else:
+        elif 't' in fields:
            self.update_symbols()
        if 't' not in fields:
-            self.update_atomtypes()
+            if 's' in fields:
+                self.update_atomtypes()
        if 'e' in fields:
            self.data['e'] = data['e']
        else:
            self.data['e'] = 0
+        if 'c' in fields:
+            self.data['c'] = data['c']
+        else:
+            self.data['c'] = 0
+        if 'q' in fields:
+            self.data['q'] = data['q']
+        else:
+            self.data['q'] = 0.

        pos = self.get_positions()
        # note: np.linalg.norm does not accept axis argument in version 1.7
@ -788,6 +908,35 @@ class Cluster(object):
        for atom in self.data:
            atom['t'] = pt.elements.symbol(atom['s'].strip()).number

+    def init_atomclasses(self, field_or_value='t', default_only=False):
+        """
+        initialize atom classes from atom types.
+
+        atom classes identify the atomic scattering potential or scattering factors
+        to be used in the multiple scattering program.
+
+        if the scattering factors are calculated in the PMSCO process (by EDAC or PHAGEN),
+        the atom classes must be set equal to the element type
+        or left at the default value 0 in which case PMSCO sets the correct values.
+
+        if the scattering factors are loaded from existing files,
+        the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary.
+        in this case the meaning of the class value is up to the project,
+        and the class must be set either by the cluster generator
+        or the project's after_atomic_scattering hook.
+
+        @param field_or_value: source of the class values.
+            a string is interpreted as the name of a cluster data field, e.g. 't',
+            whose values are copied into the 'c' field.
+            any other value, e.g. an integer constant, is assigned to the 'c' field directly.
+
+        @param default_only: if True, the classes are initialized only
+            if all of them are at their default value (0).
+            if False, the classes are always overwritten.
+
+        @return None
+        """
+        # leave the classes alone if at least one of them is non-zero and default_only is requested
+        if default_only and np.sum(np.abs(self.data['c'])) != 0:
+            return
+
+        if isinstance(field_or_value, str):
+            new_classes = self.data[field_or_value]
+        else:
+            new_classes = field_or_value
+        self.data['c'] = new_classes
+
    def update_index(self):
        """
        update the index column.
@ -795,10 +944,44 @@ class Cluster(object):
        if you have modified the order or number of elements in the self.data array directly,
        you may need to re-index the atoms if your code uses functions that rely on the index. 
        
-        @return: None 
+        @return None
        """
        self.data['i'] = np.arange(1, self.data.shape[0] + 1)

+    def update_atoms(self, clu, fields):
+        """
+        update atom properties from another cluster.
+
+        this method copies selected fields from another cluster.
+        the other cluster must contain the same atoms (same coordinates) in a possibly random order.
+        the atoms of this and the other cluster are matched up by sorting them by coordinate.
+
+        atomic scattering calculators often change the order of atoms in a cluster based on symmetry,
+        and return atom classes versus atomic coordinates.
+        this method allows to import the atom classes into the original cluster.
+
+        the method checks that the other cluster contains the same number of atoms.
+        it does not check that the clusters contain the same atomic positions.
+        linear translations are acceptable.
+
+        @param clu: cluster.Cluster object
+
+        @param fields: subset of field names out of FIELDS_CLUSTER_INTERNAL.
+            'i', 'x', 'y', 'z' are ignored.
+            the set can be specified in any type that converts into a set of strings.
+
+        @return: None
+
+        @raise AssertionError if the clusters do not contain the same number of atoms
+        """
+        # raise explicitly rather than using an assert statement,
+        # so that the check is not removed when python runs with the -O option.
+        if self.data.shape != clu.data.shape:
+            raise AssertionError("clusters do not contain the same number of atoms")
+
+        # index and coordinates identify the atoms and are never overwritten
+        fields = set(fields) - {'i', 'x', 'y', 'z'}
+
+        # sorting both clusters by (z, y, x) pairs up the atoms that occupy the same rank
+        common_order = ('z', 'y', 'x')
+        index_self = np.argsort(self.data, order=common_order)
+        index_other = np.argsort(clu.data, order=common_order)
+        for field in fields:
+            self.data[field][index_self] = clu.data[field][index_other]
+
    def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
        """
        save the cluster to a file which can be read by the scattering program.
@ -846,10 +1029,21 @@ class Cluster(object):
            file_format = FMT_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
+        elif fmt == FMT_PHAGEN_IN:
+            file_format = FMT_CLUSTER_PHAGEN_IN
+            fields = FIELDS_CLUSTER_PHAGEN_IN
+            header = None
+        elif fmt == FMT_PHAGEN_OUT:
+            file_format = FMT_CLUSTER_PHAGEN_OUT
+            fields = FIELDS_CLUSTER_PHAGEN_OUT
+            header = ""
+        elif fmt == FMT_PMSCO:
+            file_format = FMT_CLUSTER_INTERNAL
+            fields = FIELDS_CLUSTER_INTERNAL
+            names = NAMES_CLUSTER_INTERNAL
+            header = "# " + " ".join([names[field] for field in fields])
        else:
-            file_format = FMT_CLUSTER_XYZ
-            fields = FIELDS_CLUSTER_XYZ
-            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
+            raise ValueError("unknown file format {}".format(fmt))

        data = data[fields]
        np.savetxt(f, data, fmt=file_format, header=header, comments="")
@ -996,3 +1190,67 @@ class LegacyClusterGenerator(ClusterGenerator):
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)
+
+
+def parse_cli():
+    """
+    parse the command line of the cluster conversion tool.
+
+    the command line consists of four positional arguments:
+    input format, input file, output format, output file.
+    the formats are restricted to the names listed in the function.
+
+    @return: Namespace object created by the argument parser.
+        it has the attributes input_format, input_file, output_format and output_file.
+    """
+    import argparse
+
+    known_formats = ["PMSCO", "MSC", "EDAC", "XYZ", "PHAGEN_OUT", "PHAGEN_IN"]
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="""
+            cluster conversion
+            """)
+
+    # the arguments are declared in command line order: input pair first, then output pair
+    for direction in ("input", "output"):
+        cli.add_argument(direction + '_format',
+                         choices=known_formats,
+                         help="format of {0} file".format(direction))
+        cli.add_argument(direction + '_file',
+                         help="path and name of {0} file".format(direction))
+
+    return cli.parse_args()
+
+
+def convert_cli(args):
+    """
+    convert a cluster file from one format into another.
+
+    this function is part of the command line interface.
+    the format names given on the command line are translated
+    to the module-level FMT_ constants of the same name.
+
+    @param args: command line arguments.
+        the function reads the attributes
+        input_format, input_file, output_format and output_file.
+
+    @return: None
+    """
+    converter = Cluster()
+    converter.file_format = FMT_PMSCO
+
+    # e.g. "XYZ" selects the module constant FMT_XYZ
+    source_format = globals()["FMT_" + args.input_format.upper()]
+    target_format = globals()["FMT_" + args.output_format.upper()]
+
+    converter.load_from_file(args.input_file, source_format)
+    converter.save_to_file(args.output_file, target_format)
+
+
+def main_cli():
+    """
+    command line interface to convert cluster files.
+
+    the function parses the command line and passes the result to @ref convert_cli.
+
+    @return: None
+    """
+    convert_cli(parse_cli())
+
+
+if __name__ == '__main__':
+    main_cli()
+    sys.exit(0)
--- a/pmsco/database.py
+++ b/pmsco/database.py
@ -342,6 +342,53 @@ class ResultsDatabase(object):
        where param_id = :param_id and model_id = :model_id 
        """

+    # Tags table: one row per tag key.
+    # the key column is declared unique with case-insensitive comparison (COLLATE NOCASE).
+    sql_create_tags = """CREATE TABLE IF NOT EXISTS `Tags` (
+        `id` INTEGER PRIMARY KEY,
+        `key` TEXT NOT NULL UNIQUE COLLATE NOCASE
+        )"""
+    # add a tag key, look up the key of a tag id, look up the row of a tag key.
+    sql_insert_tag = "insert into Tags(key) values (:key)"
+    sql_select_tag = "select key from Tags where id=:id"
+    sql_select_tag_key = "select id, key from Tags where key=:key"
+    # distinct tags that are attached to at least one job of a project, sorted by key.
+    sql_select_tag_project = """select distinct key, tag_id from Jobs
+        join JobTags on Jobs.id = JobTags.job_id
+        join Tags on Tags.id = JobTags.tag_id
+        where Jobs.project_id = :project_id
+        order by key collate nocase"""
+    # distinct tags that are attached to one job, sorted by key.
+    sql_select_tag_job = """select distinct key, tag_id from JobTags
+        join Tags on Tags.id = JobTags.tag_id
+        where JobTags.job_id = :job_id
+        order by key collate nocase"""
+
+    # JobTags table: links a tag to a job and holds the value of the tag for that job.
+    # the foreign keys are declared with ON DELETE CASCADE.
+    # NOTE(review): SQLite enforces foreign keys only if they are enabled on the connection - confirm.
+    sql_create_jobtags = """CREATE TABLE IF NOT EXISTS `JobTags` (
+        `id` INTEGER PRIMARY KEY,
+        `tag_id` INTEGER NOT NULL,
+        `job_id` INTEGER NOT NULL,
+        `value` TEXT COLLATE NOCASE,
+        FOREIGN KEY(tag_id) REFERENCES Tags(id) ON DELETE CASCADE,
+        FOREIGN KEY(job_id) REFERENCES Jobs(id) ON DELETE CASCADE
+        )"""
+    # index on the (tag_id, job_id) columns of JobTags.
+    # the index is not declared unique, so the table itself does not prevent duplicate pairs.
+    sql_index_jobtags = """create index if not exists 
+        `index_jobtags` ON `JobTags` 
+        (`tag_id`, `job_id`)"""
+    sql_drop_index_jobtags = "drop index if exists index_jobtags"
+    # attach a tag value to a job.
+    sql_insert_jobtag = """
+        insert into JobTags(tag_id, job_id, value)
+        values (:tag_id, :job_id, :value)
+        """
+    # change the value of an existing JobTags row identified by its primary key.
+    sql_update_jobtag = """
+        update JobTags set value=:value where id=:jobtag_id
+        """
+    # all tags (key and value) of one job.
+    sql_select_jobtag_job = """
+        select key, value from JobTags
+        join Tags on JobTags.tag_id = Tags.id
+        where job_id = :job_id
+        """
+    # the JobTags row(s) of one tag and one job. the first column is the JobTags primary key.
+    sql_select_jobtag = """
+        select JobTags.id as id, key, value from JobTags
+        join Tags on JobTags.tag_id = Tags.id
+        where tag_id = :tag_id and job_id = :job_id 
+        """
+
    # @var _conn (sqlite3.Connection).
    # connection interface to the database.
    #
@ -391,6 +438,7 @@ class ResultsDatabase(object):
        self.project_id = 0
        self.job_id = 0
        self._model_params = {}
+        self._tags = {}
        self._lock_filename = ""
        self._lock = None

@ -484,9 +532,12 @@ class ResultsDatabase(object):
            self._conn.execute(self.sql_create_results)
            self._conn.execute(self.sql_create_params)
            self._conn.execute(self.sql_create_paramvalues)
+            self._conn.execute(self.sql_create_tags)
+            self._conn.execute(self.sql_create_jobtags)
            self._conn.execute(self.sql_index_results_tasks)
            self._conn.execute(self.sql_index_results_models)
            self._conn.execute(self.sql_index_paramvalues)
+            self._conn.execute(self.sql_index_jobtags)
            self._conn.execute(self.sql_index_models)

    def register_project(self, name, code):
@ -583,6 +634,46 @@ class ResultsDatabase(object):
            param_dict = {'job_id': job_id}
            self._conn.execute(self.sql_delete_job, param_dict)

+    def _query_job_name(self, job_name, project_id=0):
+        """
+        (internal) query a job by name
+
+        this is the internal analog of @ref query_job_name
+        which asserts an acquired lock and open connection.
+        this method itself neither acquires the lock nor checks the connection.
+
+        @param job_name: name of the job
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @return: id value of the job in the database
+
+        @raise sqlite3.DatabaseError if the job can't be found.
+        """
+        import sqlite3
+
+        if project_id == 0:
+            project_id = self.project_id
+        param_dict = {'project_id': project_id, 'name': job_name}
+        c = self._conn.execute(self.sql_select_job_name, param_dict)
+        v = c.fetchone()
+        # fetchone returns None if the query has no result.
+        # report this as the documented exception rather than failing on the subscript below.
+        if v is None:
+            raise sqlite3.DatabaseError(
+                "job {0} not found in project {1}".format(job_name, project_id))
+        return v[0]
+
+    def query_job_name(self, job_name, project_id=0):
+        """
+        query a job by name
+
+        the method checks the connection, holds the lock and the connection context
+        during the query, and delegates the query to _query_job_name.
+
+        @param job_name: name of the job
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @return: id value of the job in the database
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            return self._query_job_name(job_name, project_id=project_id)
+
    def register_param(self, key):
        """
        register a parameter key with the database.
@ -681,6 +772,165 @@ class ResultsDatabase(object):

        return params

+    def register_tag(self, key):
+        """
+        register a tag with the database.
+
+        tags are a way of structuring a job description.
+        they can be used to, for instance, distinguish calculations made with different clusters,
+        different experimental data, etc.
+        a job tag has a key and a value, and is associated to a job.
+        the use of tags is up to the user. pmsco does not change or read them.
+
+        each tag name must be registered once before a value can be written to the database.
+        see the class description for an explanation.
+
+        the method checks the connection, holds the lock and the connection context,
+        and delegates the registration to _register_tag.
+
+        @param key: key (name) of the tag.
+
+        @return: id value of the tag in the database.
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            tag_id = self._register_tag(key)
+        return tag_id
+
+    def _register_tag(self, key):
+        """
+        register a tag with the database without committing the transaction.
+
+        if the key is found in the Tags table, the existing id is used.
+        otherwise, a new row is inserted.
+        in both cases, the id is stored in the local registry self._tags under the given key.
+
+        @note this method does not lock the database file and does not commit.
+            to lock the database and commit the transaction, call the public method register_tag().
+
+        @param key: key (name) of the tag.
+
+        @return: id value of the tag in the database.
+        """
+        query_args = {'key': key}
+        existing = self._conn.execute(self.sql_select_tag_key, query_args).fetchone()
+        if not existing:
+            # unknown key: create the tag and use the id of the new row
+            tag_id = self._conn.execute(self.sql_insert_tag, {'key': key}).lastrowid
+        else:
+            tag_id = existing[0]
+        self._tags[key] = tag_id
+        return tag_id
+
+    def register_tags(self, tags):
+        """
+        register the tags of this project with the database.
+
+        each tag name must be registered once before a value can be written to the database.
+        see the class description for an explanation.
+
+        all keys are registered under one lock and within one connection context.
+
+        @param tags: sequence of tag keys, or dictionary of tags.
+            if a dictionary is passed, only its keys are used.
+
+        @return: None
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            for tag_key in tags:
+                self._register_tag(tag_key)
+
+    def query_tags(self, project_id=0, job_id=0, update_registry=False):
+        """
+        query a list of tag keys used in a project or job.
+
+        optionally, the local registry can be updated with the results of the query.
+        this should be done if the database is read only and the client does not know the tag names.
+        see the class description for a description of the registry.
+
+        @note this method returns the tags that are used with jobs in the database.
+            if you have registered additional tags but not attached them to jobs,
+            this method will _not_ list them.
+
+        @param project_id: project identifier.
+            by default, the current project self.project_id is used.
+
+        @param job_id: job identifier.
+            by default, all jobs of the selected project are included in the query.
+            if a job is specified, the project_id parameter is ignored.
+
+        @param update_registry: update the local tags registry (self._tags)
+            with the query results.
+
+        @return: dictionary of tags.
+            the keys are the tag names, the values are the tag ids in the database.
+        """
+        if project_id == 0:
+            project_id = self.project_id
+
+        # a job filter takes precedence over the project filter
+        if job_id != 0:
+            statement = self.sql_select_tag_job
+            statement_args = {'job_id': job_id}
+        else:
+            statement = self.sql_select_tag_project
+            statement_args = {'project_id': project_id}
+
+        self.check_connection()
+        with self._lock, self._conn:
+            cursor = self._conn.execute(statement, statement_args)
+            found = {row['key']: row['tag_id'] for row in cursor}
+
+        if update_registry:
+            self._tags.update(found)
+
+        return found
+
+    def query_job_tags(self, job_id):
+        """
+        query the tags (keys and values) associated with a job.
+
+        @param job_id: job identifier.
+
+        @return: dictionary of tags.
+            the keys are the tag names, the values are the tag values.
+            the dictionary is empty if the query returns no rows.
+        """
+        statement_args = {'job_id': job_id}
+
+        self.check_connection()
+        with self._lock, self._conn:
+            cursor = self._conn.execute(self.sql_select_jobtag_job, statement_args)
+            job_tags = {row['key']: row['value'] for row in cursor}
+
+        return job_tags
+
+    def insert_jobtags(self, job_id, tags):
+        """
+        add or update job tags in the database.
+
+        the method updates the JobTags table.
+        for each tag, the value of an existing JobTags row of the same job and tag is updated.
+        if there is no such row, a new one is inserted.
+
+        the existing row is looked up in the database even if the tag key
+        was not yet in the local registry (self._tags).
+        the tag may have been attached to the job before the registry was filled,
+        and inserting unconditionally would then create a duplicate row.
+
+        @param job_id: (int) primary key of the job entry in the Jobs table.
+            the entry must exist.
+
+        @param tags: (dict) dictionary containing the tags.
+            keys are matched or added to the Tags table,
+            values are added to the JobTags table and linked to the job and tag key.
+
+        @return: None
+        """
+        self.check_connection()
+        with self._lock, self._conn:
+            for key, value in tags.items():
+                try:
+                    tag_id = self._tags[key]
+                except KeyError:
+                    # not in the local registry: look up the key in the database or create it
+                    tag_id = self._register_tag(key)
+
+                jobtag_entry = {'tag_id': tag_id, 'job_id': job_id, 'value': value}
+                c = self._conn.execute(self.sql_select_jobtag, jobtag_entry)
+                v = c.fetchone()
+
+                if v:
+                    # the first column of the result is the primary key of the JobTags row
+                    jobtag_entry = {'jobtag_id': v[0], 'tag_id': tag_id, 'job_id': job_id, 'value': value}
+                    self._conn.execute(self.sql_update_jobtag, jobtag_entry)
+                else:
+                    self._conn.execute(self.sql_insert_jobtag, jobtag_entry)
+
    def create_models_view(self, job_id=0, temporary=False):
        """
        create a flat (pivot) view of model parameters of the current project or job.
@ -878,7 +1128,7 @@ class ResultsDatabase(object):
            results = c.fetchall()

            names = [desc[0] for desc in c.description]
-            dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names)])
+            dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names, key=str.lower)])
            out_array = np.zeros((count,), dtype=dt)
            for idx, row in enumerate(results):
                for name in names:
@ -942,6 +1192,70 @@ class ResultsDatabase(object):

        return out_array

+    def query_best_models_per_jobs(self, job_ids=None, task_level='model'):
+        """
+        return the best model (by rfac) of each selected job
+
+        the query gathers the R-factors of the selected jobs at the selected task levels
+        and, for each job, returns the (database) model id where the lowest R-factor is reported
+        among the gathered results.
+
+        this can be useful if you want to compile a report of the best model per job.
+
+        @param job_ids: iterable of job ids to include in the query.
+            the job ids must belong to the current project and must be convertible to int.
+            if empty or non-specified, all jobs of the current project are included.
+
+        @param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
+            deepest task_level to include in the query.
+            results on deeper levels are not considered.
+            e.g. if you pass 'scan', R-factors of individual scans are included in the query.
+            note that including deeper levels will not increase the number of results returned.
+
+        @return sequence of model_id, ordered by increasing R-factor.
+            the number of results corresponds to the number of jobs in the filter scope
+            that have at least one result with a non-null R-factor.
+            to find out details of the models, execute another query that filters on these model ids.
+
+        @raise ValueError if an element of job_ids cannot be converted to int.
+
+        the method produces an SQL query similar to:
+        @code{.sql}
+        select Models.id as id, min(rfac) as best_rfac from Models
+        join Results on Models.id = Results.model_id
+        join Jobs on Models.job_id = Jobs.id
+        where project_id=1
+        and scan=-1
+        and rfac is not null
+        and Models.job_id in (1,2,3)
+        group by Models.job_id
+        order by best_rfac
+        @endcode
+
+        the minimum is selected as a result column rather than tested in a having clause.
+        a having clause evaluates min(rfac) as a boolean and would drop jobs
+        whose best R-factor is exactly zero.
+        """
+
+        try:
+            level = dispatch.CALC_LEVELS.index(task_level) + 1
+        except ValueError:
+            # not a level name: interpret the argument as an index into CALC_LEVELS.
+            level = task_level + 1
+        try:
+            level_name = dispatch.CALC_LEVELS[level]
+        except IndexError:
+            # there is no level below the requested one: filter on the last level column.
+            level_name = dispatch.CALC_LEVELS[-1]
+
+        # coerce to int so that nothing but plain numbers is ever formatted into the statement.
+        job_ids = [] if job_ids is None else [int(job_id) for job_id in job_ids]
+
+        clauses = ["select Models.id as id, min(rfac) as best_rfac from Models",
+                   "join Results on Models.id = Results.model_id",
+                   "join Jobs on Models.job_id = Jobs.id",
+                   "where project_id = :project_id",
+                   "and {0} = -1".format(level_name),
+                   "and rfac is not null"]
+        if job_ids:
+            clauses.append("and Models.job_id in ({0})".format(",".join(map(str, job_ids))))
+        clauses.append("group by Models.job_id")
+        # in sqlite, the bare columns of an aggregate query with a single min()
+        # are taken from the row that holds the minimum.
+        clauses.append("order by best_rfac, job_id, model, scan, sym, emit, region")
+        sql = " ".join(clauses)
+        args = {'project_id': self.project_id}
+
+        self.check_connection()
+        with self._lock, self._conn:
+            c = self._conn.execute(sql, args)
+            models = [row['id'] for row in c]
+
+        return models
+
    def query_tasks(self, job_id=0):
        """
        query the task index used in a calculation job.
@ -1213,13 +1527,18 @@ class ResultsDatabase(object):

        data = np.genfromtxt(filename, names=True)
        self.register_params(data.dtype.names)
-        unique_models, unique_index = np.unique(data['_model'], True)
+        try:
+            unique_models, unique_index = np.unique(data['_model'], True)
+        except ValueError:
+            unique_models = np.array([0])
+            unique_index = np.array([0])
        unique_data = data[unique_index]
        model_ids = {}

        def model_entry_generator():
            for result in unique_data:
                model_entry = {'job_id': job_id,
+                               'model': unique_models[0],
                               'gen': None,
                               'particle': None}
                model_entry.update(special_params(result))
@ -1227,7 +1546,11 @@ class ResultsDatabase(object):

        def result_entry_generator():
            for result in data:
-                result_entry = {'model_id': model_ids[result['_model']],
+                try:
+                    model = result['_model']
+                except ValueError:
+                    model = unique_models[0]
+                result_entry = {'model_id': model_ids[model],
                                'scan': -1,
                                'sym': -1,
                                'emit': -1,
@ -1238,8 +1561,12 @@ class ResultsDatabase(object):

        def param_entry_generator():
            for result in unique_data:
+                try:
+                    model = result['_model']
+                except ValueError:
+                    model = unique_models[0]
                for key, value in regular_params(result).items():
-                    param_entry = {'model_id': model_ids[result['_model']],
+                    param_entry = {'model_id': model_ids[model],
                                   'param_id': self._model_params[key],
                                   'value': value}
                    yield param_entry
--- a/pmsco/dispatch.py
+++ b/pmsco/dispatch.py
@ -227,7 +227,7 @@ class CalculationTask(object):
    #  files generated by the task and their category
    #
    #  dictionary key is the file name,
-    #  value is the file category, e.g. 'cluster', 'phase', etc.
+    #  value is the file category, e.g. 'cluster', 'atomic', etc.
    #
    #  this information is used to automatically clean up unnecessary data files.

@ -374,7 +374,7 @@ class CalculationTask(object):
        this information is used to automatically clean up unnecessary data files.

        @param name: file name (optionally including a path).
-        @param category: file category, e.g. 'cluster', 'phase', etc.
+        @param category: file category, e.g. 'cluster', 'atomic', etc.
        @return: None
        """
        self.files[name] = category
@ -521,7 +521,8 @@ class MscoProcess(object):
    def __init__(self, comm):
        self._comm = comm
        self._project = None
-        self._calculator = None
+        self._atomic_scattering = None
+        self._multiple_scattering = None
        self._running = False
        self._finishing = False
        self.stop_signal = False
@ -529,7 +530,8 @@ class MscoProcess(object):

    def setup(self, project):
        self._project = project
-        self._calculator = project.calculator_class()
+        self._atomic_scattering = project.atomic_scattering_factory()
+        self._multiple_scattering = project.multiple_scattering_factory()
        self._running = False
        self._finishing = False
        self.stop_signal = False
@ -596,19 +598,18 @@ class MscoProcess(object):
        scan = self._define_scan(task)
        output_file = task.format_filename(ext="")

-        # check parameters and call the msc program
-        if clu.get_atom_count() < 2:
+        # check parameters and call the calculators
+        if clu.get_atom_count() >= 1:
+            self._calc_atomic(task, par, clu, scan, output_file)
+        else:
            logger.error("empty cluster in calculation %s", s_id)
            task.result_valid = False
-        elif clu.get_emitter_count() < 1:
+
+        if clu.get_emitter_count() >= 1:
+            self._calc_multiple(task, par, clu, scan, output_file)
+        else:
            logger.error("no emitters in cluster of calculation %s.", s_id)
            task.result_valid = False
-        else:
-            task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
-            (root, ext) = os.path.splitext(task.result_filename)
-            task.file_ext = ext
-            task.result_valid = True
-            task.files.update(files)

        task.time = datetime.datetime.now() - start_time

@ -666,6 +667,8 @@ class MscoProcess(object):
        """
        nem = self._project.cluster_generator.count_emitters(task.model, task.id)
        clu = self._project.cluster_generator.create_cluster(task.model, task.id)
+        # overwrite atom classes only if they are at their default value
+        clu.init_atomclasses(field_or_value='t', default_only=True)

        if task.id.region == 0:
            file_index = task.id._replace(region=-1)
@ -696,6 +699,59 @@ class MscoProcess(object):

        return par

+    def _calc_atomic(self, task, par, clu, scan, output_file):
+        """
+        calculate the atomic scattering factors if necessary and link them to the cluster.
+
+        the method works on deep copies of par and clu.
+        it calls, in this order, the `before_atomic_scattering` project hook,
+        the atomic scattering calculator and the `after_atomic_scattering` project hook.
+        the original par and clu objects are updated only if all three steps deliver a result,
+        i.e., if neither hook returns None for the cluster and the calculator reports created files.
+
+        @param task: CalculationTask with all attributes set for the calculation.
+            the files reported by the calculator are added to its files dictionary.
+
+        @param par: pmsco.project.Params object for the calculator.
+            its phase_files attribute is updated with the created scattering files.
+            the radial matrix elements are not changed (but may be in a future version).
+
+        @param clu: pmsco.cluster.Cluster object for the calculator.
+            the cluster is overwritten with the one returned by the calculator,
+            so that atom classes match the phase_files.
+
+        @param scan: scan object that is passed on to the calculator.
+
+        @param output_file: output file name that is passed on to the calculator.
+
+        @return: None
+        """
+        work_par = copy.deepcopy(par)
+        work_clu = copy.deepcopy(clu)
+
+        work_par, work_clu = self._project.before_atomic_scattering(task, work_par, work_clu)
+        if work_clu is None:
+            return
+
+        # the name of the main result file is not used here, only the dictionary of created files.
+        _, created_files = self._atomic_scattering.run(work_par, work_clu, scan, output_file)
+        if not created_files:
+            return
+        task.files.update(created_files)
+
+        work_par, work_clu = self._project.after_atomic_scattering(task, work_par, work_clu)
+        if work_clu is None:
+            return
+
+        par.phase_files = work_par.phase_files
+        clu.copy_from(work_clu)
+
+    def _calc_multiple(self, task, par, clu, scan, output_file):
+        """
+        calculate the multiple scattering intensity.
+
+        the method runs the multiple scattering calculator and stores the outcome in the task:
+        the result file name is always stored.
+        the file extension and the valid flag are set only if a result file name is reported.
+        reported files are added to the files dictionary of the task.
+
+        @param task: CalculationTask with all attributes set for the calculation.
+        @param par: pmsco.project.Params object for the calculator.
+        @param clu: pmsco.cluster.Cluster object for the calculator.
+        @param scan: scan object that is passed on to the calculator.
+        @param output_file: output file name that is passed on to the calculator.
+        @return: None
+        """
+        result_file, created_files = self._multiple_scattering.run(par, clu, scan, output_file)
+        task.result_filename = result_file
+        if result_file:
+            task.file_ext = os.path.splitext(result_file)[1]
+            task.result_valid = True
+        if created_files:
+            task.files.update(created_files)
+

 class MscoMaster(MscoProcess):
    """
@ -1025,19 +1081,19 @@ class MscoMaster(MscoProcess):
        @return: self._finishing
        """
        if not self._finishing and (self._model_done and not self._pending_tasks and not self._running_tasks):
-            logger.info("finish: model handler is done")
+            logger.warning("finish: model handler is done")
            self._finishing = True
        if not self._finishing and (self._calculations >= self.max_calculations):
            logger.warning("finish: max. calculations (%u) exeeded", self.max_calculations)
            self._finishing = True
        if not self._finishing and self.stop_signal:
-            logger.info("finish: stop signal received")
+            logger.warning("finish: stop signal received")
            self._finishing = True
        if not self._finishing and (datetime.datetime.now() > self.datetime_limit):
            logger.warning("finish: time limit exceeded")
            self._finishing = True
        if not self._finishing and os.path.isfile("finish_pmsco"):
-            logger.info("finish: finish_pmsco file detected")
+            logger.warning("finish: finish_pmsco file detected")
            self._finishing = True

        if self._finishing and not self._running_slaves and not self._running_tasks:
--- a/pmsco/files.py
+++ b/pmsco/files.py
@ -27,9 +27,9 @@ logger = logging.getLogger(__name__)
 #
 # each string of this set marks a category of files.
 #
-# @arg @c 'input' :     raw input files for calculator, including cluster and phase files in custom format
+# @arg @c 'input' :     raw input files for calculator, including cluster and atomic files in custom format
 # @arg @c 'output' :    raw output files from calculator
-# @arg @c 'phase' :     phase files in portable format for report
+# @arg @c 'atomic' :    atomic scattering (phase, emission) files in portable format
 # @arg @c 'cluster' :   cluster files in portable XYZ format for report
 # @arg @c 'log' :       log files
 # @arg @c 'debug' :     debug files
@ -47,7 +47,7 @@ logger = logging.getLogger(__name__)
 # the string is used only to specify whether bad models should be deleted or not.
 # if so, all files related to bad models are deleted, regardless of their static category.
 #
-FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
+FILE_CATEGORIES = {'cluster', 'atomic', 'input', 'output',
                   'report', 'region', 'emitter', 'scan', 'symmetry', 'model',
                   'log', 'debug', 'population', 'rfac'}

--- a/pmsco/graphics/scan.py
+++ b/pmsco/graphics/scan.py
@ -36,7 +36,7 @@ except ImportError:
    logger.warning("error importing matplotlib. graphics rendering disabled.")


-def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
+def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False, ref_data=None):
    """
    produce a graphics file from a one-dimensional scan file.

@ -47,14 +47,22 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):

    @param filename: path and name of the scan file.
        this is used to derive the output file path by adding the extension of the graphics file format.
+
    @param data: numpy-structured array of EI, ETPI or ETPAI data.
+
    @param scan_mode: list containing the field name of the scanning axis of the data array.
        it must contain one element exactly.
+
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
+
    @param is_modf: whether data contains a modulation function (True) or intensity (False, default).
        this parameter is used to set axis labels.

+    @param ref_data: numpy-structured array of EI, ETPI or ETPAI data.
+        this is reference data (e.g. experimental data) that should be plotted with the main dataset.
+        both datasets will be plotted on the same axis and should have similar data range.
+
    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

@ -66,6 +74,8 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
    canvas(fig)

    ax = fig.add_subplot(111)
+    if ref_data is not None:
+        ax.plot(ref_data[scan_mode[0]], ref_data['i'], 'k.')
    ax.plot(data[scan_mode[0]], data['i'])

    ax.set_xlabel(scan_mode[0])
@ -225,7 +235,7 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
    return out_filename


-def render_scan(filename, data=None):
+def render_scan(filename, data=None, ref_data=None):
    """
    produce a graphics file from a scan file.

@ -248,6 +258,11 @@ def render_scan(filename, data=None):
    @param data: numpy-structured array of ETPI or ETPAI data.
        if this argument is omitted, the data is loaded from the file referenced by the filename argument.

+    @param ref_data: numpy-structured array of ETPI or ETPAI data.
+        this is reference data (e.g. experimental data) that should be plotted with the main dataset.
+        this is supported for 1d scans only.
+        both datasets will be plotted on the same axis and should have similar data range.
+
    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.
    """
@ -258,7 +273,7 @@ def render_scan(filename, data=None):

    try:
        if len(scan_mode) == 1:
-            out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf)
+            out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf, ref_data=ref_data)
        elif len(scan_mode) == 2 and 'e' in scan_mode:
            out_filename = render_ea_scan(filename, data, scan_mode, is_modf=is_modf)
        elif len(scan_mode) == 2 and 't' in scan_mode and 'p' in scan_mode:
--- a/pmsco/handlers.py
+++ b/pmsco/handlers.py
@ -344,7 +344,6 @@ class SingleModelHandler(ModelHandler):
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

-        assert not math.isnan(task.rfac)
        self.result = task.model.copy()
        self.result['_rfac'] = task.rfac

@ -629,7 +628,8 @@ class SymmetryHandler(TaskHandler):
                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
-                graph_file = mgs.render_scan(parent_task.modf_filename)
+                graph_file = mgs.render_scan(parent_task.modf_filename,
+                                             ref_data=self._project.scans[parent_task.id.scan].modulation)
                self._project.files.add_file(graph_file, parent_task.id.model, 'scan')

            del self._pending_ids_per_parent[parent_task.id]
@ -752,7 +752,8 @@ class EmitterHandler(TaskHandler):
                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
-                graph_file = mgs.render_scan(parent_task.modf_filename)
+                graph_file = mgs.render_scan(parent_task.modf_filename,
+                                             ref_data=self._project.scans[parent_task.id.scan].modulation)
                self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')

            del self._pending_ids_per_parent[parent_task.id]
--- a/pmsco/igor.py
+++ b/pmsco/igor.py
@ -0,0 +1,143 @@
+"""
+@package pmsco.igor
+data exchange with wavemetrics igor pro.
+
+this module provides functions for loading/saving pmsco data in igor pro.
+
+@author Matthias Muntwiler
+
+@copyright (c) 2019 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from pmsco.compat import open
+
+
+def _escape_igor_string(s):
+    """
+    escape backslashes and double quotes in a string by a preceding backslash.
+
+    backslashes are escaped first so that the backslashes inserted for the quotes are not doubled.
+
+    @param s: (str) string to escape
+
+    @return: escaped string
+    """
+    return s.replace('\\', '\\\\').replace('"', '\\"')
+
+
+def namefix_double(name):
+    """
+    fix 1-character wave name by doubling
+
+    a name of length 1 is replaced by two copies of itself, e.g. 'e' becomes 'ee'.
+    names of any other length are returned unchanged.
+
+    @param name: (str) proposed wave name
+
+    @return: corrected name
+    """
+    if len(name) == 1:
+        return name * 2
+    return name
+
+
+def namefix_etpais(name):
+    """
+    fix 1-character wave name according to ETPAIS scheme
+
+    the names 'e', 't', 'p', 'i', 'm' and 's' are replaced by
+    'en', 'th', 'ph', 'in', 'mo' and 'si', respectively.
+    all other names are returned unchanged.
+
+    @param name: (str) proposed wave name
+
+    @return: corrected name
+    """
+    replacements = {'e': 'en', 't': 'th', 'p': 'ph', 'i': 'in', 'm': 'mo', 's': 'si'}
+    return replacements.get(name, name)
+
+
+class IgorExport(object):
+    """
+    class exports pmsco data to an Igor text (ITX) file.
+
+    usage:
+    1) create an object instance.
+    2) set @ref data (directly or by calling @ref set_data).
+    3) set optional attributes: @ref prefix and @ref namefix.
+    4) call @ref export.
+    """
+
+    def __init__(self):
+        super(IgorExport, self).__init__()
+        # one-dimensional structured numpy array to export. must be set before calling export.
+        self.data = None
+        # string that is prepended to every wave name.
+        self.prefix = ""
+        # callable that maps a column name to a wave name. None leaves the column names unchanged.
+        self.namefix = namefix_double
+
+    def set_data(self, data):
+        """
+        set the data array to export.
+
+        this must (currently) be a one-dimensional structured array.
+        the column names will become wave names.
+        the array is not checked here but when it is exported.
+
+        @param data: numpy.ndarray
+
+        @return: None
+        """
+        self.data = data
+
+    def export(self, filename):
+        """
+        write to igor file.
+
+        the data is checked before the file is opened
+        so that an existing file is not overwritten if the data cannot be exported.
+
+        @param filename: (str) path and name of the file to be created.
+
+        @return: None
+
+        @raise TypeError if @ref data is not a numpy array.
+
+        @raise ValueError if @ref data is not a one-dimensional structured array.
+        """
+        self._check_data()
+        with open(filename, 'w') as f:
+            self._write_header(f)
+            self._write_data(f)
+
+    def _check_data(self):
+        """
+        check that the data array can be exported.
+
+        explicit exceptions are raised rather than assertions
+        because assertions are skipped when python runs in optimized mode.
+
+        @return: None
+
+        @raise TypeError if @ref data is not a numpy array.
+
+        @raise ValueError if @ref data is not one-dimensional or has no named columns.
+        """
+        if not isinstance(self.data, np.ndarray):
+            raise TypeError("IgorExport.data must be a numpy.ndarray")
+        if len(self.data.shape) != 1:
+            raise ValueError("IgorExport.data must be a one-dimensional array")
+        # dtype.names is None for arrays that are not structured.
+        names = self.data.dtype.names
+        if not names or len(names[0]) < 1:
+            raise ValueError("IgorExport.data must be a structured array with named columns")
+
+    def _fix_name(self, name):
+        """
+        fix a wave name.
+
+        this function first applies @ref namefix (if set) and then prepends @ref prefix
+        to the proposed wave name.
+
+        @param name: (str) proposed wave name
+
+        @return: corrected name
+        """
+        if self.namefix is not None:
+            name = self.namefix(name)
+        return self.prefix + name
+
+    def _write_header(self, f):
+        """
+        write the header of the igor text file
+
+        @param f: open file or stream
+
+        @return: None
+        """
+        f.write('IGOR' + '\n')
+        f.write('X // pmsco data export\n')
+
+    def _write_data(self, f):
+        """
+        write a data section to the igor text file.
+
+        the section consists of a Waves line that declares one wave per column of the data array,
+        and the rows of the array between the BEGIN and END keywords.
+
+        @param f: open file or stream
+
+        @return: None
+
+        @raise TypeError if @ref data is not a numpy array.
+
+        @raise ValueError if @ref data is not a one-dimensional structured array.
+        """
+        self._check_data()
+
+        arr = self.data
+        shape = ",".join(map(str, arr.shape))
+        names = ", ".join(self._fix_name(name) for name in arr.dtype.names)
+
+        f.write('Waves/O/D/N=({shape}) {names}\n'.format(shape=shape, names=names))
+        f.write('BEGIN\n')
+        np.savetxt(f, arr, fmt='%g')
+        f.write('END\n')
--- a/pmsco/makefile
+++ b/pmsco/makefile
@ -1,15 +1,16 @@
 SHELL=/bin/sh

-# makefile for EDAC, MSC, and MUFPOT programs and modules
+# makefile for external programs and modules
 #
 # see the top-level makefile for additional information.

-.PHONY: all clean edac loess msc mufpot
+.PHONY: all clean edac loess msc mufpot phagen

 EDAC_DIR = edac
 MSC_DIR = msc
 MUFPOT_DIR = mufpot
 LOESS_DIR = loess
+PHAGEN_DIR = calculators/phagen

 all: edac loess

@ -25,9 +26,13 @@ msc:
 mufpot:
 	$(MAKE) -C $(MUFPOT_DIR)

+phagen:
+	$(MAKE) -C $(PHAGEN_DIR)
+
 clean:
 	$(MAKE) -C $(EDAC_DIR) clean
 	$(MAKE) -C $(LOESS_DIR) clean
 	$(MAKE) -C $(MSC_DIR) clean
 	$(MAKE) -C $(MUFPOT_DIR) clean
+	$(MAKE) -C $(PHAGEN_DIR) clean
 	rm -f *.pyc
--- a/pmsco/optimizers/grid.py
+++ b/pmsco/optimizers/grid.py
@ -344,9 +344,14 @@ class GridSearchHandler(handlers.ModelHandler):
        time_pending += self._model_time
        if time_pending > time_avail:
            self._timeout = True
+            logger.warning("time limit reached")
+
+        if self._invalid_count > self._invalid_limit:
+            self._timeout = True
+            logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)

        model = self._next_model
-        if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
+        if not self._timeout and model < self._pop.model_count:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            pos = self._pop.positions[model]
--- a/pmsco/optimizers/population.py
+++ b/pmsco/optimizers/population.py
@ -491,9 +491,9 @@ class Population(object):
        seed = np.genfromtxt(seed_file, names=True)
        try:
            seed = seed[seed['_rfac'] <= rfac_limit]
-        except KeyError:
-            logger.warning(BMsg("missing _rfac column in seed file {hf}. ignoring seed file.", hf=seed_file))
-            return 0
+        except ValueError:
+            recalc_seed = True
+            logger.warning(BMsg("missing _rfac column in seed file {hf}. re-calculating.", hf=seed_file))
        else:
            seed.sort(order='_rfac')
        seed_size = min(seed.shape[0], count_limit)
@ -508,9 +508,12 @@ class Population(object):
        if len(common_fields) < len(dest_fields):
            logger.warning(BMsg("missing columns in seed file {hf}.", hf=seed_file))

-        logger.info(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
+        logger.warning(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))

-        self.pos['_rfac'][first:last] = seed['_rfac']
+        try:
+            self.pos['_rfac'][first:last] = seed['_rfac']
+        except ValueError:
+            self.pos['_rfac'][first:last] = 2.1
        dest_index = np.arange(first, last)
        for name in common_fields:
            sel1 = np.less_equal(self.model_min[name], seed[name])
@ -525,11 +528,11 @@ class Population(object):
        self.pos['_model'][first:last] = np.arange(seed_size) + first
        if recalc_seed:
            self.pos['_rfac'][first:last] = 2.1
-            logger.info("models from seed file are re-calculated.")
+            logger.warning("models from seed file are re-calculated.")
        else:
            sel = self.pos['_rfac'][first:last] <= rfac_limit
            self.pos['_gen'][dest_index[sel]] = -1
-            logger.info(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
+            logger.warning(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))

        return seed_size

@ -579,7 +582,7 @@ class Population(object):
        if len(common_fields) < len(dest_fields):
            logger.warning(BMsg("loaded patch file {pf}. some columns are missing.", pf=patch_file))
        else:
-            logger.info(BMsg("loaded patch file {pf}.", pf=patch_file))
+            logger.warning(BMsg("loaded patch file {pf}.", pf=patch_file))

    def _apply_patch(self):
        """
@ -592,7 +595,7 @@ class Population(object):
        parameter values that lie outside the parameter domain (min/max) are ignored.
        """
        if self.pos_patch is not None:
-            logger.info(BMsg("patching the population with new positions."))
+            logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
            source_fields = set(self.pos_patch.dtype.names)
            dest_fields = set(self.model_start.keys())
            common_fields = source_fields & dest_fields
@ -996,10 +999,10 @@ class Population(object):

        # rewrite model, tolerance and results as two-dimensional array
        if search_array is None:
-            results = self.results[names].copy()
-        else:
-            results = search_array[names].copy()
-        results = results.view((results.dtype[0], len(names)))
+            search_array = self.results
+        results = np.empty((search_array.shape[0], len(names)))
+        for col, name in enumerate(names):
+            results[:, col] = search_array[name]
        model = np.asarray(model_tuple, results.dtype)
        tol = np.asarray([max(abs(self.model_max[name]), abs(self.model_min[name]), precision)
                          for name in names])
@ -1169,8 +1172,9 @@ class PopulationHandler(handlers.ModelHandler):
        """
        initialize the particle swarm and open an output file.

-        the population size is set to project.optimizer_params.['pop_size'] if it is defined and greater than 4.
-        otherwise, it defaults to <code>max(2 * slots, 4)</code>.
+        the population size is set to `project.optimizer_params['pop_size']`
+        if it is defined and greater than 4.
+        otherwise, it defaults to `max(slots, 4)`.

        for good efficiency the population size (number of particles) should be
        greater or equal to the number of available processing slots,
@ -1191,7 +1195,9 @@ class PopulationHandler(handlers.ModelHandler):
        super(PopulationHandler, self).setup(project, slots)

        _min_size = 4
-        self._pop_size = max(project.optimizer_params.get('pop_size', self._slots * 2), _min_size)
+        _def_size = self._slots
+        _req_size = project.optimizer_params.get('pop_size', 0)
+        self._pop_size = _req_size if _req_size >= _min_size else _def_size
        self.setup_population()
        self._invalid_limit = self._pop_size * 10

@ -1228,7 +1234,11 @@ class PopulationHandler(handlers.ModelHandler):
        because the best peer position in the generation may not be known yet.
        the effect can be reduced by making the population larger than the number of processes.

-        @return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
+        the created tasks are returned as the function result and added to self._pending_tasks.
+
+        @return list of generated tasks.
+            empty list if the optimization has converged (see Population.is_converged())
+            or if the time limit is approaching.
        """

        super(PopulationHandler, self).create_tasks(parent_task)
@ -1241,7 +1251,7 @@ class PopulationHandler(handlers.ModelHandler):
        time_pending = self._model_time * len(self._pending_tasks)
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)

-        out_tasks = []
+        new_tasks = []
        if not self._timeout and not self._converged:
            self._check_patch_file()
            self._pop.advance_population()
@ -1250,7 +1260,8 @@ class PopulationHandler(handlers.ModelHandler):
                time_pending += self._model_time
                if time_pending > time_avail:
                    self._timeout = True
-                    logger.info("time limit reached")
+                    logger.warning("time limit reached")
+                    new_tasks = []
                    break

                if pos['_gen'] >= 0:
@ -1258,12 +1269,12 @@ class PopulationHandler(handlers.ModelHandler):
                    new_task.parent_id = parent_id
                    new_task.model = pos
                    new_task.change_id(model=pos['_model'])
+                    new_tasks.append(new_task)

-                    child_id = new_task.id
-                    self._pending_tasks[child_id] = new_task
-                    out_tasks.append(new_task)
+        for task in new_tasks:
+            self._pending_tasks[task.id] = task

-        return out_tasks
+        return new_tasks

    def _check_patch_file(self):
        """
@ -1323,7 +1334,7 @@ class PopulationHandler(handlers.ModelHandler):

        if task.result_valid:
            if self._pop.is_converged() and not self._converged:
-                logger.info("population converged")
+                logger.warning("population converged")
                self._converged = True

            if task.time > self._model_time:
--- a/pmsco/pmsco.py
+++ b/pmsco/pmsco.py
@ -46,8 +46,6 @@ import sys

 from mpi4py import MPI

-import pmsco.calculators.calculator as calculator
-import pmsco.cluster as cluster
 import pmsco.dispatch as dispatch
 import pmsco.files as files
 import pmsco.handlers as handlers
@ -154,12 +152,6 @@ def set_common_args(project, args):
    if args.table_file:
        project.optimizer_params['table_file'] = args.table_file

-    code = args.code.lower()
-    if code in {'edac', 'msc', 'test'}:
-        project.code = code
-    else:
-        logger.error("invalid code argument")
-
    if args.time_limit:
        project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))

@ -178,38 +170,6 @@ def set_common_args(project, args):
        project.keep_best = args.keep_best


-def log_project_args(project):
-    """
-    send some common project arguments to the log.
-
-    @param project: project instance (sub-class of pmsco.project.Project).
-    @return: None
-    """
-    try:
-        logger.info("scattering code: {0}".format(project.code))
-        logger.info("optimization mode: {0}".format(project.mode))
-        try:
-            logger.info("minimum population size: {0}".format(project.optimizer_params['pop_size']))
-        except KeyError:
-            pass
-        try:
-            logger.info("seed file: {0}".format(project.optimizer_params['seed_file']))
-            logger.info("seed limit: {0}".format(project.optimizer_params['seed_limit']))
-        except KeyError:
-            pass
-        try:
-            logger.info("table file: {0}".format(project.optimizer_params['table_file']))
-        except KeyError:
-            pass
-        logger.info("data directory: {0}".format(project.data_dir))
-        logger.info("output file: {0}".format(project.output_file))
-
-        _files_to_keep = files.FILE_CATEGORIES - project.files.categories_to_delete
-        logger.info("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
-    except AttributeError:
-        logger.warning("AttributeError in log_project_args")
-
-
 def run_project(project):
    """
    run a calculation project.
@ -217,7 +177,11 @@ def run_project(project):
    @param project:
    @return:
    """
-    log_project_args(project)
+    # log project arguments only in rank 0
+    mpi_comm = MPI.COMM_WORLD
+    mpi_rank = mpi_comm.Get_rank()
+    if mpi_rank == 0:
+        project.log_project_args()

    optimizer_class = None
    if project.mode == 'single':
@ -240,26 +204,7 @@ def run_project(project):

    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

-    calculator_class = None
-    if project.code == 'edac':
-        logger.debug("importing EDAC interface")
-        from pmsco.calculators import edac
-        project.cluster_format = cluster.FMT_EDAC
-        calculator_class = edac.EdacCalculator
-    elif project.code == 'msc':
-        logger.debug("importing MSC interface")
-        from pmsco.calculators import msc
-        project.cluster_format = cluster.FMT_MSC
-        calculator_class = msc.MscCalculator
-    elif project.code == 'test':
-        logger.debug("importing TEST interface")
-        project.cluster_format = cluster.FMT_EDAC
-        calculator_class = calculator.TestCalculator
-    else:
-        logger.error("invalid code argument")
-    project.calculator_class = calculator_class
-
-    if project and optimizer_class and calculator_class:
+    if project and optimizer_class:
        logger.info("starting calculations")
        try:
            dispatch.run_calculations(project)
@ -286,7 +231,7 @@ class Args(object):
    values as the command line parser.
    """

-    def __init__(self, mode="single", code="edac", output_file="pmsco_data"):
+    def __init__(self, mode="single", output_file="pmsco_data"):
        """
        constructor.
        
@ -299,8 +244,7 @@ class Args(object):
        self.pop_size = 0
        self.seed_file = ""
        self.seed_limit = 0
-        self.code = code
-        self.data_dir = os.getcwd()
+        self.data_dir = ""
        self.output_file = output_file
        self.time_limit = 24.0
        self.keep_files = files.FILE_CATEGORIES_TO_KEEP
@ -319,6 +263,7 @@ def get_cli_parser(default_args=None):
    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        multiple-scattering calculations and optimization

@ -349,7 +294,7 @@ def get_cli_parser(default_args=None):
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
                        help='population size (number of particles) in swarm or genetic optimization mode. ' +
-                        'default is the greater of 4 or two times the number of calculation processes.')
+                        'default is the greater of 4 or the number of calculation processes.')
    parser.add_argument('--seed-file',
                        help='path and name of population seed file. ' +
                        'population data of previous optimizations can be used to seed a new optimization. ' +
@ -357,14 +302,11 @@ def get_cli_parser(default_args=None):
    parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
                        help='maximum number of models to use from the seed file. ' +
                        'the models with the best R-factors are selected.')
-    parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default=default_args.code,
-                        help='scattering code (default: edac)')
    parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file', default=default_args.output_file,
-                        help='base path for intermediate and output files.' +
-                             'default: pmsco_data')
+                        help='base path for intermediate and output files.')
    parser.add_argument('--table-file',
                        help='path and name of population table file for table optimization mode. ' +
                        'the file must have the same structure as the .pop or .dat files.')
@ -375,21 +317,18 @@ def get_cli_parser(default_args=None):
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
                        help='number of best models for which to keep result files '
-                             '(at each node from root down to keep-levels). '
-                             'default 10 (project can define higher default).')
+                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        default=default_args.keep_levels,
                        help='task level down to which result files of best models are kept. '
-                             '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region. '
-                             'default 1 (project can define higher default).')
+                             '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
-                        help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
+                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file', default=default_args.log_file,
                        help='name of the main log file. ' +
-                             'under MPI, the rank of the process is inserted before the extension. ' +
-                             'defaults: output file + log, or pmsco.log.')
+                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level', default=default_args.log_level,
-                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL. default: WARNING.')
+                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                        help="enable logging. by default, logging is on.")
--- a/pmsco/project.py
+++ b/pmsco/project.py
@ -33,12 +33,15 @@ from __future__ import print_function
 import collections
 import copy
 import datetime
+import git
 import logging
 import numpy as np
 import os.path
 import socket
 import sys

+from pmsco.calculators.calculator import InternalAtomicCalculator
+from pmsco.calculators.edac import EdacCalculator
 import pmsco.cluster as mc
 from pmsco.compat import open
 import pmsco.data as md
@ -177,32 +180,89 @@ class Params(object):
    # @arg emission angle window (EDAC)
    # @arg angular_broadening (MSC)

+    ## @var binding_energy (float)
+    # initial state binding energy with respect to the Fermi level in eV
+    #
+
+    ## @var initial_state (str)
+    # initial state
+    #
+    # 1s, 2p, 2p1/2, etc.
+    #
+
    ## @var phase_files (dict)
-    # dictionary of phase files.
+    # dictionary of phase or scattering matrix element files.
    #
    # the keys are atomic numbers, the values file names.
-    # if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
+    # whether the files contain phase shifts or matrix elements depends on the calculator.
+    # EDAC determines the kind of information from the first line in the file.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the scattering matrix is computed by the calculator (if supported).
    #
    # maps to:
    # @arg scatterer (EDAC)
    # @arg atomic_number, phase_file (MSC)

+    ## @var phase_output_classes (int or iterable of int)
+    # atom classes for which to output phase files
+    #
+    # if the atomic scattering factors are calculated internally,
+    # EDAC can export them to scattering files.
+    #
+    # this parameter can be one of
+    # @arg None (default) no phase output,
+    # @arg integer number defining a range 0:N-1 of atom classes,
+    # @arg iterable (e.g., set or sequence) of atom classes to export.
+    #
+    # the problem is that EDAC expects the user to list each atom class to export,
+    # though it is not possible to know how many classes there will be
+    # or which atoms belong to which class before the calculation is actually done.
+    # the number of classes will be between the number of different elements and the number of atoms.
+    #
+    # thus, this parameter should normally be left at its default value
+    # and used only in specific situations that can be processed manually.
+    # if the parameter is non-default, EDAC will also produce a cluster output
+    # that includes a mapping between atomic coordinates and atom classes.
+    #
+    # @note the files generated belong to the category "output".
+    # you need to specify `--keep-files output` to prevent them from getting cleaned up.
+
+    ## @var polarization (str)
+    # photon polarization
+    #
+    # 'H', 'V', 'L', 'R', 'U'
+    #
+
+    ## @var rme_files (dict)
+    # dictionary of radial matrix element files.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the radial matrix defaults to the rme_xxx_xxx attributes.
+    #
+    # in EDAC, RME files or constants are considered only if @ref phase_files are specified.
+    #
+
+    ## @var work_function (float)
+    # work function in eV
+    #
+    # the energy scale of EDAC is referenced to the vacuum level
+    # but data files are referenced to the Fermi level.
+    # the @ref pmsco.calculators.edac module adds the work function to the kinetic energy before it calls EDAC.
+    #
+
    def __init__(self):
        self.title = "default parameters"
        self.comment = "set by project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
-        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
-        # MSC convention: H, V, L, R, U
+        self.binding_energy = 0.0
        self.polarization = "H"
        self.angular_resolution = 1.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
-        # the energy scale of EDAC is referenced to the vacuum level
-        # but data files are referenced to the Fermi level
-        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC
        self.work_function = 0.0
        self.symmetry_range = 360.0
        self.polar_incidence_angle = 60.0
@ -211,6 +271,11 @@ class Params(object):
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        self.phase_files = {}
+        self.rme_files = {}
+        self.rme_minus_value = 0.1
+        self.rme_minus_shift = 0.0
+        self.rme_plus_value = 1.0
+        self.rme_plus_shift = 0.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
@ -221,15 +286,23 @@ class Params(object):
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
-        self.rme_minus_value = 0.5
-        self.rme_minus_shift = 0.0
-        self.rme_plus_value = 0.5
-        self.rme_plus_shift = 0.0
        # used by EDAC only
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        self.lmax = 15
        self.dmax = 5.0
        self.orders = [20]
+        self.phase_output_classes = None
+
+    @property
+    def l_init(self):
+        """
+        orbital quantum number l of the initial state.
+
+        the value is looked up from the second character of the initial_state attribute,
+        i.e. the sub-shell letter ('p' in "2p" or "2p1/2").
+
+        @return: (int) position of the sub-shell letter in the sequence s, p, d, f (0..3).
+
+        @raise ValueError if the sub-shell letter is not one of s, p, d, f.
+        """
+        subshell = self.initial_state[1]
+        return "spdf".index(subshell)


 class Scan(object):
@ -301,11 +374,45 @@ class Scan(object):
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
-        self.energies = np.zeros((0))
-        self.thetas = np.zeros((0))
-        self.phis = np.zeros((0))
-        self.alphas = np.zeros((0))
-        
+        self.positions = {
+            'e': np.empty(0),
+            't': np.empty(0),
+            'p': np.empty(0),
+            'a': np.empty(0),
+        }
+
+    @property
+    def energies(self):
+        """
+        energy positions of the scan.
+
+        shortcut to the 'e' item of the positions dictionary.
+        assigning to this property replaces that item.
+        """
+        return self.positions['e']
+
+    @energies.setter
+    def energies(self, value):
+        self.positions['e'] = value
+
+    @property
+    def thetas(self):
+        """
+        theta positions of the scan.
+
+        shortcut to the 't' item of the positions dictionary.
+        assigning to this property replaces that item.
+        """
+        return self.positions['t']
+
+    @thetas.setter
+    def thetas(self, value):
+        self.positions['t'] = value
+
+    @property
+    def phis(self):
+        """
+        phi positions of the scan.
+
+        shortcut to the 'p' item of the positions dictionary.
+        assigning to this property replaces that item.
+        """
+        return self.positions['p']
+
+    @phis.setter
+    def phis(self, value):
+        self.positions['p'] = value
+
+    @property
+    def alphas(self):
+        """
+        alpha positions of the scan.
+
+        shortcut to the 'a' item of the positions dictionary.
+        assigning to this property replaces that item.
+        """
+        return self.positions['a']
+
+    @alphas.setter
+    def alphas(self, value):
+        self.positions['a'] = value
+
    def copy(self):
        """
        create a copy of the scan.
@ -314,9 +421,9 @@ class Scan(object):
        """
        return copy.deepcopy(self)

-    def set_scan(self, filename, emitter, initial_state):
+    def import_scan_file(self, filename, emitter, initial_state):
        """
-        set file name of reference experiment and load it.
+        import the reference experiment.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.
@ -324,9 +431,8 @@ class Scan(object):
        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
-        this version introduces holo scans as an experimental feature.
-        for all other scan types, the scan file must exactly conform with a rectangular scan.
-        the following scans are currently supported:
+        holo scans are transparently mapped to rectangular scans by pmsco.
+        this method accepts the following scans:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
@ -347,43 +453,120 @@ class Scan(object):
        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
-            self.mode, positions = md.detect_scan_mode(self.raw_data)
+            self.mode, self.positions = md.detect_scan_mode(self.raw_data)

-            if 'e' in self.mode:
-                self.energies = positions['e']
-            else:
+            if 'e' not in self.mode:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except ValueError:
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

-            if 't' in self.mode:
-                self.thetas = positions['t']
-            else:
+            if 't' not in self.mode:
                try:
                    self.thetas = np.asarray((self.raw_data['t'][0], ))
                except ValueError:
                    logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
-                    self.thetas = np.zeros((1))
+                    self.thetas = np.zeros(1)

-            if 'p' in self.mode:
-                self.phis = positions['p']
-            else:
+            if 'p' not in self.mode:
                try:
                    self.phis = np.asarray((self.raw_data['p'][0], ))
                except ValueError:
                    logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
-                    self.phis = np.zeros((1))
+                    self.phis = np.zeros(1)

-            if 'a' in self.mode:
-                self.alphas = positions['a']
-            else:
+            if 'a' not in self.mode:
                try:
                    self.alphas = np.asarray((self.raw_data['a'][0], ))
                except ValueError:
                    logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
-                    self.alphas = np.zeros((1))
+                    self.alphas = np.zeros(1)
+
+    def define_scan(self, positions, emitter, initial_state):
+        """
+        set up a synthetic scan on a cartesian (rectangular) grid.
+
+        rather than importing a data file, the scan is built from the given axis positions.
+        each dimension that holds more than one position becomes a scan dimension
+        and is appended to the mode list in the order 'e', 't', 'p', 'a'.
+        the raw data array receives all combinations of the axis positions and a constant intensity of 1.
+        the file name is reset to an empty string.
+
+        the method can create the following scan schemes:
+
+        * intensity vs energy at fixed theta, phi
+        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
+        * intensity vs theta, phi, or alpha
+        * intensity vs theta and phi (rectangular holo scan)
+
+        @param positions: (dictionary of numpy arrays)
+            the dictionary must contain a one-dimensional array for each scan dimension 'e', 't', 'p' and 'a'.
+            these arrays must contain unique, equidistant positions.
+            constant dimensions must contain exactly one value.
+            missing angle dimensions default to 0,
+            a missing energy dimension results in a KeyError.
+            the arrays are copied, the caller's arrays are not modified.
+
+        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
+
+        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".
+
+        @raise KeyError if positions does not contain the 'e' item.
+
+        @raise AssertionError if the number of scan dimensions is not 1 or 2,
+            or if 't' and 'a' are scanned at the same time.
+        """
+        self.filename = ""
+        self.emitter = emitter
+        self.initial_state = initial_state
+        self.mode = []
+
+        axes = (('e', "energy"), ('t', "theta"), ('p', "phi"), ('a', "alpha"))
+        shape = 1
+        for dim, name in axes:
+            try:
+                axis = np.copy(positions[dim])
+            except KeyError:
+                if dim == 'e':
+                    # the energy axis is mandatory
+                    logger.error("missing energy in define_scan arguments")
+                    raise
+                # the angle axes are optional and fall back to a single position at zero
+                logger.info("missing {0} in define_scan arguments, defaulting to 0.0".format(name))
+                self.positions[dim] = np.zeros(1)
+                continue
+
+            self.positions[dim] = axis
+            # only axes with more than one position count as scan dimensions
+            if axis.shape[0] > 1:
+                self.mode.append(dim)
+                shape *= axis.shape[0]
+
+        assert 0 < len(self.mode) <= 2, "unacceptable number of dimensions in define_scan"
+        assert 't' not in self.mode or 'a' not in self.mode, "unacceptable combination of dimensions in define_scan"
+
+        self.dtype = md.DTYPE_ETPAI
+        self.raw_data = np.zeros(shape, self.dtype)
+        dim_keys = ['e', 't', 'p', 'a']
+        grid = np.meshgrid(*[self.positions[key] for key in dim_keys])
+        for key, coords in zip(dim_keys, grid):
+            self.raw_data[key] = coords.reshape(-1)
+        self.raw_data['i'] = 1


 # noinspection PyMethodMayBeStatic
@ -465,9 +648,8 @@ class Project(object):
    #
    # @arg @c 'pop_size' (int)
    #   population size (number of particles) in the swarm or genetic optimization mode.
-    #   by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
+    #   by default, the population size is set to the number of parallel processes or 4, whichever is greater.
    #   you may want to override the default value in cases where the automatic choice is not appropriate.
-    #   the value can be set by the command line.
    # @arg @c 'seed_file' (string)
    #   name of a file containing the results from previous optimization runs.
    #   this can be used to resume a swarm or genetic optimization where it was interrupted before.
@ -537,9 +719,27 @@ class Project(object):
    # @arg 3 = emitter level: emitter nodes in addition to level 1.
    # @arg 4 = region level: region nodes in addition to level 1.

+    ## @var atomic_scattering_factory
+    # factory function to create an atomic scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.AtomicCalculator.
+    # the name of atomic scattering calculator classes should end in AtomicCalculator.
+
+    ## @var multiple_scattering_factory
+    # factory function to create a multiple scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.Calculator
+    #
+    # example: pmsco.calculators.edac.EdacCalculator
+    #
+
    def __init__(self):
        self.mode = "single"
-        self.code = "edac"
+        self.job_name = ""
+        self.git_hash = ""
+        self.description = ""
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = mc.LegacyClusterGenerator(self)
@ -568,7 +768,8 @@ class Project(object):
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
        }
-        self.calculator_class = None
+        self.atomic_scattering_factory = InternalAtomicCalculator
+        self.multiple_scattering_factory = EdacCalculator
        self._tasks_fields = []
        self._db = database.ResultsDatabase()

@ -608,7 +809,7 @@ class Project(object):
        self.combined_scan = None
        self.combined_modf = None

-    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
+    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
        """
        add the file name of reference experiment and load it.
        
@ -627,6 +828,15 @@ class Project(object):
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.
+            the file is not loaded when the optional positions argument is present,
+            but the filename may serve as basename for output files (e.g. modulation function).
+
+        @param positions: (optional, dictionary of numpy arrays) scan positions.
+            if specified, the file given by filename is _not_ loaded,
+            and the scan positions are initialized from this dictionary.
+            the dictionary keys are the possible scan dimensions: 'e', 't', 'p', 'a'.
+            the arrays are one-dimensional and contain unique, equidistant positions.
+            constant dimensions have shape 1. see @ref Scan.define_scan.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

@ -638,11 +848,13 @@ class Project(object):
        @param modf_model: (dict) model parameters to be passed to the modulation function.

        @return (Scan) the new scan object (which is also a member of self.scans).
-
-        @todo the accepted scanning schemes should be generalized.
        """
        scan = Scan()
-        scan.set_scan(filename, emitter, initial_state)
+        if positions is not None:
+            scan.define_scan(positions, emitter, initial_state)
+            scan.filename = filename
+        else:
+            scan.import_scan_file(filename, emitter, initial_state)
        self.scans.append(scan)

        if modf_model is None:
@ -735,6 +947,41 @@ class Project(object):
        """
        self.timedelta_limit = timedelta

+    def log_project_args(self):
+        """
+        send some common project attributes to the log.
+
+        the attributes are normally logged at WARNING level.
+        optimizer parameters that have an empty or zero value are logged at DEBUG level.
+
+        this method is called by the main pmsco module after creating the project and assigning command line arguments.
+        it may be overridden to add logs of attributes of the sub-class.
+
+        logging the attributes is best effort:
+        if one of the attributes is missing, the AttributeError is not propagated.
+        instead, a warning including the traceback is sent to the log.
+
+        @return: None
+        """
+        try:
+            logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
+            logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
+            logger.warning("optimization mode: {0}".format(self.mode))
+
+            for key in sorted(self.optimizer_params):
+                val = self.optimizer_params[key]
+                lev = logging.WARNING if val else logging.DEBUG
+                logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))
+
+            logger.warning("data directory: {0}".format(self.data_dir))
+            logger.warning("output file: {0}".format(self.output_file))
+
+            # the categories are the result of a set difference and have no defined order.
+            # sort them so that the log message is reproducible from run to run.
+            _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
+            logger.warning("intermediate files to keep: {0}".format(", ".join(sorted(_files_to_keep))))
+
+            for idx, scan in enumerate(self.scans):
+                logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
+            for idx, sym in enumerate(self.symmetries):
+                logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
+
+        except AttributeError:
+            # do not abort the job because of a log message,
+            # but include the traceback so that the missing attribute can be identified.
+            logger.warning("AttributeError in log_project_args", exc_info=True)
+
    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine results of different symmetry into one result and calculate the modulation function.
@ -937,6 +1184,23 @@ class Project(object):
        else:
            md.save_data(parent_task.modf_filename, modf)

+    def get_git_hash(self):
+        """
+        get the git commit (hash) of the running code (HEAD)
+
+        the method looks for a git repository in the source tree of this module.
+        if successful, it returns the hash string of the HEAD commit.
+
+        the method does not raise an exception if the hash cannot be determined for one of the reasons listed below.
+
+        @return: hexadecimal hash string.
+            empty string if the file is not in a git repository,
+            if the path of this module does not exist in the file system,
+            or if HEAD of the repository does not point to a commit (e.g. empty repository).
+        """
+        try:
+            repo = git.Repo(__file__, search_parent_directories=True)
+        except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
+            return ""
+
+        try:
+            return repo.head.commit.hexsha
+        except ValueError:
+            # GitPython raises ValueError if the reference behind HEAD does not exist yet.
+            return ""
+
    def setup(self, handlers):
        """
        prepare for calculations.
@ -954,11 +1218,13 @@ class Project(object):

        @return: None
        """
+        self.git_hash = self.get_git_hash()
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
+        fields.append("secs")
        fields = ["_" + f for f in fields]
        dom = self.create_domain()
-        model_fields = dom.start.keys()
+        model_fields = list(dom.start.keys())
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields
@ -968,16 +1234,16 @@ class Project(object):
            outfile.write(" ".join(fields))
            outfile.write("\n")

-        # todo : fill in the descriptive fields, change to file-database
+        # todo : change to file-database
        self._db.connect(":memory:")
        project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
        job_id = self._db.register_job(project_id,
-                                       "job-name",
+                                       self.job_name,
                                       self.mode,
                                       socket.gethostname(),
-                                       "git-hash",
+                                       self.git_hash,
                                       datetime.datetime.now(),
-                                       "description")
+                                       self.description)
        self._db.register_params(model_fields)
        self._db.create_models_view()

@ -1012,6 +1278,7 @@ class Project(object):
                values_dict = {"_" + k: v for k, v in values_dict.items()}
                values_dict.update(parent_task.model)
                values_dict['_rfac'] = parent_task.rfac
+                values_dict['_secs'] = parent_task.time.total_seconds()
                values_list = [values_dict[field] for field in self._tasks_fields]
                with open(self.output_file + ".tasks.dat", "a") as outfile:
                    outfile.write(" ".join(format(value) for value in values_list) + "\n")
@ -1258,6 +1525,76 @@ class Project(object):

        return _files

+    def before_atomic_scattering(self, task, par, clu):
+        """
+        project hook before atomic scattering factors are calculated.
+
+        this method derives the Params and Cluster objects for the atomic scattering calculation
+        from the original objects that will be used in the multiple scattering calculation.
+
+        in the basic version, the method returns the par and clu arguments unchanged for model tasks
+        (model index >= 0) and (None, None) for any other task, i.e. the root task (reference cluster).
+        subclasses may override it to modify or replace the cluster.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            if the model index is -1, an overriding method can return the global reference cluster
+            (to calculate the fixed scattering factors that will be used for all models)
+            or None if no global scattering factors should be calculated.
+            do not modify this object!
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+            the method can make modifications to this object instance directly.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            the method can make modifications to this object instance directly.
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used for the calculation of atomic scattering factors.
+            these should either be the original function arguments,
+            or copies of the original arguments.
+            if atomic scattering factors should not be calculated, the method should return (None, None).
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
+    def after_atomic_scattering(self, task, par, clu):
+        """
+        project hook after atomic scattering factors are calculated.
+
+        this method cleans up the Params and Cluster objects from the atomic scattering calculation
+        so that they can be used in the multiple scattering calculation.
+
+        in the basic version, the method just passes the par and clu arguments through for model tasks
+        (model index >= 0) and returns (None, None) for the root task.
+        subclasses may override it and modify the cluster and/or input parameters
+        so that the desired atomic scattering factors are used.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            the basic version reads only the model index (task.id.model):
+            a non-negative index denotes a model task whose arguments are passed through,
+            a negative index (-1 for the root task) yields (None, None).
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            do not modify this object, make a copy!
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used in the subsequent multiple scattering calculation.
+            these should either be the original function arguments or copies of them.
+            the basic version returns (None, None) for the root task.
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
    def cleanup(self):
        """
        delete unwanted files at the end of a project.