996 lines
40 KiB
Python
996 lines
40 KiB
Python
"""
|
|
@package pmsco.project
|
|
project-independent classes which store and handle model parameters.
|
|
|
|
the most important class defined here is Project.
|
|
each calculation project needs to derive its own project class from it.
|
|
the Domain and Params classes are typically used unchanged.
|
|
|
|
@note nomenclature: the term @e parameters has several meanings in the code and documentation.
|
|
the following distinctive terms are used in updated documentation sections.
|
|
ambiguous terms may still be present in older code sections.
|
|
@arg <em>calculation parameters</em> set of specific parameters passed as input to the calculation programs.
|
|
the amount and meaning of these parameters depend on the calculation code used.
|
|
typically, many of these parameters remain fixed, or change very rarely in the course of the study.
|
|
@arg <em>model parameters</em> concise set of independent physical parameters
|
|
that define the system in one calculation instance.
|
|
these parameters are varied systematically by the optimization process.
|
|
they are mapped to calculation parameters and a cluster by code derived from the Project class.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
from __future__ import division
|
|
import copy
|
|
import datetime
|
|
import logging
|
|
import numpy as np
|
|
import collections
|
|
import data as md
|
|
import cluster as mc
|
|
import files
|
|
import handlers
|
|
|
|
# module-level logger; messages propagate to the application's logging configuration.
logger = logging.getLogger(__name__)

# value domain of a single model parameter:
# start value, lower bound (min), upper bound (max) and step size.
# see the Domain class for the meaning of the fields.
ParamDomain = collections.namedtuple('ParamDomain', ['start', 'min', 'max', 'step'])
|
|
|
|
|
|
class Domain(object):
    """
    value domain of the model parameters.

    the domain is described by four dictionaries (start, min, max, step),
    each of which maps a model parameter name to a physical value.
    parameter names can be defined almost freely by the project,
    except that they should contain only alphanumeric and underscore characters.
    furthermore, names starting with an underscore are reserved for the optimizers.
    """

    ## @var start (dict)
    # dictionary of start values for each model parameter.
    #
    # the start value can be the initial guess for an optimization run,
    # or the actual value for a single calculation.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var min (dict)
    # dictionary of minimum values for each model parameter.
    #
    # the minimum defines the lower bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var max (dict)
    # dictionary of maximum values for each model parameter.
    #
    # the maximum defines the upper bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var step (dict)
    # dictionary of step sizes for each model parameter.
    #
    # depending on the optimization mode, the step is a guess of how fast values should vary,
    # e.g. step size, gradient, velocity, ...
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    def __init__(self):
        """
        initialize the domain object with empty dictionaries.
        """
        self.start, self.min, self.max, self.step = {}, {}, {}, {}

    def add_param(self, name, start, min, max, step):
        """
        declare one model parameter and set its whole domain at once.

        the exact meaning of the arguments depends on the calculation mode.

        @param name (string) name of the parameter (alphanumeric and underscore characters only).
        it is recommended to use short but distinctive names.

        @param start (float) start value.

        @param min (float) lower bound of the parameter interval.

        @param max (float) upper bound of the parameter interval.

        @param step (float) step size.
        """
        for member, value in ((self.start, start), (self.min, min), (self.max, max), (self.step, step)):
            member[name] = value

    def get_param(self, name):
        """
        get all values of a model parameter in a tuple.

        @param name (string) name of the parameter.

        @return named tuple ParamDomain(start, min, max, step) of the parameter.

        @raise KeyError if the parameter is not defined.
        """
        return ParamDomain(*(member[name] for member in (self.start, self.min, self.max, self.step)))
|
|
|
|
|
|
class Params(object):
    """
    calculation parameters for a single scattering calculation job.

    this class holds all the calculation parameters that are passed via input file to the calculation program.

    the class can hold parameters for both the MSC and EDAC codes.
    some parameters are used by both codes, others are used just by one of them.
    newer features such as multiple emitters, multiple symmetries, and others are supported in EDAC mode only.
    MSC mode is currently not maintained.

    objects of this class are created by the implementation of the create_params() method
    of the actual project class.
    """
    def __init__(self):
        # free-text metadata written to the calculation input file
        self.title = "MSC default parameters"
        self.comment = "from msc_project.Params()"
        # input/output file names; typically filled in by the calculation handlers
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
        # MSC convention: H, V, L, R, U
        self.polarization = "H"
        self.angular_broadening = 0.0
        self.z_surface = 0.0
        # inner potential in eV
        self.inner_potential = 10.0
        # the energy scale of EDAC is referenced to the vacuum level
        # but data files are referenced to the Fermi level
        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC
        self.work_function = 0.0
        # angular range (degrees) covered by one symmetry unit
        self.symmetry_range = 360.0
        # incidence geometry of the exciting light (degrees)
        self.polar_incidence_angle = 60.0
        self.azimuthal_incidence_angle = 0.0
        # sample temperature and Debye model parameters for thermal vibrations
        self.experiment_temperature = 300.0
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
        self.fcut = 15.0
        self.cut = 15.0
        self.lattice_constant = 1.0
        # number of distinct atom types; the following lists hold one entry per type
        self.atom_types = 0
        self.atomic_number = [1, 2, 3, 4]
        self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
        self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
        # radial matrix elements (value and phase shift) of the two emission channels
        self.rme_minus_value = 0.5
        self.rme_minus_shift = 0.0
        self.rme_plus_value = 0.5
        self.rme_plus_shift = 0.0
        # used by EDAC only
        # emitters: list of (x, y, z, atom type) tuples
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        # maximum angular momentum quantum number of the expansion
        self.lmax = 15
        # cluster diameter parameter (presumably Angstrom -- TODO confirm against EDAC docs)
        self.dmax = 5.0
        # scattering orders to calculate
        self.orders = [20]
|
|
|
|
|
|
class Scan(object):
    """
    class to describe the scanning scheme or store the experimental data set.
    """

    ## @var filename (string)
    # file name from which a scan was loaded

    ## @var raw_data (numpy.ndarray)
    # original scan data (ETPAIS array)

    ## @var dtype (dict)
    # data type of self.raw_data.
    #
    # one of the data.DTYPE_Xxxx constants.

    ## @var modulation (numpy.ndarray)
    # modulation function calculated from original scan (ETPAIS array)

    ## @var mode (list of characters)
    # list of ETPAI column names which are scanned in self.raw_data.
    #
    # example: ['t','p']

    ## @var emitter (string)
    # chemical symbol of emitter atom
    #
    # example: 'Cu'

    ## @var initial_state (string)
    # nl term of initial state
    #
    # in the form expected by EDAC, for example: '1s'

    ## @var energies (numpy.ndarray)
    # kinetic energy referenced to Fermi level.
    #
    # one-dimensional array.

    ## @var thetas (numpy.ndarray)
    # polar angle referenced to normal emission
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique.

    ## @var phis (numpy.ndarray)
    # azimuthal angle referenced to arbitrary origin
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique, and not monotonic.

    ## @var alphas (numpy.ndarray)
    # polar angle referenced to normal emission
    #
    # one-dimensional array.

    def __init__(self):
        self.filename = ""
        self.raw_data = None
        self.dtype = None
        self.modulation = None
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
        self.energies = np.zeros((0))
        self.thetas = np.zeros((0))
        self.phis = np.zeros((0))
        self.alphas = np.zeros((0))

    def copy(self):
        """
        create a copy of the scan.

        @return: new independent scan object with the same attributes as the original one.
        """
        return copy.deepcopy(self)

    def _angle_positions(self, positions, axis, label):
        """
        determine the scan positions of one angle axis of the loaded scan.

        helper for set_scan(); factors out the identical handling of the
        theta, phi and alpha axes.

        @param positions: (dict) positions by axis as returned by md.detect_scan_mode.

        @param axis: (string) ETPAI column name of the axis, e.g. 't'.

        @param label: (string) human-readable axis name for log messages, e.g. 'theta'.

        @return: (numpy.ndarray) one-dimensional array of scan positions.
        if the axis is not scanned, the (single) fixed value from the data file,
        or 0.0 if the data file does not contain the axis column at all.
        """
        if axis in self.mode:
            return positions[axis]
        try:
            return np.asarray((self.raw_data[axis][0], ))
        except (KeyError, ValueError):
            # a missing field of a structured array raises KeyError in current numpy
            # versions and ValueError in old ones; both mean the axis is absent.
            logger.info("missing %s in scan file %s, defaulting to 0.0", label, self.filename)
            return np.zeros((1))

    def set_scan(self, filename, emitter, initial_state):
        """
        set file name of reference experiment and load it.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.

        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
        this version introduces holo scans as an experimental feature.
        for all other scan types, the scan file must exactly conform with a rectangular scan.
        the following scans are currently supported:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
        * intensity vs theta, phi, or alpha
        * holo scan (theta,phi)

        @param filename: (string) file name of the experimental data, possibly including a path.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

        @raise ValueError (propagated) if the scan file does not contain an energy column.
        """
        self.filename = filename
        self.emitter = emitter
        self.initial_state = initial_state

        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
            self.mode, positions = md.detect_scan_mode(self.raw_data)

            # the energy axis is mandatory: a missing energy is an error, not a default.
            if 'e' in self.mode:
                self.energies = positions['e']
            else:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except (KeyError, ValueError):
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

            # the angle axes default to 0.0 if absent from the file.
            self.thetas = self._angle_positions(positions, 't', 'theta')
            self.phis = self._angle_positions(positions, 'p', 'phi')
            self.alphas = self._angle_positions(positions, 'a', 'alpha')
|
|
|
|
|
class ClusterGenerator(object):
    """
    base class for cluster generators.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
        cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model.

        the number of emitter configurations may depend on the model parameters,
        scan index and symmetry index.

        emitter configurations are mainly a way to distribute the calculations to multiple
        processes based on emitters since the resulting diffraction patterns add up incoherently.
        for this to work, create_cluster() must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index
        (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at
        run-time based on the available resources.

        note that this function returns the number of _configurations_ not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project, what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must
        override this method and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index is -1 if called by the emitter handler.

        @return number of emitter configurations.
        this default implementation always returns 1,
        i.e. a single emitter configuration calculated in a single process.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index,
        symmetry index and emitter index.

        the scan index can be used to generate a different cluster for a different
        scan geometry, e.g. if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic
        arrangement. in this case, depending on the value of index.sym, the method must
        generate a cluster corresponding to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on its value, the method must react differently:

        1. if the value is lower than or equal to zero, return the full cluster and
           mark all inequivalent emitter atoms.
           emitters which are reproduced by a symmetry expansion in combine_emitters()
           should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater than zero, generate the cluster with the emitter
           configuration selected by the emitter index.
           the index is in the range between 1 and the return value of count_emitters().
           the results of the individual emitter calculations are summed up
           in combine_emitters().

        the code should ideally be written such that either case yields the same
        diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have
        to be implemented, and the method can ignore the emitter index.

        the method must ignore the model and energy index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index.
        if lower or equal to zero, generate the full cluster and mark all emitters.
        if greater than zero, the value is a 1-based index of the emitter configuration.

        @return None in this base implementation.
        derived classes must return a Cluster object.
        """
        return None
|
|
|
|
|
|
class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented
    by the project class.
    this class redirects generator calls to the project methods
    providing compatibility to older project code.
    """

    def __init__(self, project):
        """
        initialize the generator with a reference to the legacy project.

        @param project: project object whose create_cluster (and optionally
        count_emitters) methods are to be called.
        """
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.

        @return the result of project.count_emitters(model, index),
        or 1 if the project does not implement count_emitters.
        """
        # look the method up instead of wrapping the call in try/except AttributeError:
        # the previous implementation silently returned 1 when an AttributeError was
        # raised *inside* a buggy project.count_emitters, masking the error.
        project_count = getattr(self.project, "count_emitters", None)
        if project_count is None:
            return 1
        return project_count(model, index)

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.

        @return the Cluster object returned by project.create_cluster(model, index).
        """
        return self.project.create_cluster(model, index)
|
|
|
|
|
|
# noinspection PyMethodMayBeStatic
|
|
class Project(object):
|
|
"""
|
|
base class of a calculation project.
|
|
|
|
a 'calculation project' is a coded set of prescriptions
|
|
on how to get from a set of model parameters to simulated data
|
|
which correspond to provided experimental data.
|
|
the results include a measure of the quality of the simulated data compared to experimental data.
|
|
|
|
each calculation project must derive from this class.
|
|
it must implement the create_domain(), create_cluster(), and create_params() methods.
|
|
|
|
the other methods and attributes of this class
|
|
are for passing command line parameters to the calculation modules.
|
|
the attributes should be populated in the constructor of the derived class,
|
|
or (recommended) in the create_project() function of the module.
|
|
it is essential that the attributes are set correctly before calculation.
|
|
"""
|
|
|
|
## @var features (dictionary)
|
|
#
|
|
# calculation features and versions supported by the project.
|
|
#
|
|
# the dictionary contains key-value pairs where the key is the name of the feature and value is a version number.
|
|
# this field conditionally enables new software features that may break backward compatibility.
|
|
# derived projects should fill this field with the supported version
|
|
# upon creation (in their __init__ method or create_project() factory).
|
|
# version 0 (default) means that the feature is disabled.
|
|
#
|
|
# the following features can be enabled (list may be incomplete):
|
|
# as of this version, no optional features are defined.
|
|
#
|
|
# @note rather than introducing new features and, particularly, new versions that rely on this mechanism,
|
|
# developers of generic code should check whether backward compatibility could be achieved in a simpler way,
|
|
# e.g. by implementing additional methods whose default behaviour is the same as of the previous version.
|
|
# in some cases it may be better to refactor all current project code.
|
|
#
|
|
|
|
## @var scans (list of Scan objects)
|
|
# list of experimental or scan files for which calculations are to be run.
|
|
#
|
|
# the list must be populated by calling the add_scan() method.
|
|
# this should be done in the create_project() function, or through the command line arguments.
|
|
#
|
|
# the modulation function is calculated internally.
|
|
# if your scan files contain the modulation function (as opposed to intensity),
|
|
# you must add the files in the create_project() function.
|
|
# the command line does not support loading modulation functions.
|
|
#
|
|
# @c scans must be considered read-only. use project methods to change it.
|
|
|
|
## @var symmetries (list of arbitrary objects)
|
|
# list of symmetries for which calculations are to be run.
|
|
#
|
|
# it is up to the derived class what kind of objects are stored in the list.
|
|
# the recommended kind of objects are dictionaries which hold parameter values,
|
|
# similar to the model dictionaries.
|
|
#
|
|
# the list must be populated by calling the add_symmetry() method.
|
|
|
|
## @var cluster_generator (ClusterGenerator object)
|
|
# provides the cluster generator methods.
|
|
#
|
|
# a project must provide a cluster generator object that is derived from ClusterGenerator.
|
|
# at least the ClusterGenerator.create_cluster method must be implemented.
|
|
# if emitters should be run in parallel, the ClusterGenerator.count_emitters must be implemented as well.
|
|
#
|
|
# the initial value is a LegacyClusterGenerator object
|
|
# which routes cluster calls back to the project for compatibility with older project code.
|
|
|
|
## @var pop_size (int)
|
|
# population size (number of particles) in the particle swarm optimization.
|
|
#
|
|
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
|
|
# you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
|
|
# - the calculation of a model takes a long time compared to the available computing time.
|
|
# - the calculation of a model spawns many sub-tasks due to complex symmetry.
|
|
# - you want to increase the number of generations compared to the number of particles.
|
|
#
|
|
# the default value is 0.
|
|
#
|
|
# the value can be set by the command line.
|
|
|
|
## @var history_file (string)
|
|
# name of a file containing the results from previous optimization runs.
|
|
# this can be used to resume a swarm optimization where it was interrupted before.
|
|
#
|
|
# the history file is a space-delimited, multi-column, text file.
|
|
# output files of a previous optimization run can be used as is.
|
|
# there must be one column for each model parameter, and one column of R factors.
|
|
# the first row must contain the names of the model parameters.
|
|
# the name of the R factor column must be '_rfac'.
|
|
# additional columns may be included and are ignored.
|
|
#
|
|
# by default, no history is loaded.
|
|
|
|
## @var recalc_history (bool)
|
|
# select whether the R-factors of the historic models are calculated again.
|
|
#
|
|
# this is useful if the historic data was calculated for a different cluster, different set of parameters,
|
|
# or different experimental data, and if the R-factors of the new optimization may be systematically greater.
|
|
# set this argument to False only if the calculation is a continuation of a previous one
|
|
# without any changes to the code.
|
|
|
|
## @var data_dir
|
|
# directory path to experimental data.
|
|
#
|
|
# the project should load experimental data (scan files) from this path.
|
|
# this attribute receives the --data-dir argument from the command line
|
|
# if the project parses the common arguments (pmsco.set_common_args).
|
|
#
|
|
# it is up to the project to define where to load scan files from.
|
|
# if the location of the files may depend on the machine or user account,
|
|
# the user may want to specify the data path on the command line.
|
|
|
|
## @var output_file (string)
|
|
# file name root for data files produced during the calculation, including intermediate files.
|
|
#
|
|
|
|
## @var timedelta_limit (datetime.timedelta)
|
|
# wall time after which no new calculations should be started.
|
|
#
|
|
# the actual wall time may be longer by the remaining time of running calculations.
|
|
# running calculations will not be aborted.
|
|
|
|
## @var _combined_scan
|
|
# combined raw data from scans.
|
|
# updated by add_scan().
|
|
|
|
## @var _combined_modf
|
|
# combined modulation function from scans.
|
|
# updated by add_scan().
|
|
|
|
## @var files
|
|
# list of all generated data files with metadata.
|
|
# the list is used by model handlers to decide which files can be deleted at run time to save disk space.
|
|
#
|
|
# files.categories_to_delete determines which files can be deleted.
|
|
|
|
def __init__(self):
    # calculation mode and calculator selection
    self.mode = "single"
    self.code = "edac"
    # supported optional features (empty by default, see class documentation)
    self.features = {}
    self.cluster_format = mc.FMT_EDAC
    # default generator routes cluster calls back to the project (legacy interface)
    self.cluster_generator = LegacyClusterGenerator(self)
    # experimental scans and structural symmetries; populated via add_scan()/add_symmetry()
    self.scans = []
    self.symmetries = []
    # optimizer settings
    self.pop_size = 0
    self.history_file = ""
    self.recalc_history = True
    # file locations and run-time limits
    self.data_dir = ""
    self.output_file = "pmsco_data"
    self.timedelta_limit = datetime.timedelta(days=1)
    # caches of combined scan data, maintained by add_scan()/clear_scans()
    self._combined_scan = None
    self._combined_modf = None
    # tracker of generated data files (used for run-time clean-up)
    self.files = files.FileTracker()
    # task handler class per task level
    self.handler_classes = {
        'model': handlers.SingleModelHandler,
        'scan': handlers.ScanHandler,
        'symmetry': handlers.SymmetryHandler,
        'emitter': handlers.EmitterHandler,
        'region': handlers.SingleRegionHandler,
    }
    self.calculator_class = None
|
|
|
|
def create_domain(self):
    """
    create a msc_project.Domain object which defines the allowed range for model parameters.

    this method must be implemented by the actual project class.
    the Domain object must declare all model parameters used in the project.

    @return Domain object.
    this abstract base implementation returns None.
    """
    return None
|
|
|
|
def create_params(self, model, index):
    """
    create a Params object given the model parameters and calculation index.

    this method must be implemented by the actual project class.

    @param model (dictionary) model parameters to be used in the calculation.

    @param index (named tuple CalcID) calculation index.
    the method should consider only the following attributes:
    @arg @c scan scan index (index into Project.scans)
    @arg @c sym symmetry index (index into Project.symmetries)

    @return Params object.
    this abstract base implementation returns None.
    """
    return None
|
|
|
|
def clear_scans(self):
|
|
"""
|
|
clear scans.
|
|
|
|
delete all scans in self.scans and empty the list.
|
|
|
|
@return: None
|
|
"""
|
|
self.scans = []
|
|
self._combined_scan = None
|
|
self._combined_modf = None
|
|
|
|
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
    """
    add the file name of reference experiment and load it.

    the extension must be one of msc_data.DATATYPES (case insensitive)
    corresponding to the meaning of the columns in the file.

    caution: EDAC can only calculate equidistant, rectangular scans.
    the following scans are currently supported:

    * intensity vs energy at fixed theta, phi
    * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
    * intensity vs theta, phi, or alpha
    * intensity vs theta and phi (hemisphere or hologram scan)

    the method calculates the modulation function if @c is_modf is @c False.
    it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.

    @param filename: (string) file name of the experimental data, possibly including a path.

    @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

    @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

    @param is_modf: (bool) declares whether the file contains the modulation function (True),
    or intensity (False, default). In the latter case, the modulation function is calculated internally.

    @param modf_model: (dict) model parameters to be passed to the modulation function.

    @return (Scan) the new scan object (which is also a member of self.scans).

    @todo the accepted scanning schemes should be generalized.
    """
    scan = Scan()
    scan.set_scan(filename, emitter, initial_state)
    self.scans.append(scan)

    if modf_model is None:
        modf_model = {}

    # determine the modulation function of the new scan
    if scan.raw_data is not None:
        if is_modf:
            scan.modulation = scan.raw_data
        else:
            try:
                scan.modulation = self.calc_modulation(scan.raw_data, modf_model)
            except ValueError:
                logger.error("error calculating the modulation function of experimental data.")
                scan.modulation = None
    else:
        scan.modulation = None

    # update the combined comparison targets
    self._combined_scan = self._append_combined(self._combined_scan, scan.raw_data)
    self._combined_modf = self._append_combined(self._combined_modf, scan.modulation)

    return scan

def _append_combined(self, combined, addition):
    """
    append the data of one scan to a combined data array.

    helper for add_scan(); factors out the identical combination logic
    for the raw data and the modulation function.

    @param combined: (numpy.ndarray or None) previously combined data.

    @param addition: (numpy.ndarray or None) data of the new scan.

    @return (numpy.ndarray or None) the new combined array.
    None if @c addition is None (a scan without data invalidates the combined array,
    preserving the original behaviour);
    a copy of @c addition if @c combined is None;
    otherwise @c addition restructured to a common dtype and stacked onto @c combined.
    """
    if addition is None:
        return None
    if combined is None:
        return addition.copy()
    dtype = md.common_dtype((combined, addition))
    return np.hstack((combined, md.restructure_data(addition, dtype)))
|
|
|
|
def clear_symmetries(self):
|
|
"""
|
|
clear symmetries.
|
|
|
|
delete all symmetries in self.symmetries and empty the list.
|
|
|
|
@return: None
|
|
"""
|
|
self.symmetries = []
|
|
|
|
def add_symmetry(self, symmetry):
|
|
"""
|
|
add a symmetry to the list of symmetries.
|
|
|
|
this class declares the list of symmetries.
|
|
it does not define what should be in the list of symmetries.
|
|
however, there must be an entry for each symmetry to be calculated.
|
|
if the list is empty, no calculation will be executed.
|
|
|
|
@attention initially, the symmetries list is empty.
|
|
your project needs to add at least one symmetry.
|
|
otherwise, no calculation will be executed.
|
|
|
|
@param symmetry: it is up to the derived project class to specify and interpret the data stored here.
|
|
it is recommended to store a dictionary with symmetry parameters similar to the model parameters.
|
|
|
|
@return: None
|
|
"""
|
|
self.symmetries.append(symmetry)
|
|
|
|
def set_output(self, filename):
|
|
"""
|
|
set base name of output file
|
|
"""
|
|
self.output_file = filename
|
|
|
|
def set_timedelta_limit(self, timedelta):
|
|
"""
|
|
set the walltime limit
|
|
|
|
timedelta (datetime.timedelta)
|
|
"""
|
|
self.timedelta_limit = timedelta
|
|
|
|
def combine_symmetries(self, parent_task, child_tasks):
    """
    combine results of different symmetry into one result. calculate the modulation function.

    the symmetry results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    by default, this method adds all symmetries with equal weight.

    @param parent_task: (CalculationTask) parent task of the symmetry tasks.
    the method must write the results to the files indicated
    by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each symmetry.
    the method must read the source data from the files
    indicated by the @c result_filename attributes.
    the sequence is sorted by task ID, i.e., essentially, by symmetry index.

    @return: None

    @raise IndexError if child_tasks is empty

    @raise KeyError if a filename is missing

    @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
    the model parameters are available as the @c model attribute of the calculation tasks.
    """
    # sum the intensity columns of all symmetries with equal weight.
    # the first child's array is used as the accumulator.
    result_data = None
    for task in child_tasks:
        data = md.load_data(task.result_filename)
        if result_data is None:
            result_data = data
        else:
            result_data['i'] += data['i']

    if result_data is None:
        # enforce the documented contract explicitly rather than
        # passing None on to md.save_data with an obscure error.
        raise IndexError("combine_symmetries called with empty child_tasks")

    md.save_data(parent_task.result_filename, result_data)

    # todo : the handling of missing modulation functions may need some cleanup
    if self.scans[parent_task.id.scan].modulation is not None:
        result_modf = self.calc_modulation(result_data, parent_task.model)
        md.save_data(parent_task.modf_filename, result_modf)
    else:
        parent_task.modf_filename = ""
|
|
|
|
def combine_emitters(self, parent_task, child_tasks):
    """
    combine results of different emitters into one result. calculate the modulation function.

    the emitter results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    by default, this method adds all emitters with equal weight.

    sub-classes may override this method and implement expansion of equivalent emitters,
    unequal weights, etc.

    @param parent_task: (CalculationTask) parent task of the emitter tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each emitter.
        the method must read the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by the emitter index.

    @return: None

    @raise IndexError if child_tasks is empty

    @raise KeyError if a filename is missing

    @note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
    """

    # enforce the documented contract up front:
    # without this guard, an empty sequence would leave result_data = None
    # and fail obscurely inside md.save_data.
    if len(child_tasks) == 0:
        raise IndexError("child_tasks is empty")

    # sum the intensities of all emitters with equal weight.
    # the first loaded dataset serves as accumulator and output template.
    result_data = None
    for task in child_tasks:
        data = md.load_data(task.result_filename)
        if result_data is None:
            result_data = data
        else:
            result_data['i'] += data['i']

    md.save_data(parent_task.result_filename, result_data)

    # todo : the handling of missing modulation functions may need some cleanup
    if self.scans[parent_task.id.scan].modulation is not None:
        result_modf = self.calc_modulation(result_data, parent_task.model)
        md.save_data(parent_task.modf_filename, result_modf)
    else:
        # signal downstream handlers that no modulation file was produced
        parent_task.modf_filename = ""
|
|
|
|
def combine_scans(self, parent_task, child_tasks):
    """
    combine results of different scans into one result, for intensity and modulation.

    the scan results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    the datasets of the scans are appended.
    this is done for intensity and modulation data independently.

    @param parent_task: (CalculationTask) parent task of the scan tasks.
        the method writes the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
        the method reads the source data from the files
        indicated by the @c result_filename and @c modf_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by scan index.

    @return: None

    @raise IndexError if child_tasks is empty.

    @raise KeyError if a filename is missing.
    """

    # intensity and modulation data are concatenated independently.
    # the processing is identical except for the filename attribute involved,
    # so it is delegated to a shared helper.
    self._combine_scan_data(parent_task, child_tasks, 'result_filename')
    self._combine_scan_data(parent_task, child_tasks, 'modf_filename')

def _combine_scan_data(self, parent_task, child_tasks, filename_attr):
    """
    concatenate one kind of data file (intensity or modulation) of all scans.

    loads the files named by the @c filename_attr attribute of each child task,
    converts them to a common dtype, concatenates the datasets,
    and saves the result under the file named by the same attribute of parent_task.

    if any file cannot be loaded (KeyError or IOError), no output is written and
    the attribute of parent_task is set to the empty string
    to signal downstream handlers that the file is missing.

    @param parent_task: (CalculationTask) task that receives the combined data.

    @param child_tasks: (sequence of CalculationTask) tasks which identify the scans.

    @param filename_attr: (str) name of the filename attribute to process,
        either 'result_filename' or 'modf_filename'.

    @return: None
    """
    try:
        stack1 = [md.load_data(getattr(task, filename_attr)) for task in child_tasks]
    except (KeyError, IOError):
        setattr(parent_task, filename_attr, "")
    else:
        dtype = md.common_dtype(stack1)
        stack2 = [md.restructure_data(data, dtype) for data in stack1]
        combined = np.hstack(tuple(stack2))
        md.save_data(getattr(parent_task, filename_attr), combined)
|
|
|
|
# noinspection PyUnusedLocal
def calc_modulation(self, data, model):
    """
    calculate the project-dependent modulation function.

    the modulation function of I(x) is (I(x) - S(x)) / S(x)
    where S(x) is a smooth copy of I(x).

    by default, the modulation function is calculated by data.calc_modfunc_loess().
    override this method in your project to use a different modulation function.

    @param data structured numpy.ndarray in EI, ETPI, or ETPAI format.
        can contain a one- or multi-dimensional scan.
        the scan coordinates must be on a rectangular or hemispherical grid.
        for maximum compatibility, the array should be sorted,
        though for the default calc_modfunc_loess() function this is not required.

        if data contains a hemispherical scan, the phi dimension is ignored,
        i.e. the modulation function is calculated on a phi-average.

    @param model: (dict) model parameters of the calculation task.
        unused by the default implementation;
        available so that overriding projects can make the
        modulation function depend on the model.

    @return copy of the data array with the modulation function in the 'i' column.
    """

    return md.calc_modfunc_loess(data)
|
|
|
|
def calc_rfactor(self, task):
    """
    calculate the R-factor of a task.

    the R-factor is evaluated over the combined scans:
    the calculated modulation function is loaded from the task's modf file
    and compared to the combined experimental modulation in self._combined_modf.

    this method is called by the model handler.

    by default, the R-factor is calculated by data.rfactor() over the combined scans.
    override this method in your project to use a different R-factor algorithm.

    @param task: (CalculationTask) a model task.

    @return the scalar R-factor as returned by data.rfactor().
    """
    calc_modf = md.load_data(task.modf_filename)
    return md.rfactor(self._combined_modf, calc_modf)
|
|
|
|
def cleanup(self):
    """
    delete unwanted files at the end of a project.

    delegates to self.files.delete_files().

    @return: None
    """
    self.files.delete_files()
|