add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
This commit is contained in:
948
pmsco/handlers.py
Normal file
948
pmsco/handlers.py
Normal file
@ -0,0 +1,948 @@
|
||||
"""
|
||||
@package pmsco.handlers
|
||||
project-independent task handlers for models, scans, symmetries, emitters and energies.
|
||||
|
||||
calculation tasks are organized in a hierarchical tree.
|
||||
at each node, a task handler (feel free to find a better name)
|
||||
creates a set of child tasks according to the optimization mode and requirements of the project.
|
||||
at the end points of the tree, the tasks are ready to be sent to calculation program.
|
||||
the handlers collect the results, and return one combined dataset per node.
|
||||
the passing of tasks and results between handlers is managed by the processing loop.
|
||||
|
||||
<em>model handlers</em> define the model parameters used in calculations.
|
||||
the parameters can be chosen according to user input, or according to a structural optimization algorithm.
|
||||
a model handler class derives from the ModelHandler class.
|
||||
the most simple one, SingleModelHandler, is implemented in this module.
|
||||
it calculates the diffraction pattern of a single model with the start parameters given in the domain object.
|
||||
the handlers of the structural optimizers are declared in separate modules.
|
||||
|
||||
<em>scan handlers</em> split a task into one child task per scan file.
|
||||
scans are defined by the project.
|
||||
the actual merging step from multiple scans into one result dataset is delegated to the project class.
|
||||
|
||||
<em>symmetry handlers</em> split a task into one child per symmetry.
|
||||
symmetries are defined by the project.
|
||||
the actual merging step from multiple symmetries into one result dataset is delegated to the project class.
|
||||
|
||||
<em>emitter handlers</em> split a task into one child per emitter configuration (inequivalent sets of emitting atoms).
|
||||
emitter configurations are defined by the project.
|
||||
the merging of calculation results of emitter configurations is delegated to the project class.
|
||||
since emitters contribute incoherently to the diffraction pattern,
|
||||
it should make no difference how the emitters are grouped and calculated.
|
||||
code inspection and tests have shown that per-emitter results from EDAC can be simply added.
|
||||
|
||||
<em>energy handlers</em> may split a calculation task into multiple tasks
|
||||
in order to take advantage of parallel processing.
|
||||
|
||||
while several classes of model handlers are available,
|
||||
the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations.
|
||||
the scan and symmetry handlers call methods of the project class to invoke project-specific functionality.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-17 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import datetime
|
||||
import os
|
||||
import logging
|
||||
import math
|
||||
import numpy as np
|
||||
import data as md
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskHandler(object):
    """
    common ancestor for task handlers.

    this class defines the common interface of task handlers.
    """

    ## @var _project
    # (Project) project instance.

    ## @var _slots
    # (int) number of calculation slots (processes).
    #
    # for best efficiency the number of tasks generated should be greater or equal the number of slots.
    # it should not exceed N times the number of slots, where N is a reasonably small number.

    ## @var _pending_tasks
    # (dict) pending tasks by ID (created but not yet calculated).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _complete_tasks
    # (dict) complete tasks by ID (calculation finished, parent not yet complete).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _parent_tasks
    # (dict) pending parent tasks by ID.
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _invalid_count (int)
    # accumulated total number of invalid results received.
    #
    # the number is incremented by add_result if an invalid task is reported.
    # the number can be used by descendants to terminate a hopeless calculation.

    def __init__(self):
        self._project = None
        self._slots = 0
        self._pending_tasks = {}
        self._parent_tasks = {}
        self._complete_tasks = {}
        self._invalid_count = 0

    def setup(self, project, slots):
        """
        initialize the handler with project data and the process environment.

        the method is called once by the dispatcher before the calculation loop starts.
        the handler can initialize internal variables which it hasn't done in the constructor.

        @param project (Project) project instance.

        @param slots (int) number of calculation slots (processes).
        for best efficiency the number of tasks generated should be greater or equal the number of slots.
        it should not exceed N times the number of slots, where N is a reasonably small number.

        @return None
        """
        self._project = project
        self._slots = slots

    def cleanup(self):
        """
        clean up whatever is necessary, e.g. close files.

        this method is called once after all calculations have finished.

        @return None
        """
        pass

    def create_tasks(self, parent_task):
        """
        create the next series of child tasks for the given parent task.

        the method is called by the dispatcher when a new series of tasks should be generated.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
        the list must be empty if there are no more tasks.
        """

        return []

    def add_result(self, task):
        """
        collect and combine the results of tasks created by the same handler.

        this method collects the results of tasks that were created by self.create_tasks() and
        passes them on to the parent whenever a family (i.e. all tasks that have the same parent) is complete.
        when the family is complete, the method creates the data files that are represented by the parent task and
        signals to the caller that the parent task is complete.

        the method is called by the dispatcher whenever a calculation task belonging to this handler completes.

        as of this class, the method counts invalid results and
        adds the list of data files to the project's file tracker.
        collecting the tasks and combining their data must be implemented in sub-classes.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete,
        None if the family is not complete yet.
        As of this class, the method returns None.
        """
        if not task.result_valid:
            self._invalid_count += 1

        self.track_files(task)

        return None

    def track_files(self, task):
        """
        register all task files with the file tracker of the project.

        @param task: CalculationTask object.
            the id, model, and files attributes are required.
            files maps file paths to their category strings.

        @return: None
        """
        model_id = task.id.model
        # dict.items() works in python 2 and 3, whereas iteritems() is python 2 only.
        for path, cat in task.files.items():
            self._project.files.add_file(path, model_id, category=cat)

    def cleanup_files(self, keep=10):
        """
        delete uninteresting files.

        @param keep: (int) number of best ranking models to keep.

        @return: None
        """
        self._project.files.delete_files(keep_rfac=keep)
|
||||
|
||||
|
||||
class ModelHandler(TaskHandler):
    """
    abstract model handler.

    structural optimizers must be derived from this class and implement a loop on the model.
    """

    ## @var datetime_limit (datetime.datetime)
    # date and time when the model handler should finish (regardless of result)
    # because the process may get killed by the scheduler after this time.
    #
    # the default is 100 days after creation of the handler.

    def __init__(self):
        super(ModelHandler, self).__init__()
        # generous default budget: stop 100 days after creation of the handler.
        self.datetime_limit = datetime.datetime.now() + datetime.timedelta(days=100)

    def create_tasks(self, parent_task):
        """
        create tasks for the next population of models.

        the method is called repeatedly by the dispatcher when the calculation queue runs empty.
        the model should then create the next round of tasks, e.g. the next generation of a population.
        the number of tasks created can be as low as one.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @note it is not possible to hold back calculations, or to wait for results.
        the handler must either return a task, or signal the end of the optimization process.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
        the list must be empty if there are no more tasks.
        """
        super(ModelHandler, self).create_tasks(parent_task)
        return []

    def add_result(self, task):
        """
        collect and combine results of a scan.

        this method is called by the dispatcher when all results for a scan are available.
        """
        super(ModelHandler, self).add_result(task)
        return None
|
||||
|
||||
|
||||
class SingleModelHandler(ModelHandler):
    """
    single model calculation handler.

    this class runs a single calculation on the start parameters defined in the domain of the project.
    """

    def create_tasks(self, parent_task):
        """
        start one task with the start parameters.

        subsequent calls will return an empty task list.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list with a single CalculationTask on the first call, empty list afterwards.
        """
        super(SingleModelHandler, self).create_tasks(parent_task)

        out_tasks = []
        # create the task only once; afterwards the bookkeeping dicts are non-empty.
        if len(self._complete_tasks) + len(self._pending_tasks) == 0:
            parent_id = parent_task.id
            self._parent_tasks[parent_id] = parent_task
            new_task = parent_task.copy()
            new_task.change_id(model=0)
            new_task.parent_id = parent_id
            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            out_tasks.append(new_task)

        return out_tasks

    def add_result(self, task):
        """
        collect the end result of a single calculation.

        the SingleModelHandler runs calculations for a single model.
        this method assumes that it will be called just once.
        it returns the parent task to signal the end of the calculations.

        the result file is not deleted regardless of the files_to_delete project option.
        the task ID is removed from the file name.

        @param task: (CalculationTask) calculation task that completed.

        @return (CalculationTask) parent task.
        """
        super(SingleModelHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        parent_task = self._parent_tasks[task.parent_id]
        del self._parent_tasks[task.parent_id]

        parent_task.result_valid = task.result_valid
        parent_task.file_ext = task.file_ext
        parent_task.result_filename = parent_task.file_root + parent_task.file_ext
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

        # default r-factor 1.0 marks an unusable result.
        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self.save_report_file(task.model)

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        parent_task.time = task.time

        return parent_task

    def save_report_file(self, result):
        """
        save model parameters and r-factor to a file.

        the file name is derived from the project's output_file with '.dat' extension.
        the file has a space-separated column format.
        the first line contains the parameter names.
        this is the same format as used by the swarm and grid handlers.

        @param result: dictionary of results and parameters. the values should be scalars and strings.

        @return: None
        """
        # sort the columns by the full parameter name, case-insensitive.
        # (the previous key function compared only the first character of each name,
        # which left the order of same-initial keys up to dict iteration order.)
        keys = sorted(result, key=lambda name: name.lower())
        vals = (str(result[key]) for key in keys)
        with open(self._project.output_file + ".dat", "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(keys))
            outfile.write("\n")
            outfile.write(" ".join(vals))
            outfile.write("\n")
|
||||
|
||||
|
||||
class ScanHandler(TaskHandler):
    """
    split the parameters into one set per scan and gather the results.

    the scan selection takes effect in MscoProcess.calc().
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(ScanHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each scan of the given parent task.

        all scans share the model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of CalculationTask objects, with one element per scan.
            the scan index varies according to project.scans.
        """
        super(ScanHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        # a parent must be submitted only once.
        # note: assert statements are skipped under python -O.
        assert parent_id not in self._pending_ids_per_parent
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        out_tasks = []
        for (i_scan, scan) in enumerate(self._project.scans):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(scan=i_scan)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no scan tasks generated. your project must link to at least one scan file.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results versus scan.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_scans() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(ScanHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all scans complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_scans(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class SymmetryHandler(TaskHandler):
    """
    split the parameters into one set per symmetry and gather the results.

    symmetries are defined by the project.
    the actual merging of the results is delegated to the project's combine_symmetries() method.
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(SymmetryHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each symmetry of the given parent task.

        all symmetries share the same model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of CalculationTask objects, with one element per symmetry.
            the symmetry index varies according to project.symmetries.
        """
        super(SymmetryHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        out_tasks = []
        for (i_sym, sym) in enumerate(self._project.symmetries):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(sym=i_sym)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no symmetry tasks generated. your project must declare at least one symmetry.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results versus symmetry.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_symmetries() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(SymmetryHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all symmetries complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_symmetries(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class EmitterHandler(TaskHandler):
    """
    the emitter handler distributes emitter configurations to calculation tasks and collects their results.

    """
    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(EmitterHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each emitter configuration of the given parent task.

        all emitters share the same model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of @ref CalculationTask objects with one element per emitter configuration
            if parallel processing is enabled.
            otherwise the list contains a single CalculationTask object with emitter index 0.
            the emitter index is used by the project's create_cluster method.
        """
        super(EmitterHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        # split by emitter only if it pays off: more than one emitter and more than one slot.
        # emitter index 0 selects the all-in-one calculation.
        n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
        if n_emitters > 1 and self._slots > 1:
            emitters = range(1, n_emitters + 1)
        else:
            emitters = [0]

        out_tasks = []
        for em in emitters:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(emit=em)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no emitter tasks generated. your project must declare at least one emitter configuration.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results of inequivalent emitters.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_emitters() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(EmitterHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all emitters complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_emitters(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class RegionHandler(TaskHandler):
    """
    region handlers split a scan into a number of regions that can be calculated in parallel.

    this class is an abstract base class.
    it implements only common code to combine different regions into one result.
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(RegionHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def add_result(self, task):
        """
        gather results of all regions that belong to the same parent.

        when the last region of a family arrives, the data of all regions is
        concatenated, sorted and saved under the parent's result file name.
        the per-region files are removed from the file tracker.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(RegionHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all regions ready?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                # bring all region datasets to a common dtype before stacking.
                stack1 = [md.load_data(t.result_filename) for t in child_tasks]
                dtype = md.common_dtype(stack1)
                stack2 = [md.restructure_data(d, dtype) for d in stack1]
                result_data = np.hstack(tuple(stack2))
                md.sort_data(result_data)
                md.save_data(parent_task.result_filename, result_data)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
                for t in child_tasks:
                    self._project.files.remove_file(t.result_filename)

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class SingleRegionHandler(RegionHandler):
    """
    trivial region handler that processes each parent task as one region.

    no splitting occurs: the whole scan of the parent task is
    calculated in a single piece by exactly one child task.
    """

    def create_tasks(self, parent_task):
        """
        wrap the parent task in exactly one child task.

        the child task is a copy of the parent with its region index set to 0.
        the bookkeeping dictionaries inherited from RegionHandler are
        initialized so that add_result can recognize when the family
        (here: a single child) is complete.

        @param parent_task: (CalculationTask) task to be calculated.

        @return list of CalculationTask objects containing the single child task.
            the energy index enumerates the regions.
        """
        super(SingleRegionHandler, self).create_tasks(parent_task)

        family_id = parent_task.id

        # register the parent and prepare empty pending/complete id sets
        self._parent_tasks[family_id] = parent_task
        self._pending_ids_per_parent[family_id] = set()
        self._complete_ids_per_parent[family_id] = set()

        # the single region covers the entire parent task
        region_task = parent_task.copy()
        region_task.parent_id = family_id
        region_task.change_id(region=0)

        self._pending_tasks[region_task.id] = region_task
        self._pending_ids_per_parent[family_id].add(region_task.id)

        return [region_task]
|
||||
|
||||
|
||||
class EnergyRegionHandler(RegionHandler):
    """
    split a scan into a number of energy regions that can be run in parallel.

    the purpose of this task handler is to save wall clock time on a multi-processor machine
    by splitting energy scans into smaller chunks.

    the handler distributes the processing slots to the scans proportional to their scan lengths
    so that all child tasks of the same parent finish approximately in the same time.
    pure angle scans are not split.

    to use this feature, the project assigns this class to its @ref handler_classes['region'].
    it is safe to use this handler for calculations that do not involve energy scans.
    the handler is best used for single calculations.
    in optimizations that calculate many models there is no advantage in using it
    (on the contrary, the overhead increases the total run time slightly.)
    """

    ## @var _slots_per_scan
    # (list of integers) number of processor slots assigned to each scan,
    # i.e. number of chunks to split a scan region into.
    #
    # the sequence has the same order as self._project.scans.

    def __init__(self):
        super(EnergyRegionHandler, self).__init__()
        self._slots_per_scan = []

    def setup(self, project, slots):
        """
        initialize the handler with project data and the process environment.

        this function distributes the processing slots to the scans.
        the slots are distributed proportional to the scan lengths of the energy scans
        so that all chunks have approximately the same size.
        each scan receives at least one slot, even pure angle scans (length 0).

        the number of slots per scan is stored in @ref _slots_per_scan for later use by @ref create_tasks.

        @param project (Project) project instance.

        @param slots (int) number of calculation slots (processes).

        @return None
        """
        super(EnergyRegionHandler, self).setup(project, slots)

        scan_lengths = [scan.energies.shape[0] for scan in self._project.scans]
        total_length = sum(scan_lengths)
        # guard against division by zero when the project contains only
        # pure angle scans (zero energy steps in every scan).
        # with f = 0.0 the max(1, ...) below assigns one chunk per scan,
        # which is the correct behaviour for scans that cannot be split.
        if total_length > 0:
            f = min(1.0, float(self._slots) / total_length)
        else:
            f = 0.0
        self._slots_per_scan = [max(1, int(round(length * f))) for length in scan_lengths]

        for i, scan in enumerate(self._project.scans):
            logger.debug(BMsg("region handler: split scan {file} into {slots} chunks",
                              file=os.path.basename(scan.filename), slots=self._slots_per_scan[i]))

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each energy region of the given parent task.

        all child tasks share the model parameters.
        the energy axis of the parent scan is cut into the number of chunks
        determined by @ref setup (one chunk per assigned slot).

        @param parent_task: (CalculationTask) task to be split into regions.

        @return list of CalculationTask objects, with one element per region.
            the energy index enumerates the regions.
        """
        super(EnergyRegionHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        energies = self._project.scans[parent_id.scan].energies
        n_regions = self._slots_per_scan[parent_id.scan]
        regions = np.array_split(energies, n_regions)

        out_tasks = []
        for ireg, reg in enumerate(regions):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(region=ireg)
            # a single region means the scan is calculated in one piece;
            # in that case the region filter is left untouched.
            if n_regions > 1:
                new_task.region['e'] = reg

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no region tasks generated. this is probably a bug.")

        return out_tasks
|
||||
|
||||
|
||||
def choose_region_handler_class(project):
    """
    choose a suitable region handler for the project.

    the EnergyRegionHandler class is returned if at least one scan of the
    project contains an energy axis with 10 or more steps.
    otherwise, the SingleRegionHandler class is returned.

    angle scans do not benefit from region splitting in EDAC.

    @param project: Project instance.
    @return: SingleRegionHandler or EnergyRegionHandler class.
    """
    # a scan qualifies as an energy scan if its energy axis has at least 10 steps
    has_energy_scan = any(scan.energies.shape[0] >= 10 for scan in project.scans)
    return EnergyRegionHandler if has_energy_scan else SingleRegionHandler
|
Reference in New Issue
Block a user