update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
This commit is contained in:
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions

View File

@ -11,7 +11,9 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path
import datetime
@ -19,8 +21,9 @@ import signal
import collections
import copy
import logging
from attrdict import AttrDict
from mpi4py import MPI
from helpers import BraceMessage as BMsg
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -48,7 +51,99 @@ TAG_INVALID_RESULT = 3
## the message is empty
TAG_ERROR_ABORTING = 4
CalcID = collections.namedtuple('CalcID', ['model', 'scan', 'sym', 'emit', 'region'])
## levels of calculation tasks
#
# this tuple defines the names and order of the five task levels,
# from the most general (model) to the most specific (region).
CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region')


## intermediate sub-class of CalcID
#
# this class should not be instantiated.
# instead, use CalcID which provides some useful helper methods.
#
_CalcID = collections.namedtuple('_CalcID', CALC_LEVELS)


class CalcID(_CalcID):
    """
    named tuple class to uniquely identify a calculation task.

    this is a 5-tuple of indices, one index per task level.
    a positive index refers to a specific instance in the task hierarchy.
    the ID is defined as a named tuple so that it can be used as key of a dictionary.
    cf. @ref CalculationTask for further detail.

    compared to a plain named tuple, the CalcID class provides additional helper methods and properties.
    example constructor: CalcID(1, 2, 3, 4, 5).
    """

    @property
    def levels(self):
        """
        level names.

        this property returns the defined level names in a tuple.
        this is the same as @ref CALC_LEVELS.

        @return: tuple of level names
        """
        return self._fields

    @property
    def level(self):
        """
        specific level of a task, dictionary key form.

        this corresponds to the name of the last non-negative component.

        @return: attribute name corresponding to the task level.
            empty string if all members are negative (the root task).
        """
        for name in reversed(self._fields):
            if getattr(self, name) >= 0:
                return name
        return ''

    @property
    def numeric_level(self):
        """
        specific level of a task, numeric form.

        this corresponds to the last non-negative value in the sequence of indices.

        @return: index corresponding to the significant task level component of the id.
            the value ranges from -1 to len(CalcID) - 1.
            it is -1 if all indices are negative (root task).
        """
        for pos in reversed(range(len(self))):
            if self[pos] >= 0:
                return pos
        return -1

    def collapse_levels(self, level):
        """
        return a new CalcID that is collapsed at a specific level.

        the method returns a new CalcID object where the indices below the given level are -1 (undefined).
        this can be seen as collapsing the tree at the specified node (level).

        @note because a CalcID is immutable, this method returns a new instance.

        @param level: level at which to collapse.
            the index at this level remains unchanged, lower ones are set to -1.
            the level can be specified by attribute name (str) or numeric index (-1..4).

        @raise ValueError if level is neither numeric nor in CALC_LEVELS,
            or if the numeric level is outside the range -1..len(CALC_LEVELS)-1.

        @return: new CalcID instance.
        """
        try:
            level = int(level)
        except (ValueError, TypeError):
            # not numeric: interpret as a level name.
            # CALC_LEVELS.index raises ValueError for unknown names,
            # matching the documented contract (int(None) alone would raise TypeError).
            level = CALC_LEVELS.index(level)
        if not -1 <= level < len(CALC_LEVELS):
            # raise instead of assert so the range check survives python -O.
            raise ValueError("invalid calculation level {0}".format(level))
        mask = {name: -1 for (pos, name) in enumerate(CALC_LEVELS) if pos > level}
        return self._replace(**mask)
class CalculationTask(object):
@ -71,11 +166,11 @@ class CalculationTask(object):
specified members must be greater or equal to zero.
-1 is the wildcard which is used in parent tasks,
where, e.g., no specific symmetry is chosen.
the root task has the ID (-1, -1, -1, -1).
the root task has the ID (-1, -1, -1, -1, -1).
"""
## @var id (CalcID)
# named tuple CalcID containing the 4-part calculation task identifier.
# named tuple CalcID containing the 5-part calculation task identifier.
## @var parent_id (CalcID)
# named tuple CalcID containing the task identifier of the parent task.
@ -105,6 +200,21 @@ class CalculationTask(object):
## @var modf_filename (string)
# name of the ETPI or ETPAI file that contains the resulting modulation function.
## @var result_valid (bool)
# validity status of the result file @ref result_filename.
#
# if True, the file must exist and contain valid data according to the task specification.
# the value is set True when a calculation task completes successfully.
# it may be reset later to invalidate the data if an error occurs during processing.
#
# validity of a parent task requires validity of all child tasks.
## @var rfac (float)
# r-factor value of the task result.
#
# the rfac field is written by @ref pmsco.project.Project.evaluate_result.
# the initial value is Not A Number.
## @var time (timedelta)
# execution time of the task.
#
@ -125,7 +235,7 @@ class CalculationTask(object):
# scan positions to substitute the ones from the original scan.
#
# this is used to distribute scans over multiple calculator processes,
# cf. e.g. @ref EnergyRegionHandler.
# cf. e.g. @ref pmsco.handlers.EnergyRegionHandler.
#
# dictionary key must be the scan dimension 'e', 't', 'p', 'a'.
# the value is a numpy.ndarray containing the scan positions.
@ -147,6 +257,7 @@ class CalculationTask(object):
self.time = datetime.timedelta()
self.files = {}
self.region = {}
self.rfac = float('nan')
def __eq__(self, other):
"""
@ -192,7 +303,7 @@ class CalculationTask(object):
msg['id'] = CalcID(**msg['id'])
if isinstance(msg['parent_id'], dict):
msg['parent_id'] = CalcID(**msg['parent_id'])
for k, v in msg.iteritems():
for k, v in msg.items():
self.__setattr__(k, v)
def format_filename(self, **overrides):
@ -204,13 +315,8 @@ class CalculationTask(object):
@return a string consisting of the concatenation of the base name, the ID, and the extension.
"""
parts = {}
parts = self.id._asdict()
parts['root'] = self.file_root
parts['model'] = self.id.model
parts['scan'] = self.id.scan
parts['sym'] = self.id.sym
parts['emit'] = self.id.emit
parts['region'] = self.id.region
parts['ext'] = self.file_ext
for key in overrides.keys():
@ -237,6 +343,30 @@ class CalculationTask(object):
"""
self.id = self.id._replace(**kwargs)
@property
def level(self):
    """
    specific level of a task, dictionary key form.

    the value is obtained from @ref CalcID.level of self.id and
    corresponds to the name of the last non-negative id component.

    @return: attribute name corresponding to the task level.
        empty string for the root task.
    """
    task_id = self.id
    return task_id.level
@property
def numeric_level(self):
    """
    specific level of a task, numeric form.

    the value is obtained from @ref CalcID.numeric_level of self.id and
    corresponds to the index of the last non-negative id component.

    @return: index corresponding to the significant task level component of the id.
        -1 for the root task.
    """
    task_id = self.id
    return task_id.numeric_level
def add_task_file(self, name, category):
"""
register a file that was generated by the calculation task.
@ -284,6 +414,82 @@ class CalculationTask(object):
logger.warning("CalculationTask.remove_task_file: could not remove file {0}".format(filename))
class CachedCalculationMethod(object):
    """
    decorator to cache results of expensive calculation functions.

    this decorator can be used to transparently cache any expensive calculation result
    that depends in a deterministic way on the calculation index.
    for example, each cluster gets a unique calculation index.
    if a cluster needs to be calculated repeatedly, it may be more efficient to cache it.

    the method to decorate must have the following signature:
    result = func(self, model, index).
    the index (neglecting emitter and region) identifies the result (completely and uniquely).

    the number of cached results is limited by the ttl (time to live) attribute.
    the items' ttl values are decreased each time a requested calculation is not found in the cache (miss).
    on a cache hit, the corresponding item's ttl is reset.

    the target ttl (time to live) can be specified as an optional parameter of the decorator.
    time increases with every cache miss.
    """

    ## @var _cache (dict)
    #
    # key = calculation index,
    # value = function result

    ## @var _ttl (dict)
    #
    # key = calculation index,
    # value (int) = remaining time to live
    # where time is the number of subsequent cache misses.

    ## @var ttl (int)
    #
    # target time to live of cache items.
    # time is given by the number cache misses.

    def __init__(self, ttl=10):
        super(CachedCalculationMethod, self).__init__()
        self._cache = {}
        self._ttl = {}
        self.ttl = ttl

    def __call__(self, func):
        def wrapped_func(inst, model, index):
            # note: _replace returns a new instance of the namedtuple.
            # emitter and region indices do not contribute to the cache key.
            index = index._replace(emit=-1, region=-1)
            cache_index = (id(inst), index.model, index.scan, index.sym)
            try:
                result = self._cache[cache_index]
            except KeyError:
                # cache miss: compute, age the existing items, then store.
                result = func(inst, model, index)
                self._expire()
                self._cache[cache_index] = result
            # reset the item's ttl on every access (hit or miss),
            # as promised in the class docstring; previously the ttl was
            # only set on a miss, so frequently-hit items expired anyway.
            self._ttl[cache_index] = self.ttl
            return result

        return wrapped_func

    def _expire(self):
        """
        decrease the remaining ttl of cache items and delete items whose ttl has fallen below 0.

        @return: None
        """
        for index in self._ttl:
            self._ttl[index] -= 1
        old_items = [index for index in self._ttl if self._ttl[index] < 0]
        for index in old_items:
            del self._ttl[index]
            del self._cache[index]
class MscoProcess(object):
"""
code shared by MscoMaster and MscoSlave.
@ -357,11 +563,9 @@ class MscoProcess(object):
"""
clean up after all calculations.
this method calls the clean up function of the project.
@return: None
"""
self._project.cleanup()
pass
def calc(self, task):
"""
@ -387,14 +591,36 @@ class MscoProcess(object):
logger.info("model %s", s_model)
start_time = datetime.datetime.now()
# create parameter and cluster structures
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
par = self._project.create_params(task.model, task.id)
# generate file names
clu = self._create_cluster(task)
par = self._create_params(task)
scan = self._define_scan(task)
output_file = task.format_filename(ext="")
# determine scan range
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return task
def _define_scan(self, task):
"""
define the scan range.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.project.Scan object for the calculator.
"""
scan = self._project.scans[task.id.scan]
if task.region:
scan = scan.copy()
@ -419,26 +645,56 @@ class MscoProcess(object):
except KeyError:
pass
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
files = self._calculator.check_cluster(clu, output_file)
return scan
def _create_cluster(self, task):
"""
generate the cluster for the given calculation task.
cluster generation is delegated to the project's cluster_generator object.
if the current task has region == 0,
the method also exports diagnostic clusters via the project's export_cluster() method.
the file name is formatted with the given task index except that region is -1.
if (in addition to region == 0) the current task has emit == 0 and cluster includes multiple emitters,
the method also exports the master cluster and full emitter list.
the file name is formatted with the given task index except that emitter and region are -1.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.cluster.Cluster object for the calculator.
"""
nem = self._project.cluster_generator.count_emitters(task.model, task.id)
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
if task.id.region == 0:
file_index = task.id._replace(region=-1)
filename = task.format_filename(region=-1)
files = self._project.export_cluster(file_index, filename, clu)
task.files.update(files)
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
# master cluster
if nem > 1 and task.id.emit == 0:
master_index = task.id._replace(emit=-1, region=-1)
filename = task.format_filename(emit=-1, region=-1)
master_cluster = self._project.cluster_generator.create_cluster(task.model, master_index)
files = self._project.export_cluster(master_index, filename, master_cluster)
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return clu
return task
def _create_params(self, task):
    """
    generate the parameters list.

    parameters generation is delegated to the project's create_params method.

    @param task: CalculationTask with all attributes set for the calculation.

    @return: pmsco.project.Params object for the calculator.
    """
    return self._project.create_params(task.model, task.id)
class MscoMaster(MscoProcess):
@ -505,20 +761,12 @@ class MscoMaster(MscoProcess):
# it defines the initial model and the output file name.
# it is passed to the model handler during the main loop.
# @var _model_handler
# (ModelHandler) model handler instance
# @var _scan_handler
# (ScanHandler) scan handler instance
# @var _symmetry_handler
# (SymmetryHandler) symmetry handler instance
# @var _emitter_handler
# (EmitterHandler) emitter handler instance
# @var _region_handler
# (RegionHandler) region handler instance
## @var task_handlers
# (AttrDict) dictionary of task handler objects
#
# the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'.
# the values are handlers.TaskHandler objects.
# the objects can be accessed in attribute or dictionary notation.
def __init__(self, comm):
super(MscoMaster, self).__init__(comm)
@ -534,11 +782,8 @@ class MscoMaster(MscoProcess):
self._min_queue_len = self._slaves + 1
self._root_task = None
self._model_handler = None
self._scan_handler = None
self._symmetry_handler = None
self._emitter_handler = None
self._region_handler = None
self.task_levels = list(CalcID._fields)
self.task_handlers = AttrDict()
def setup(self, project):
"""
@ -564,30 +809,29 @@ class MscoMaster(MscoProcess):
logger.debug("master entering setup")
self._running_slaves = self._slaves
self._idle_ranks = range(1, self._running_slaves + 1)
self._idle_ranks = list(range(1, self._running_slaves + 1))
self._root_task = CalculationTask()
self._root_task.file_root = project.output_file
self._root_task.model = project.create_domain().start
self._model_handler = project.handler_classes['model']()
self._scan_handler = project.handler_classes['scan']()
self._symmetry_handler = project.handler_classes['symmetry']()
self._emitter_handler = project.handler_classes['emitter']()
self._region_handler = project.handler_classes['region']()
for level in self.task_levels:
self.task_handlers[level] = project.handler_classes[level]()
self._model_handler.datetime_limit = self.datetime_limit
self.task_handlers.model.datetime_limit = self.datetime_limit
slaves_adj = max(self._slaves, 1)
self._model_handler.setup(project, slaves_adj)
self.task_handlers.model.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = max(slaves_adj / 2, 1)
self._scan_handler.setup(project, slaves_adj)
self._symmetry_handler.setup(project, slaves_adj)
self._emitter_handler.setup(project, slaves_adj)
self.task_handlers.scan.setup(project, slaves_adj)
self.task_handlers.sym.setup(project, slaves_adj)
self.task_handlers.emit.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = min(slaves_adj, 4)
self._region_handler.setup(project, slaves_adj)
self.task_handlers.region.setup(project, slaves_adj)
project.setup(self.task_handlers)
def run(self):
"""
@ -615,14 +859,13 @@ class MscoMaster(MscoProcess):
logger.debug("master exiting main loop")
self._running = False
self._save_report()
def cleanup(self):
logger.debug("master entering cleanup")
self._region_handler.cleanup()
self._emitter_handler.cleanup()
self._symmetry_handler.cleanup()
self._scan_handler.cleanup()
self._model_handler.cleanup()
for level in reversed(self.task_levels):
self.task_handlers[level].cleanup()
self._project.cleanup()
super(MscoMaster, self).cleanup()
def _dispatch_results(self):
@ -632,29 +875,25 @@ class MscoMaster(MscoProcess):
logger.debug("dispatching results of %u tasks", len(self._complete_tasks))
while self._complete_tasks:
__, task = self._complete_tasks.popitem(last=False)
self._dispatch_result(task)
logger.debug("passing task %s to region handler", str(task.id))
task = self._region_handler.add_result(task)
def _dispatch_result(self, task):
"""
pass a result through the post-processing modules.
@param task: a CalculationTask object.
@return None
"""
level = task.level
if level:
logger.debug(BMsg("passing task {task} to {level} handler", task=str(task.id), level=level))
task = self.task_handlers[level].add_result(task)
if task:
logger.debug("passing task %s to emitter handler", str(task.id))
task = self._emitter_handler.add_result(task)
if task:
logger.debug("passing task %s to symmetry handler", str(task.id))
task = self._symmetry_handler.add_result(task)
if task:
logger.debug("passing task %s to scan handler", str(task.id))
task = self._scan_handler.add_result(task)
if task:
logger.debug("passing task %s to model handler", str(task.id))
task = self._model_handler.add_result(task)
if task:
logger.debug("root task %s complete", str(task.id))
self._finishing = True
self._dispatch_result(task)
else:
self._finishing = True
logger.debug(BMsg("root task {task} complete", task=str(task.id)))
def _create_tasks(self):
"""
@ -668,7 +907,7 @@ class MscoMaster(MscoProcess):
"""
logger.debug("creating new tasks from root")
while len(self._pending_tasks) < self._min_queue_len:
tasks = self._model_handler.create_tasks(self._root_task)
tasks = self.task_handlers.model.create_tasks(self._root_task)
logger.debug("model handler returned %u new tasks", len(tasks))
if not tasks:
self._model_done = True
@ -807,6 +1046,17 @@ class MscoMaster(MscoProcess):
return self._finishing
def _save_report(self):
    """
    generate a final report.

    this method is called at the end of the master loop.
    it passes the call to @ref pmsco.handlers.ModelHandler.save_report.

    @return: None
    """
    model_handler = self.task_handlers.model
    model_handler.save_report(self._root_task)
def add_model_task(self, task):
"""
add a new model task including all of its children to the task queue.
@ -814,13 +1064,13 @@ class MscoMaster(MscoProcess):
@param task (CalculationTask) task identifier and model parameters.
"""
scan_tasks = self._scan_handler.create_tasks(task)
scan_tasks = self.task_handlers.scan.create_tasks(task)
for scan_task in scan_tasks:
sym_tasks = self._symmetry_handler.create_tasks(scan_task)
sym_tasks = self.task_handlers.sym.create_tasks(scan_task)
for sym_task in sym_tasks:
emitter_tasks = self._emitter_handler.create_tasks(sym_task)
emitter_tasks = self.task_handlers.emit.create_tasks(sym_task)
for emitter_task in emitter_tasks:
region_tasks = self._region_handler.create_tasks(emitter_task)
region_tasks = self.task_handlers.region.create_tasks(emitter_task)
for region_task in region_tasks:
self._pending_tasks[region_task.id] = region_task