update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100 tagged rev-master-2.0.0
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions
--- a/pmsco/handlers.py
+++ b/pmsco/handlers.py
@@ -40,21 +40,28 @@ the scan and symmetry handlers call methods of the project class to invoke proje

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

-@copyright (c) 2015-17 by Paul Scherrer Institut @n
+@copyright (c) 2015-18 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
 """

+from __future__ import absolute_import
 from __future__ import division
+from __future__ import print_function
+
 import datetime
-import os
+from functools import reduce
 import logging
 import math
 import numpy as np
-import data as md
-from helpers import BraceMessage as BMsg
+import os
+
+from pmsco.compat import open
+import pmsco.data as md
+import pmsco.graphics.scan as mgs
+from pmsco.helpers import BraceMessage as BMsg

 logger = logging.getLogger(__name__)

@@ -66,10 +73,10 @@ class TaskHandler(object):
    this class defines the common interface of task handlers.
    """

-    ## @var project
+    ## @var _project
    #       (Project) project instance.

-    ## @var slots
+    ## @var _slots
    #       (int) number of calculation slots (processes).
    #
     #       for best efficiency the number of tasks generated should be greater than or equal to the number of slots.
@@ -93,7 +100,7 @@ class TaskHandler(object):
    #       the dictionary keys are the task identifiers CalculationTask.id,
    #       the values are the corresponding CalculationTask objects.

-    ## @var invalid_count (int)
+    ## @var _invalid_count (int)
    #  accumulated total number of invalid results received.
    #
    #  the number is incremented by add_result if an invalid task is reported.
@@ -188,21 +195,22 @@ class TaskHandler(object):
            the id, model, and files attributes are required.
            if model contains a '_rfac' value, the r-factor is

-        @return: None
+        @return None
        """
        model_id = task.id.model
-        for path, cat in task.files.iteritems():
+        for path, cat in task.files.items():
            self._project.files.add_file(path, model_id, category=cat)

-    def cleanup_files(self, keep=10):
+    def cleanup_files(self, keep=0):
        """
        delete uninteresting files.

-        @param: number of best ranking models to keep.
+        @param keep: minimum number of models to keep.
+            0 (default): leave the decision to the project.

-        @return: None
+        @return None
        """
-        self._project.files.delete_files(keep_rfac=keep)
+        self._project.cleanup_files(keep=keep)


 class ModelHandler(TaskHandler):
@@ -255,6 +263,22 @@ class ModelHandler(TaskHandler):

        return None

+    def save_report(self, root_task):
+        """
+        generate a final report of the optimization procedure.
+
+        detailed calculation results are usually saved as soon as they become available.
+        this method may be implemented in sub-classes to aggregate and summarize the results, generate plots, etc.
+        in this class, the method does nothing.
+
+        @note: implementations must add the path names of generated files to self._project.files.
+
+        @param root_task: (CalculationTask) task with initial model parameters.
+
+        @return None
+        """
+        pass
+

 class SingleModelHandler(ModelHandler):
    """
@@ -263,6 +287,10 @@ class SingleModelHandler(ModelHandler):
    this class runs a single calculation on the start parameters defined in the domain of the project.
    """

+    def __init__(self):
+        super(SingleModelHandler, self).__init__()
+        self.result = {}
+
    def create_tasks(self, parent_task):
        """
        start one task with the start parameters.
@@ -316,25 +344,18 @@ class SingleModelHandler(ModelHandler):
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

-        rfac = 1.0
-        if task.result_valid:
-            try:
-                rfac = self._project.calc_rfactor(task)
-            except ValueError:
-                task.result_valid = False
-                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
+        assert not math.isnan(task.rfac)
+        self.result = task.model.copy()
+        self.result['_rfac'] = task.rfac

-            task.model['_rfac'] = rfac
-            self.save_report_file(task.model)
-
-        self._project.files.update_model_rfac(task.id.model, rfac)
+        self._project.files.update_model_rfac(task.id.model, task.rfac)
        self._project.files.set_model_complete(task.id.model, True)

        parent_task.time = task.time

        return parent_task

-    def save_report_file(self, result):
+    def save_report(self, root_task):
        """
        save model parameters and r-factor to a file.

@@ -343,20 +364,25 @@ class SingleModelHandler(ModelHandler):
        the first line contains the parameter names.
        this is the same format as used by the swarm and grid handlers.

-        @param result: dictionary of results and parameters. the values should be scalars and strings.
+        @param root_task: (CalculationTask) the id.model attribute is used to register the generated files.

         @return None
        """
-        keys = [key for key in result]
+        super(SingleModelHandler, self).save_report(root_task)
+
+        keys = [key for key in self.result]
        keys.sort(key=lambda t: t[0].lower())
-        vals = (str(result[key]) for key in keys)
-        with open(self._project.output_file + ".dat", "w") as outfile:
+        vals = (str(self.result[key]) for key in keys)
+        filename = self._project.output_file + ".dat"
+        with open(filename, "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(keys))
            outfile.write("\n")
            outfile.write(" ".join(vals))
            outfile.write("\n")

+            self._project.files.add_file(filename, root_task.id.model, "report")
+

 class ScanHandler(TaskHandler):
    """
@@ -388,6 +414,30 @@ class ScanHandler(TaskHandler):
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

+    def setup(self, project, slots):
+        """
+        initialize the scan task handler and save processed experimental scans.
+        """
+        super(ScanHandler, self).setup(project, slots)
+
+        for (i_scan, scan) in enumerate(self._project.scans):
+            if scan.modulation is not None:
+                __, filename = os.path.split(scan.filename)
+                pre, ext = os.path.splitext(filename)
+                filename = "{pre}_{scan}.modf{ext}".format(pre=pre, ext=ext, scan=i_scan)
+                filepath = os.path.join(self._project.output_dir, filename)
+                md.save_data(filepath, scan.modulation)
+                mgs.render_scan(filepath, data=scan.modulation)
+
+        if project.combined_scan is not None:
+            ext = md.format_extension(project.combined_scan)
+            filename = project.output_file + ext
+            md.save_data(filename, project.combined_scan)
+        if project.combined_modf is not None:
+            ext = md.format_extension(project.combined_modf)
+            filename = project.output_file + ".modf" + ext
+            md.save_data(filename, project.combined_modf)
+
    def create_tasks(self, parent_task):
        """
        generate a calculation task for each scan of the given parent task.
@@ -464,6 +514,7 @@ class ScanHandler(TaskHandler):

            if parent_task.result_valid:
                self._project.combine_scans(parent_task, child_tasks)
+                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')

@@ -575,8 +626,11 @@ class SymmetryHandler(TaskHandler):

            if parent_task.result_valid:
                self._project.combine_symmetries(parent_task, child_tasks)
+                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
+                graph_file = mgs.render_scan(parent_task.modf_filename)
+                self._project.files.add_file(graph_file, parent_task.id.model, 'scan')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
@@ -621,7 +675,7 @@ class EmitterHandler(TaskHandler):

        all emitters share the same model parameters.

-        @return list of @ref CalculationTask objects with one element per emitter configuration
+        @return list of @ref pmsco.dispatch.CalculationTask objects with one element per emitter configuration
            if parallel processing is enabled.
            otherwise the list contains a single CalculationTask object with emitter index 0.
            the emitter index is used by the project's create_cluster method.
@@ -634,10 +688,7 @@ class EmitterHandler(TaskHandler):
        self._complete_ids_per_parent[parent_id] = set()

        n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
-        if n_emitters > 1 and self._slots > 1:
-            emitters = range(1, n_emitters + 1)
-        else:
-            emitters = [0]
+        emitters = range(n_emitters)

        out_tasks = []
        for em in emitters:
@@ -698,8 +749,11 @@ class EmitterHandler(TaskHandler):

            if parent_task.result_valid:
                self._project.combine_emitters(parent_task, child_tasks)
+                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
+                graph_file = mgs.render_scan(parent_task.modf_filename)
+                self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
@@ -776,15 +830,10 @@ class RegionHandler(TaskHandler):
            parent_task.time = reduce(lambda a, b: a + b, child_times)

            if parent_task.result_valid:
-                stack1 = [md.load_data(t.result_filename) for t in child_tasks]
-                dtype = md.common_dtype(stack1)
-                stack2 = [md.restructure_data(d, dtype) for d in stack1]
-                result_data = np.hstack(tuple(stack2))
-                md.sort_data(result_data)
-                md.save_data(parent_task.result_filename, result_data)
+                self._project.combine_regions(parent_task, child_tasks)
+                self._project.evaluate_result(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
-                for t in child_tasks:
-                    self._project.files.remove_file(t.result_filename)
+                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, "emitter")

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
@@ -840,7 +889,7 @@ class EnergyRegionHandler(RegionHandler):
    so that all child tasks of the same parent finish approximately in the same time.
    pure angle scans are not split.

-    to use this feature, the project assigns this class to its @ref handler_classes['region'].
+    to use this feature, the project assigns this class to its @ref pmsco.project.Project.handler_classes['region'].
    it is safe to use this handler for calculations that do not involve energy scans.
    the handler is best used for single calculations.
    in optimizations that calculate many models there is no advantage in using it