update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
This commit is contained in:
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions

View File

@ -40,21 +40,28 @@ the scan and symmetry handlers call methods of the project class to invoke proje
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-17 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
from functools import reduce
import logging
import math
import numpy as np
import data as md
from helpers import BraceMessage as BMsg
import os
from pmsco.compat import open
import pmsco.data as md
import pmsco.graphics.scan as mgs
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -66,10 +73,10 @@ class TaskHandler(object):
this class defines the common interface of task handlers.
"""
## @var project
## @var _project
# (Project) project instance.
## @var slots
## @var _slots
# (int) number of calculation slots (processes).
#
# for best efficiency the number of tasks generated should be greater than or equal to the number of slots.
@ -93,7 +100,7 @@ class TaskHandler(object):
# the dictionary keys are the task identifiers CalculationTask.id,
# the values are the corresponding CalculationTask objects.
## @var invalid_count (int)
## @var _invalid_count (int)
# accumulated total number of invalid results received.
#
# the number is incremented by add_result if an invalid task is reported.
@ -188,21 +195,22 @@ class TaskHandler(object):
the id, model, and files attributes are required.
if model contains a '_rfac' value, the r-factor is
@return: None
@return None
"""
model_id = task.id.model
for path, cat in task.files.iteritems():
for path, cat in task.files.items():
self._project.files.add_file(path, model_id, category=cat)
def cleanup_files(self, keep=10):
def cleanup_files(self, keep=0):
"""
delete uninteresting files.
@param: number of best ranking models to keep.
@param keep: minimum number of models to keep.
0 (default): leave the decision to the project.
@return: None
@return None
"""
self._project.files.delete_files(keep_rfac=keep)
self._project.cleanup_files(keep=keep)
class ModelHandler(TaskHandler):
@ -255,6 +263,22 @@ class ModelHandler(TaskHandler):
return None
def save_report(self, root_task):
    """
    produce the final report of an optimization run.

    calculation results are normally written out as soon as they are available during the run.
    sub-classes can override this method to collect and condense those results,
    for instance into summary files or plots.
    the implementation in this class is empty.

    @note an overriding method has to register the path names of all files it generates
        in self._project.files.

    @param root_task: (CalculationTask) task that carries the initial model parameters.

    @return None
    """
    return None
class SingleModelHandler(ModelHandler):
"""
@ -263,6 +287,10 @@ class SingleModelHandler(ModelHandler):
this class runs a single calculation on the start parameters defined in the domain of the project.
"""
def __init__(self):
    """
    initialize the handler with an empty result dictionary.
    """
    super(SingleModelHandler, self).__init__()
    # (dict) model parameters and '_rfac' value of the finished calculation; starts out empty.
    self.result = dict()
def create_tasks(self, parent_task):
"""
start one task with the start parameters.
@ -316,25 +344,18 @@ class SingleModelHandler(ModelHandler):
modf_ext = ".modf" + parent_task.file_ext
parent_task.modf_filename = parent_task.file_root + modf_ext
rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
assert not math.isnan(task.rfac)
self.result = task.model.copy()
self.result['_rfac'] = task.rfac
task.model['_rfac'] = rfac
self.save_report_file(task.model)
self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.update_model_rfac(task.id.model, task.rfac)
self._project.files.set_model_complete(task.id.model, True)
parent_task.time = task.time
return parent_task
def save_report_file(self, result):
def save_report(self, root_task):
"""
save model parameters and r-factor to a file.
@ -343,20 +364,25 @@ class SingleModelHandler(ModelHandler):
the first line contains the parameter names.
this is the same format as used by the swarm and grid handlers.
@param result: dictionary of results and parameters. the values should be scalars and strings.
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
@return: None
"""
keys = [key for key in result]
super(SingleModelHandler, self).save_report(root_task)
keys = [key for key in self.result]
keys.sort(key=lambda t: t[0].lower())
vals = (str(result[key]) for key in keys)
with open(self._project.output_file + ".dat", "w") as outfile:
vals = (str(self.result[key]) for key in keys)
filename = self._project.output_file + ".dat"
with open(filename, "w") as outfile:
outfile.write("# ")
outfile.write(" ".join(keys))
outfile.write("\n")
outfile.write(" ".join(vals))
outfile.write("\n")
self._project.files.add_file(filename, root_task.id.model, "report")
class ScanHandler(TaskHandler):
"""
@ -388,6 +414,30 @@ class ScanHandler(TaskHandler):
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
    """
    prepare the scan handler and write out the processed experimental data.

    after the base class setup, every scan of the project that has a modulation function
    gets that function saved to a data file and rendered by the scan graphics module.
    the files go to the project's output directory.
    their names are derived from the scan file name
    with the scan index and a `.modf` infix inserted before the extension.

    if the project holds combined scan data and/or a combined modulation function,
    these are saved under the project's output file name.

    @param project: (Project) project instance, passed on to the base class setup.

    @param slots: (int) number of calculation slots, passed on to the base class setup.

    @return None
    """
    super(ScanHandler, self).setup(project, slots)

    for index, scan in enumerate(self._project.scans):
        modulation = scan.modulation
        if modulation is None:
            # nothing to save for scans without a modulation function
            continue
        stem, ext = os.path.splitext(os.path.basename(scan.filename))
        modf_name = "{pre}_{scan}.modf{ext}".format(pre=stem, ext=ext, scan=index)
        modf_path = os.path.join(self._project.output_dir, modf_name)
        md.save_data(modf_path, modulation)
        mgs.render_scan(modf_path, data=modulation)

    # project-level data: written once, independent of the individual scans
    combined_scan = project.combined_scan
    if combined_scan is not None:
        scan_path = project.output_file + md.format_extension(combined_scan)
        md.save_data(scan_path, combined_scan)

    combined_modf = project.combined_modf
    if combined_modf is not None:
        modf_path = project.output_file + ".modf" + md.format_extension(combined_modf)
        md.save_data(modf_path, combined_modf)
def create_tasks(self, parent_task):
"""
generate a calculation task for each scan of the given parent task.
@ -464,6 +514,7 @@ class ScanHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_scans(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')
@ -575,8 +626,11 @@ class SymmetryHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_symmetries(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
graph_file = mgs.render_scan(parent_task.modf_filename)
self._project.files.add_file(graph_file, parent_task.id.model, 'scan')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -621,7 +675,7 @@ class EmitterHandler(TaskHandler):
all emitters share the same model parameters.
@return list of @ref CalculationTask objects with one element per emitter configuration
@return list of @ref pmsco.dispatch.CalculationTask objects with one element per emitter configuration
if parallel processing is enabled.
otherwise the list contains a single CalculationTask object with emitter index 0.
the emitter index is used by the project's create_cluster method.
@ -634,10 +688,7 @@ class EmitterHandler(TaskHandler):
self._complete_ids_per_parent[parent_id] = set()
n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
if n_emitters > 1 and self._slots > 1:
emitters = range(1, n_emitters + 1)
else:
emitters = [0]
emitters = range(n_emitters)
out_tasks = []
for em in emitters:
@ -698,8 +749,11 @@ class EmitterHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_emitters(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
graph_file = mgs.render_scan(parent_task.modf_filename)
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -776,15 +830,10 @@ class RegionHandler(TaskHandler):
parent_task.time = reduce(lambda a, b: a + b, child_times)
if parent_task.result_valid:
stack1 = [md.load_data(t.result_filename) for t in child_tasks]
dtype = md.common_dtype(stack1)
stack2 = [md.restructure_data(d, dtype) for d in stack1]
result_data = np.hstack(tuple(stack2))
md.sort_data(result_data)
md.save_data(parent_task.result_filename, result_data)
self._project.combine_regions(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
for t in child_tasks:
self._project.files.remove_file(t.result_filename)
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, "emitter")
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -840,7 +889,7 @@ class EnergyRegionHandler(RegionHandler):
so that all child tasks of the same parent finish in approximately the same time.
pure angle scans are not split.
to use this feature, the project assigns this class to its @ref handler_classes['region'].
to use this feature, the project assigns this class to its @ref pmsco.project.Project.handler_classes['region'].
it is safe to use this handler for calculations that do not involve energy scans.
the handler is best used for single calculations.
in optimizations that calculate many models there is no advantage in using it