update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
This commit is contained in:
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions

View File

@ -11,7 +11,9 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path
import datetime
@ -19,8 +21,9 @@ import signal
import collections
import copy
import logging
from attrdict import AttrDict
from mpi4py import MPI
from helpers import BraceMessage as BMsg
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -48,7 +51,99 @@ TAG_INVALID_RESULT = 3
## the message is empty
TAG_ERROR_ABORTING = 4
CalcID = collections.namedtuple('CalcID', ['model', 'scan', 'sym', 'emit', 'region'])
## levels of calculation tasks
#
# this tuple defines the names and order of the five task levels,
# from the most general (model) to the most specific (region).
CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region')


## intermediate sub-class of CalcID
#
# this class should not be instantiated.
# instead, use CalcID which provides some useful helper methods.
#
_CalcID = collections.namedtuple('_CalcID', CALC_LEVELS)


class CalcID(_CalcID):
    """
    named tuple class to uniquely identify a calculation task.

    this is a 5-tuple of indices, one index per task level.
    a positive index refers to a specific instance in the task hierarchy.
    the ID is defined as a named tuple so that it can be used as key of a dictionary.
    cf. @ref CalculationTask for further detail.

    compared to a plain named tuple, the CalcID class provides additional helper methods and properties.
    example constructor: CalcID(1, 2, 3, 4, 5).
    """

    @property
    def levels(self):
        """
        level names.

        this property returns the defined level names in a tuple.
        this is the same as @ref CALC_LEVELS.

        @return: tuple of level names
        """
        return self._fields

    @property
    def level(self):
        """
        specific level of a task, dictionary key form.

        this corresponds to the name of the last non-negative component.

        @return: attribute name corresponding to the task level.
            empty string if all members are negative (the root task).
        """
        for name in reversed(self._fields):
            if getattr(self, name) >= 0:
                return name
        return ''

    @property
    def numeric_level(self):
        """
        specific level of a task, numeric form.

        this corresponds to the last non-negative value in the sequence of indices.

        @return: index corresponding to the significant task level component of the id.
            the value ranges from -1 to len(CalcID) - 1.
            it is -1 if all indices are negative (root task).
        """
        for pos in reversed(range(len(self))):
            if self[pos] >= 0:
                return pos
        return -1

    def collapse_levels(self, level):
        """
        return a new CalcID that is collapsed at a specific level.

        the method returns a new CalcID object where the indices below the given level are -1 (undefined).
        this can be seen as collapsing the tree at the specified node (level).

        @note because a CalcID is immutable, this method returns a new instance.

        @param level: level at which to collapse.
            the index at this level remains unchanged, lower ones are set to -1.
            the level can be specified by attribute name (str) or numeric index (-1..4).

        @raise ValueError if level is neither numeric nor in CALC_LEVELS,
            or if the numeric level is outside the range -1..len(CALC_LEVELS)-1.

        @return: new CalcID instance.
        """
        try:
            level = int(level)
        except (ValueError, TypeError):
            # not numeric: interpret as a level name.
            # CALC_LEVELS.index raises ValueError for unknown names,
            # matching the documented contract (int(None) alone would raise TypeError).
            level = CALC_LEVELS.index(level)
        if not -1 <= level < len(CALC_LEVELS):
            # raise instead of assert so the range check survives python -O.
            raise ValueError("invalid calculation level {0}".format(level))
        mask = {name: -1 for (pos, name) in enumerate(CALC_LEVELS) if pos > level}
        return self._replace(**mask)
class CalculationTask(object):
@ -71,11 +166,11 @@ class CalculationTask(object):
specified members must be greater or equal to zero.
-1 is the wildcard which is used in parent tasks,
where, e.g., no specific symmetry is chosen.
the root task has the ID (-1, -1, -1, -1).
the root task has the ID (-1, -1, -1, -1, -1).
"""
## @var id (CalcID)
# named tuple CalcID containing the 4-part calculation task identifier.
# named tuple CalcID containing the 5-part calculation task identifier.
## @var parent_id (CalcID)
# named tuple CalcID containing the task identifier of the parent task.
@ -105,6 +200,21 @@ class CalculationTask(object):
## @var modf_filename (string)
# name of the ETPI or ETPAI file that contains the resulting modulation function.
## @var result_valid (bool)
# validity status of the result file @ref result_filename.
#
# if True, the file must exist and contain valid data according to the task specification.
# the value is set True when a calculation task completes successfully.
# it may be reset later to invalidate the data if an error occurs during processing.
#
# validity of a parent task requires validity of all child tasks.
## @var rfac (float)
# r-factor value of the task result.
#
# the rfac field is written by @ref pmsco.project.Project.evaluate_result.
# the initial value is Not A Number.
## @var time (timedelta)
# execution time of the task.
#
@ -125,7 +235,7 @@ class CalculationTask(object):
# scan positions to substitute the ones from the original scan.
#
# this is used to distribute scans over multiple calculator processes,
# cf. e.g. @ref EnergyRegionHandler.
# cf. e.g. @ref pmsco.handlers.EnergyRegionHandler.
#
# dictionary key must be the scan dimension 'e', 't', 'p', 'a'.
# the value is a numpy.ndarray containing the scan positions.
@ -147,6 +257,7 @@ class CalculationTask(object):
self.time = datetime.timedelta()
self.files = {}
self.region = {}
self.rfac = float('nan')
def __eq__(self, other):
"""
@ -192,7 +303,7 @@ class CalculationTask(object):
msg['id'] = CalcID(**msg['id'])
if isinstance(msg['parent_id'], dict):
msg['parent_id'] = CalcID(**msg['parent_id'])
for k, v in msg.iteritems():
for k, v in msg.items():
self.__setattr__(k, v)
def format_filename(self, **overrides):
@ -204,13 +315,8 @@ class CalculationTask(object):
@return a string consisting of the concatenation of the base name, the ID, and the extension.
"""
parts = {}
parts = self.id._asdict()
parts['root'] = self.file_root
parts['model'] = self.id.model
parts['scan'] = self.id.scan
parts['sym'] = self.id.sym
parts['emit'] = self.id.emit
parts['region'] = self.id.region
parts['ext'] = self.file_ext
for key in overrides.keys():
@ -237,6 +343,30 @@ class CalculationTask(object):
"""
self.id = self.id._replace(**kwargs)
@property
def level(self):
    """
    specific level of a task, dictionary key form.

    the value is obtained from @ref CalcID.level of self.id and
    corresponds to the name of the last non-negative id component.

    @return: attribute name corresponding to the task level.
        empty string for the root task.
    """
    task_id = self.id
    return task_id.level
@property
def numeric_level(self):
    """
    specific level of a task, numeric form.

    the value is obtained from @ref CalcID.numeric_level of self.id and
    corresponds to the index of the last non-negative id component.

    @return: index corresponding to the significant task level component of the id.
        -1 for the root task.
    """
    task_id = self.id
    return task_id.numeric_level
def add_task_file(self, name, category):
"""
register a file that was generated by the calculation task.
@ -284,6 +414,82 @@ class CalculationTask(object):
logger.warning("CalculationTask.remove_task_file: could not remove file {0}".format(filename))
class CachedCalculationMethod(object):
    """
    decorator to cache results of expensive calculation functions.

    this decorator can be used to transparently cache any expensive calculation result
    that depends in a deterministic way on the calculation index.
    for example, each cluster gets a unique calculation index.
    if a cluster needs to be calculated repeatedly, it may be more efficient to cache it.

    the method to decorate must have the following signature:
    result = func(self, model, index).
    the index (neglecting emitter and region) identifies the result (completely and uniquely).

    the number of cached results is limited by the ttl (time to live) attribute.
    the items' ttl values are decreased each time a requested calculation is not found in the cache (miss).
    on a cache hit, the corresponding item's ttl is reset.

    the target ttl (time to live) can be specified as an optional parameter of the decorator.
    time increases with every cache miss.
    """

    ## @var _cache (dict)
    #
    # key = calculation index,
    # value = function result

    ## @var _ttl (dict)
    #
    # key = calculation index,
    # value (int) = remaining time to live
    # where time is the number of subsequent cache misses.

    ## @var ttl (int)
    #
    # target time to live of cache items.
    # time is given by the number cache misses.

    def __init__(self, ttl=10):
        super(CachedCalculationMethod, self).__init__()
        self._cache = {}
        self._ttl = {}
        self.ttl = ttl

    def __call__(self, func):
        def wrapped_func(inst, model, index):
            # note: _replace returns a new instance of the namedtuple.
            # emitter and region indices do not contribute to the cache key.
            index = index._replace(emit=-1, region=-1)
            cache_index = (id(inst), index.model, index.scan, index.sym)
            try:
                result = self._cache[cache_index]
            except KeyError:
                # cache miss: compute, age the existing items, then store.
                result = func(inst, model, index)
                self._expire()
                self._cache[cache_index] = result
            # reset the item's ttl on every access (hit or miss),
            # as promised in the class docstring; previously the ttl was
            # only set on a miss, so frequently-hit items expired anyway.
            self._ttl[cache_index] = self.ttl
            return result

        return wrapped_func

    def _expire(self):
        """
        decrease the remaining ttl of cache items and delete items whose ttl has fallen below 0.

        @return: None
        """
        for index in self._ttl:
            self._ttl[index] -= 1
        old_items = [index for index in self._ttl if self._ttl[index] < 0]
        for index in old_items:
            del self._ttl[index]
            del self._cache[index]
class MscoProcess(object):
"""
code shared by MscoMaster and MscoSlave.
@ -357,11 +563,9 @@ class MscoProcess(object):
"""
clean up after all calculations.
this method calls the clean up function of the project.
@return: None
"""
self._project.cleanup()
pass
def calc(self, task):
"""
@ -387,14 +591,36 @@ class MscoProcess(object):
logger.info("model %s", s_model)
start_time = datetime.datetime.now()
# create parameter and cluster structures
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
par = self._project.create_params(task.model, task.id)
# generate file names
clu = self._create_cluster(task)
par = self._create_params(task)
scan = self._define_scan(task)
output_file = task.format_filename(ext="")
# determine scan range
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return task
def _define_scan(self, task):
"""
define the scan range.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.project.Scan object for the calculator.
"""
scan = self._project.scans[task.id.scan]
if task.region:
scan = scan.copy()
@ -419,26 +645,56 @@ class MscoProcess(object):
except KeyError:
pass
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
files = self._calculator.check_cluster(clu, output_file)
return scan
def _create_cluster(self, task):
"""
generate the cluster for the given calculation task.
cluster generation is delegated to the project's cluster_generator object.
if the current task has region == 0,
the method also exports diagnostic clusters via the project's export_cluster() method.
the file name is formatted with the given task index except that region is -1.
if (in addition to region == 0) the current task has emit == 0 and cluster includes multiple emitters,
the method also exports the master cluster and full emitter list.
the file name is formatted with the given task index except that emitter and region are -1.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.cluster.Cluster object for the calculator.
"""
nem = self._project.cluster_generator.count_emitters(task.model, task.id)
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
if task.id.region == 0:
file_index = task.id._replace(region=-1)
filename = task.format_filename(region=-1)
files = self._project.export_cluster(file_index, filename, clu)
task.files.update(files)
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
# master cluster
if nem > 1 and task.id.emit == 0:
master_index = task.id._replace(emit=-1, region=-1)
filename = task.format_filename(emit=-1, region=-1)
master_cluster = self._project.cluster_generator.create_cluster(task.model, master_index)
files = self._project.export_cluster(master_index, filename, master_cluster)
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return clu
return task
def _create_params(self, task):
    """
    generate the parameters list.

    parameters generation is delegated to the project's create_params method.

    @param task: CalculationTask with all attributes set for the calculation.

    @return: pmsco.project.Params object for the calculator.
    """
    return self._project.create_params(task.model, task.id)
class MscoMaster(MscoProcess):
@ -505,20 +761,12 @@ class MscoMaster(MscoProcess):
# it defines the initial model and the output file name.
# it is passed to the model handler during the main loop.
# @var _model_handler
# (ModelHandler) model handler instance
# @var _scan_handler
# (ScanHandler) scan handler instance
# @var _symmetry_handler
# (SymmetryHandler) symmetry handler instance
# @var _emitter_handler
# (EmitterHandler) emitter handler instance
# @var _region_handler
# (RegionHandler) region handler instance
## @var task_handlers
# (AttrDict) dictionary of task handler objects
#
# the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'.
# the values are handlers.TaskHandler objects.
# the objects can be accessed in attribute or dictionary notation.
def __init__(self, comm):
super(MscoMaster, self).__init__(comm)
@ -534,11 +782,8 @@ class MscoMaster(MscoProcess):
self._min_queue_len = self._slaves + 1
self._root_task = None
self._model_handler = None
self._scan_handler = None
self._symmetry_handler = None
self._emitter_handler = None
self._region_handler = None
self.task_levels = list(CalcID._fields)
self.task_handlers = AttrDict()
def setup(self, project):
"""
@ -564,30 +809,29 @@ class MscoMaster(MscoProcess):
logger.debug("master entering setup")
self._running_slaves = self._slaves
self._idle_ranks = range(1, self._running_slaves + 1)
self._idle_ranks = list(range(1, self._running_slaves + 1))
self._root_task = CalculationTask()
self._root_task.file_root = project.output_file
self._root_task.model = project.create_domain().start
self._model_handler = project.handler_classes['model']()
self._scan_handler = project.handler_classes['scan']()
self._symmetry_handler = project.handler_classes['symmetry']()
self._emitter_handler = project.handler_classes['emitter']()
self._region_handler = project.handler_classes['region']()
for level in self.task_levels:
self.task_handlers[level] = project.handler_classes[level]()
self._model_handler.datetime_limit = self.datetime_limit
self.task_handlers.model.datetime_limit = self.datetime_limit
slaves_adj = max(self._slaves, 1)
self._model_handler.setup(project, slaves_adj)
self.task_handlers.model.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = max(slaves_adj / 2, 1)
self._scan_handler.setup(project, slaves_adj)
self._symmetry_handler.setup(project, slaves_adj)
self._emitter_handler.setup(project, slaves_adj)
self.task_handlers.scan.setup(project, slaves_adj)
self.task_handlers.sym.setup(project, slaves_adj)
self.task_handlers.emit.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = min(slaves_adj, 4)
self._region_handler.setup(project, slaves_adj)
self.task_handlers.region.setup(project, slaves_adj)
project.setup(self.task_handlers)
def run(self):
"""
@ -615,14 +859,13 @@ class MscoMaster(MscoProcess):
logger.debug("master exiting main loop")
self._running = False
self._save_report()
def cleanup(self):
logger.debug("master entering cleanup")
self._region_handler.cleanup()
self._emitter_handler.cleanup()
self._symmetry_handler.cleanup()
self._scan_handler.cleanup()
self._model_handler.cleanup()
for level in reversed(self.task_levels):
self.task_handlers[level].cleanup()
self._project.cleanup()
super(MscoMaster, self).cleanup()
def _dispatch_results(self):
@ -632,29 +875,25 @@ class MscoMaster(MscoProcess):
logger.debug("dispatching results of %u tasks", len(self._complete_tasks))
while self._complete_tasks:
__, task = self._complete_tasks.popitem(last=False)
self._dispatch_result(task)
logger.debug("passing task %s to region handler", str(task.id))
task = self._region_handler.add_result(task)
def _dispatch_result(self, task):
"""
pass a result through the post-processing modules.
@param task: a CalculationTask object.
@return None
"""
level = task.level
if level:
logger.debug(BMsg("passing task {task} to {level} handler", task=str(task.id), level=level))
task = self.task_handlers[level].add_result(task)
if task:
logger.debug("passing task %s to emitter handler", str(task.id))
task = self._emitter_handler.add_result(task)
if task:
logger.debug("passing task %s to symmetry handler", str(task.id))
task = self._symmetry_handler.add_result(task)
if task:
logger.debug("passing task %s to scan handler", str(task.id))
task = self._scan_handler.add_result(task)
if task:
logger.debug("passing task %s to model handler", str(task.id))
task = self._model_handler.add_result(task)
if task:
logger.debug("root task %s complete", str(task.id))
self._finishing = True
self._dispatch_result(task)
else:
self._finishing = True
logger.debug(BMsg("root task {task} complete", task=str(task.id)))
def _create_tasks(self):
"""
@ -668,7 +907,7 @@ class MscoMaster(MscoProcess):
"""
logger.debug("creating new tasks from root")
while len(self._pending_tasks) < self._min_queue_len:
tasks = self._model_handler.create_tasks(self._root_task)
tasks = self.task_handlers.model.create_tasks(self._root_task)
logger.debug("model handler returned %u new tasks", len(tasks))
if not tasks:
self._model_done = True
@ -807,6 +1046,17 @@ class MscoMaster(MscoProcess):
return self._finishing
def _save_report(self):
    """
    generate a final report.

    this method is called at the end of the master loop.
    it passes the call to @ref pmsco.handlers.ModelHandler.save_report.

    @return: None
    """
    model_handler = self.task_handlers.model
    model_handler.save_report(self._root_task)
def add_model_task(self, task):
"""
add a new model task including all of its children to the task queue.
@ -814,13 +1064,13 @@ class MscoMaster(MscoProcess):
@param task (CalculationTask) task identifier and model parameters.
"""
scan_tasks = self._scan_handler.create_tasks(task)
scan_tasks = self.task_handlers.scan.create_tasks(task)
for scan_task in scan_tasks:
sym_tasks = self._symmetry_handler.create_tasks(scan_task)
sym_tasks = self.task_handlers.sym.create_tasks(scan_task)
for sym_task in sym_tasks:
emitter_tasks = self._emitter_handler.create_tasks(sym_task)
emitter_tasks = self.task_handlers.emit.create_tasks(sym_task)
for emitter_task in emitter_tasks:
region_tasks = self._region_handler.create_tasks(emitter_task)
region_tasks = self.task_handlers.region.create_tasks(emitter_task)
for region_task in region_tasks:
self._pending_tasks[region_task.id] = region_task