add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
This commit is contained in:
948
pmsco/handlers.py
Normal file
948
pmsco/handlers.py
Normal file
@ -0,0 +1,948 @@
|
||||
"""
|
||||
@package pmsco.handlers
|
||||
project-independent task handlers for models, scans, symmetries, emitters and energies.
|
||||
|
||||
calculation tasks are organized in a hierarchical tree.
|
||||
at each node, a task handler (feel free to find a better name)
|
||||
creates a set of child tasks according to the optimization mode and requirements of the project.
|
||||
at the end points of the tree, the tasks are ready to be sent to calculation program.
|
||||
the handlers collect the results, and return one combined dataset per node.
|
||||
the passing of tasks and results between handlers is managed by the processing loop.
|
||||
|
||||
<em>model handlers</em> define the model parameters used in calculations.
|
||||
the parameters can be chosen according to user input, or according to a structural optimization algorithm.
|
||||
a model handler class derives from the ModelHandler class.
|
||||
the most simple one, SingleModelHandler, is implemented in this module.
|
||||
it calculates the diffraction pattern of a single model with the start parameters given in the domain object.
|
||||
the handlers of the structural optimizers are declared in separate modules.
|
||||
|
||||
<em>scan handlers</em> split a task into one child task per scan file.
|
||||
scans are defined by the project.
|
||||
the actual merging step from multiple scans into one result dataset is delegated to the project class.
|
||||
|
||||
<em>symmetry handlers</em> split a task into one child per symmetry.
|
||||
symmetries are defined by the project.
|
||||
the actual merging step from multiple symmetries into one result dataset is delegated to the project class.
|
||||
|
||||
<em>emitter handlers</em> split a task into one child per emitter configuration (inequivalent sets of emitting atoms).
|
||||
emitter configurations are defined by the project.
|
||||
the merging of calculation results of emitter configurations is delegated to the project class.
|
||||
since emitters contribute incoherently to the diffraction pattern,
|
||||
it should make no difference how the emitters are grouped and calculated.
|
||||
code inspection and tests have shown that per-emitter results from EDAC can be simply added.
|
||||
|
||||
<em>energy handlers</em> may split a calculation task into multiple tasks
|
||||
in order to take advantage of parallel processing.
|
||||
|
||||
while several classes of model handlers are available,
|
||||
the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations.
|
||||
the scan and symmetry handlers call methods of the project class to invoke project-specific functionality.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-17 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import datetime
|
||||
import os
|
||||
import logging
|
||||
import math
|
||||
import numpy as np
|
||||
import data as md
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskHandler(object):
    """
    common ancestor for task handlers.

    this class defines the common interface of task handlers.
    """

    ## @var _project
    # (Project) project instance.

    ## @var _slots
    # (int) number of calculation slots (processes).
    #
    # for best efficiency the number of tasks generated should be greater or equal the number of slots.
    # it should not exceed N times the number of slots, where N is a reasonably small number.

    ## @var _pending_tasks
    # (dict) pending tasks by ID (created but not yet calculated).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _complete_tasks
    # (dict) complete tasks by ID (calculation finished, parent not yet complete).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _parent_tasks
    # (dict) pending parent tasks by ID.
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _invalid_count (int)
    # accumulated total number of invalid results received.
    #
    # the number is incremented by add_result if an invalid task is reported.
    # the number can be used by descendants to terminate a hopeless calculation.

    def __init__(self):
        self._project = None
        self._slots = 0
        self._pending_tasks = {}
        self._parent_tasks = {}
        self._complete_tasks = {}
        self._invalid_count = 0

    def setup(self, project, slots):
        """
        initialize the handler with project data and the process environment.

        the method is called once by the dispatcher before the calculation loop starts.
        the handler can initialize internal variables which it hasn't done in the constructor.

        @param project (Project) project instance.

        @param slots (int) number of calculation slots (processes).
        for best efficiency the number of tasks generated should be greater or equal the number of slots.
        it should not exceed N times the number of slots, where N is a reasonably small number.

        @return None
        """
        self._project = project
        self._slots = slots

    def cleanup(self):
        """
        clean up whatever is necessary, e.g. close files.

        this method is called once after all calculations have finished.

        @return None
        """
        pass

    def create_tasks(self, parent_task):
        """
        create the next series of child tasks for the given parent task.

        the method is called by the dispatcher when a new series of tasks should be generated.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
        the list must be empty if there are no more tasks.
        """

        return []

    def add_result(self, task):
        """
        collect and combine the results of tasks created by the same handler.

        this method collects the results of tasks that were created by self.create_tasks() and
        passes them on to the parent whenever a family (i.e. all tasks that have the same parent) is complete.
        when the family is complete, the method creates the data files that are represented by the parent task and
        signals to the caller that the parent task is complete.

        the method is called by the dispatcher whenever a calculation task belonging to this handler completes.

        as of this class, the method counts invalid results and
        adds the list of data files to the project's file tracker.
        collecting the tasks and combining their data must be implemented in sub-classes.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete,
        None if the family is not complete yet.
        As of this class, the method returns None.
        """
        if not task.result_valid:
            self._invalid_count += 1

        self.track_files(task)

        return None

    def track_files(self, task):
        """
        register all task files with the file tracker of the project.

        @param task: CalculationTask object.
            the id, model, and files attributes are required.
            files maps file paths to their category strings.

        @return: None
        """
        model_id = task.id.model
        # dict.items() works in python 2 and 3, whereas iteritems() is python 2 only.
        for path, cat in task.files.items():
            self._project.files.add_file(path, model_id, category=cat)

    def cleanup_files(self, keep=10):
        """
        delete uninteresting files.

        @param keep: (int) number of best ranking models to keep.

        @return: None
        """
        self._project.files.delete_files(keep_rfac=keep)
|
||||
|
||||
|
||||
class ModelHandler(TaskHandler):
    """
    abstract model handler.

    structural optimizers must be derived from this class and implement a loop on the model.
    """

    ## @var datetime_limit (datetime.datetime)
    # date and time when the model handler should finish (regardless of result)
    # because the process may get killed by the scheduler after this time.
    #
    # the default is 100 days after creation of the handler.

    def __init__(self):
        super(ModelHandler, self).__init__()
        # generous default budget: stop 100 days after creation of the handler.
        self.datetime_limit = datetime.datetime.now() + datetime.timedelta(days=100)

    def create_tasks(self, parent_task):
        """
        create tasks for the next population of models.

        the method is called repeatedly by the dispatcher when the calculation queue runs empty.
        the model should then create the next round of tasks, e.g. the next generation of a population.
        the number of tasks created can be as low as one.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @note it is not possible to hold back calculations, or to wait for results.
        the handler must either return a task, or signal the end of the optimization process.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
        the list must be empty if there are no more tasks.
        """
        super(ModelHandler, self).create_tasks(parent_task)
        return []

    def add_result(self, task):
        """
        collect and combine results of a scan.

        this method is called by the dispatcher when all results for a scan are available.
        """
        super(ModelHandler, self).add_result(task)
        return None
|
||||
|
||||
|
||||
class SingleModelHandler(ModelHandler):
    """
    single model calculation handler.

    this class runs a single calculation on the start parameters defined in the domain of the project.
    """

    def create_tasks(self, parent_task):
        """
        start one task with the start parameters.

        subsequent calls will return an empty task list.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list with a single CalculationTask on the first call, empty list afterwards.
        """
        super(SingleModelHandler, self).create_tasks(parent_task)

        out_tasks = []
        # create the task only once; afterwards the bookkeeping dicts are non-empty.
        if len(self._complete_tasks) + len(self._pending_tasks) == 0:
            parent_id = parent_task.id
            self._parent_tasks[parent_id] = parent_task
            new_task = parent_task.copy()
            new_task.change_id(model=0)
            new_task.parent_id = parent_id
            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            out_tasks.append(new_task)

        return out_tasks

    def add_result(self, task):
        """
        collect the end result of a single calculation.

        the SingleModelHandler runs calculations for a single model.
        this method assumes that it will be called just once.
        it returns the parent task to signal the end of the calculations.

        the result file is not deleted regardless of the files_to_delete project option.
        the task ID is removed from the file name.

        @param task: (CalculationTask) calculation task that completed.

        @return (CalculationTask) parent task.
        """
        super(SingleModelHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        parent_task = self._parent_tasks[task.parent_id]
        del self._parent_tasks[task.parent_id]

        parent_task.result_valid = task.result_valid
        parent_task.file_ext = task.file_ext
        parent_task.result_filename = parent_task.file_root + parent_task.file_ext
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

        # default r-factor 1.0 marks an unusable result.
        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self.save_report_file(task.model)

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        parent_task.time = task.time

        return parent_task

    def save_report_file(self, result):
        """
        save model parameters and r-factor to a file.

        the file name is derived from the project's output_file with '.dat' extension.
        the file has a space-separated column format.
        the first line contains the parameter names.
        this is the same format as used by the swarm and grid handlers.

        @param result: dictionary of results and parameters. the values should be scalars and strings.

        @return: None
        """
        # sort the columns by the full parameter name, case-insensitive.
        # (the previous key function compared only the first character of each name,
        # which left the order of same-initial keys up to dict iteration order.)
        keys = sorted(result, key=lambda name: name.lower())
        vals = (str(result[key]) for key in keys)
        with open(self._project.output_file + ".dat", "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(keys))
            outfile.write("\n")
            outfile.write(" ".join(vals))
            outfile.write("\n")
|
||||
|
||||
|
||||
class ScanHandler(TaskHandler):
    """
    split the parameters into one set per scan and gather the results.

    the scan selection takes effect in MscoProcess.calc().
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(ScanHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each scan of the given parent task.

        all scans share the model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of CalculationTask objects, with one element per scan.
            the scan index varies according to project.scans.
        """
        super(ScanHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        # a parent must be submitted only once.
        # note: assert statements are skipped under python -O.
        assert parent_id not in self._pending_ids_per_parent
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        out_tasks = []
        for (i_scan, scan) in enumerate(self._project.scans):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(scan=i_scan)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no scan tasks generated. your project must link to at least one scan file.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results versus scan.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_scans() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(ScanHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all scans complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_scans(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class SymmetryHandler(TaskHandler):
    """
    split the parameters into one set per symmetry and gather the results.

    symmetries are defined by the project.
    the actual merging of the results is delegated to the project's combine_symmetries() method.
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(SymmetryHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each symmetry of the given parent task.

        all symmetries share the same model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of CalculationTask objects, with one element per symmetry.
            the symmetry index varies according to project.symmetries.
        """
        super(SymmetryHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        out_tasks = []
        for (i_sym, sym) in enumerate(self._project.symmetries):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(sym=i_sym)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no symmetry tasks generated. your project must declare at least one symmetry.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results versus symmetry.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_symmetries() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(SymmetryHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all symmetries complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_symmetries(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class EmitterHandler(TaskHandler):
    """
    the emitter handler distributes emitter configurations to calculation tasks and collects their results.

    """
    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(EmitterHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each emitter configuration of the given parent task.

        all emitters share the same model parameters.

        @param parent_task (CalculationTask) task with the model parameters.

        @return list of @ref CalculationTask objects with one element per emitter configuration
            if parallel processing is enabled.
            otherwise the list contains a single CalculationTask object with emitter index 0.
            the emitter index is used by the project's create_cluster method.
        """
        super(EmitterHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        # split by emitter only if it pays off: more than one emitter and more than one slot.
        # emitter index 0 selects the all-in-one calculation.
        n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
        if n_emitters > 1 and self._slots > 1:
            emitters = range(1, n_emitters + 1)
        else:
            emitters = [0]

        out_tasks = []
        for em in emitters:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(emit=em)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no emitter tasks generated. your project must declare at least one emitter configuration.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results of inequivalent emitters.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_emitters() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(EmitterHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all emitters complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                self._project.combine_emitters(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class RegionHandler(TaskHandler):
    """
    region handlers split a scan into a number of regions that can be calculated in parallel.

    this class is an abstract base class.
    it implements only common code to combine different regions into one result.
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(RegionHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def add_result(self, task):
        """
        gather results of all regions that belong to the same parent.

        when the last region of a family arrives, the data of all regions is
        concatenated, sorted and saved under the parent's result file name.
        the per-region files are removed from the file tracker.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(RegionHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all regions ready?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            # all() replaces reduce(lambda a, b: a and b, ...);
            # reduce is not a builtin in python 3.
            parent_task.result_valid = all(t.result_valid for t in child_tasks)
            # sum with an explicit start element supports timedelta as well as numbers.
            # child_tasks is never empty here because the family just completed.
            child_times = [t.time for t in child_tasks]
            parent_task.time = sum(child_times[1:], child_times[0])

            if parent_task.result_valid:
                # bring all region datasets to a common dtype before stacking.
                stack1 = [md.load_data(t.result_filename) for t in child_tasks]
                dtype = md.common_dtype(stack1)
                stack2 = [md.restructure_data(d, dtype) for d in stack1]
                result_data = np.hstack(tuple(stack2))
                md.sort_data(result_data)
                md.save_data(parent_task.result_filename, result_data)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
                for t in child_tasks:
                    self._project.files.remove_file(t.result_filename)

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None
|
||||
|
||||
|
||||
class SingleRegionHandler(RegionHandler):
    """
    trivial region handler that processes each parent task as one region.

    no splitting occurs: the whole scan of the parent task is
    calculated in a single piece by exactly one child task.
    """

    def create_tasks(self, parent_task):
        """
        wrap the parent task in exactly one child task.

        the child task is a copy of the parent with its region index set to 0.
        the bookkeeping dictionaries inherited from RegionHandler are
        initialized so that add_result can recognize when the family
        (here: a single child) is complete.

        @param parent_task: (CalculationTask) task to be calculated.

        @return list of CalculationTask objects containing the single child task.
            the energy index enumerates the regions.
        """
        super(SingleRegionHandler, self).create_tasks(parent_task)

        family_id = parent_task.id

        # register the parent and prepare empty pending/complete id sets
        self._parent_tasks[family_id] = parent_task
        self._pending_ids_per_parent[family_id] = set()
        self._complete_ids_per_parent[family_id] = set()

        # the single region covers the entire parent task
        region_task = parent_task.copy()
        region_task.parent_id = family_id
        region_task.change_id(region=0)

        self._pending_tasks[region_task.id] = region_task
        self._pending_ids_per_parent[family_id].add(region_task.id)

        return [region_task]
|
||||
|
||||
|
||||
class EnergyRegionHandler(RegionHandler):
    """
    split a scan into a number of energy regions that can be run in parallel.

    the purpose of this task handler is to save wall clock time on a multi-processor machine
    by splitting energy scans into smaller chunks.

    the handler distributes the processing slots to the scans proportional to their scan lengths
    so that all child tasks of the same parent finish approximately in the same time.
    pure angle scans are not split.

    to use this feature, the project assigns this class to its @ref handler_classes['region'].
    it is safe to use this handler for calculations that do not involve energy scans.
    the handler is best used for single calculations.
    in optimizations that calculate many models there is no advantage in using it
    (on the contrary, the overhead increases the total run time slightly.)
    """

    ## @var _slots_per_scan
    # (list of integers) number of processor slots assigned to each scan,
    # i.e. number of chunks to split a scan region into.
    #
    # the sequence has the same order as self._project.scans.

    def __init__(self):
        super(EnergyRegionHandler, self).__init__()
        self._slots_per_scan = []

    def setup(self, project, slots):
        """
        initialize the handler with project data and the process environment.

        this function distributes the processing slots to the scans.
        the slots are distributed proportional to the scan lengths of the energy scans
        so that all chunks have approximately the same size.
        each scan receives at least one slot, even pure angle scans (length 0).

        the number of slots per scan is stored in @ref _slots_per_scan for later use by @ref create_tasks.

        @param project (Project) project instance.

        @param slots (int) number of calculation slots (processes).

        @return None
        """
        super(EnergyRegionHandler, self).setup(project, slots)

        scan_lengths = [scan.energies.shape[0] for scan in self._project.scans]
        total_length = sum(scan_lengths)
        # guard against division by zero when the project contains only
        # pure angle scans (zero energy steps in every scan).
        # with f = 0.0 the max(1, ...) below assigns one chunk per scan,
        # which is the correct behaviour for scans that cannot be split.
        if total_length > 0:
            f = min(1.0, float(self._slots) / total_length)
        else:
            f = 0.0
        self._slots_per_scan = [max(1, int(round(length * f))) for length in scan_lengths]

        for i, scan in enumerate(self._project.scans):
            logger.debug(BMsg("region handler: split scan {file} into {slots} chunks",
                              file=os.path.basename(scan.filename), slots=self._slots_per_scan[i]))

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each energy region of the given parent task.

        all child tasks share the model parameters.
        the energy axis of the parent scan is cut into the number of chunks
        determined by @ref setup (one chunk per assigned slot).

        @param parent_task: (CalculationTask) task to be split into regions.

        @return list of CalculationTask objects, with one element per region.
            the energy index enumerates the regions.
        """
        super(EnergyRegionHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        energies = self._project.scans[parent_id.scan].energies
        n_regions = self._slots_per_scan[parent_id.scan]
        regions = np.array_split(energies, n_regions)

        out_tasks = []
        for ireg, reg in enumerate(regions):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(region=ireg)
            # a single region means the scan is calculated in one piece;
            # in that case the region filter is left untouched.
            if n_regions > 1:
                new_task.region['e'] = reg

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no region tasks generated. this is probably a bug.")

        return out_tasks
|
||||
|
||||
|
||||
def choose_region_handler_class(project):
    """
    choose a suitable region handler for the project.

    the EnergyRegionHandler class is returned if at least one scan of the
    project contains an energy axis with 10 or more steps.
    otherwise, the SingleRegionHandler class is returned.

    angle scans do not benefit from region splitting in EDAC.

    @param project: Project instance.
    @return: SingleRegionHandler or EnergyRegionHandler class.
    """
    # a scan qualifies as an energy scan if its energy axis has at least 10 steps
    has_energy_scan = any(scan.energies.shape[0] >= 10 for scan in project.scans)
    return EnergyRegionHandler if has_energy_scan else SingleRegionHandler
|
Reference in New Issue
Block a user