public release 2.2.0 - see README.md and CHANGES.md for details

This commit is contained in:
2020-09-04 16:22:42 +02:00
parent fbd2d4fa8c
commit 7c61eb1b41
67 changed files with 2934 additions and 682 deletions

View File

@ -1,6 +1,6 @@
"""
@package pmsco.handlers
project-independent task handlers for models, scans, symmetries, emitters and energies.
project-independent task handlers for models, scans, domains, emitters and energies.
calculation tasks are organized in a hierarchical tree.
at each node, a task handler (feel free to find a better name)
@ -20,9 +20,9 @@ the handlers of the structural optimizers are declared in separate modules.
scans are defined by the project.
the actual merging step from multiple scans into one result dataset is delegated to the project class.
<em>symmetry handlers</em> split a task into one child per symmetry.
symmetries are defined by the project.
the actual merging step from multiple symmetries into one result dataset is delegated to the project class.
<em>domain handlers</em> split a task into one child per domain.
domains are defined by the project.
the actual merging step from multiple domains into one result dataset is delegated to the project class.
<em>emitter handlers</em> split a task into one child per emitter configuration (inequivalent sets of emitting atoms).
emitter configurations are defined by the project.
@ -35,8 +35,8 @@ code inspection and tests have shown that per-emitter results from EDAC can be s
in order to take advantage of parallel processing.
while several classes of model handlers are available,
the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations.
the scan and symmetry handlers call methods of the project class to invoke project-specific functionality.
the default handlers for scans, domains, emitters and energies should be sufficient in most situations.
the scan and domain handlers call methods of the project class to invoke project-specific functionality.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@ -60,6 +60,7 @@ import os
from pmsco.compat import open
import pmsco.data as md
import pmsco.dispatch as dispatch
import pmsco.graphics.scan as mgs
from pmsco.helpers import BraceMessage as BMsg
@ -127,10 +128,14 @@ class TaskHandler(object):
for best efficiency, the number of tasks generated should be greater than or equal to the number of slots.
it should not exceed N times the number of slots, where N is a reasonably small number.
@return None
@return (int) number of children that create_tasks() will generate on average.
the number does not need to be accurate; a rough estimate, or the order of magnitude if it is greater than 10, is fine.
it is used to distribute processing slots across task levels.
see pmsco.dispatch.MscoMaster.setup().
"""
self._project = project
self._slots = slots
return 1
def cleanup(self):
"""
@ -416,6 +421,8 @@ class ScanHandler(TaskHandler):
def setup(self, project, slots):
"""
initialize the scan task handler and save processed experimental scans.
@return (int) number of scans defined in the project.
"""
super(ScanHandler, self).setup(project, slots)
@ -437,6 +444,8 @@ class ScanHandler(TaskHandler):
filename = project.output_file + ".modf" + ext
md.save_data(filename, project.combined_modf)
return len(self._project.scans)
def create_tasks(self, parent_task):
"""
generate a calculation task for each scan of the given parent task.
@ -526,7 +535,7 @@ class ScanHandler(TaskHandler):
return None
class SymmetryHandler(TaskHandler):
class DomainHandler(TaskHandler):
## @var _pending_ids_per_parent
# (dict) sets of child task IDs per parent
#
@ -546,20 +555,29 @@ class SymmetryHandler(TaskHandler):
# the values are sets of all child CalculationTask.id belonging to the parent.
def __init__(self):
super(SymmetryHandler, self).__init__()
super(DomainHandler, self).__init__()
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
    """
    prepare the domain task handler for a calculation run.

    the arguments are handed to the setup method of the parent class first.
    afterwards, the domains of the project held in self._project are counted.

    @param project project object handed on to the parent class.

    @param slots (int) number of calculation slots (processes).

    @return (int) number of domains defined in the project.
    """
    super(DomainHandler, self).setup(project, slots)

    # one child task is created per domain, so the domain count is the reported number of children.
    n_domains = len(self._project.domains)
    return n_domains
def create_tasks(self, parent_task):
"""
generate a calculation task for each symmetry of the given parent task.
generate a calculation task for each domain of the given parent task.
all symmetries share the same model parameters.
all domains share the same model parameters.
@return list of CalculationTask objects, with one element per symmetry.
the symmetry index varies according to project.symmetries.
@return list of CalculationTask objects, with one element per domain.
the domain index varies according to project.domains.
"""
super(SymmetryHandler, self).create_tasks(parent_task)
super(DomainHandler, self).create_tasks(parent_task)
parent_id = parent_task.id
self._parent_tasks[parent_id] = parent_task
@ -567,10 +585,10 @@ class SymmetryHandler(TaskHandler):
self._complete_ids_per_parent[parent_id] = set()
out_tasks = []
for (i_sym, sym) in enumerate(self._project.symmetries):
for (i_dom, domain) in enumerate(self._project.domains):
new_task = parent_task.copy()
new_task.parent_id = parent_id
new_task.change_id(sym=i_sym)
new_task.change_id(domain=i_dom)
child_id = new_task.id
self._pending_tasks[child_id] = new_task
@ -579,25 +597,25 @@ class SymmetryHandler(TaskHandler):
out_tasks.append(new_task)
if not out_tasks:
logger.error("no symmetry tasks generated. your project must declare at least one symmetry.")
logger.error("no domain tasks generated. your project must declare at least one domain.")
return out_tasks
def add_result(self, task):
"""
collect and combine the calculation results versus symmetry.
collect and combine the calculation results versus domain.
* mark the task as complete
* store its result for later
* check whether this was the last pending task of the family (belonging to the same parent).
the actual merging of data is delegated to the project's combine_symmetries() method.
the actual merging of data is delegated to the project's combine_domains() method.
@param task: (CalculationTask) calculation task that completed.
@return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
"""
super(SymmetryHandler, self).add_result(task)
super(DomainHandler, self).add_result(task)
self._complete_tasks[task.id] = task
del self._pending_tasks[task.id]
@ -607,7 +625,7 @@ class SymmetryHandler(TaskHandler):
family_pending.remove(task.id)
family_complete.add(task.id)
# all symmetries complete?
# all domains complete?
if len(family_pending) == 0:
parent_task = self._parent_tasks[task.parent_id]
@ -624,7 +642,7 @@ class SymmetryHandler(TaskHandler):
parent_task.time = reduce(lambda a, b: a + b, child_times)
if parent_task.result_valid:
self._project.combine_symmetries(parent_task, child_tasks)
self._project.combine_domains(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
@ -669,6 +687,19 @@ class EmitterHandler(TaskHandler):
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
    """
    prepare the emitter task handler and estimate the number of emitter configurations.

    the arguments are handed to the setup method of the parent class first.
    the cluster generator of the project is then asked to count the emitters
    for the start model of the model space and a mock calculation index.

    @param project project object. it must provide create_model_space() and a cluster_generator attribute.

    @param slots (int) number of calculation slots (processes).

    @return (int) estimated number of emitter configurations that the cluster generator will generate.
        the estimate is based on the start parameters, scan 0 and domain 0.
    """
    super(EmitterHandler, self).setup(project, slots)

    # the start values of the model space serve as a representative model for the estimate.
    model_space = self._project.create_model_space()
    start_model = model_space.start

    # mock index: presumably the two zeros select scan 0 and domain 0
    # (the field order of CalcID is not visible here - TODO confirm against pmsco.dispatch).
    probe_index = dispatch.CalcID(-1, 0, 0, -1, -1)

    return project.cluster_generator.count_emitters(start_model, probe_index)
def create_tasks(self, parent_task):
"""
generate a calculation task for each emitter configuration of the given parent task.
@ -750,11 +781,11 @@ class EmitterHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_emitters(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'domain')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'domain')
graph_file = mgs.render_scan(parent_task.modf_filename,
ref_data=self._project.scans[parent_task.id.scan].modulation)
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
self._project.files.add_file(graph_file, parent_task.id.model, 'domain')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -921,7 +952,7 @@ class EnergyRegionHandler(RegionHandler):
@param slots (int) number of calculation slots (processes).
@return None
@return (int) average number of child tasks
"""
super(EnergyRegionHandler, self).setup(project, slots)
@ -934,6 +965,8 @@ class EnergyRegionHandler(RegionHandler):
logger.debug(BMsg("region handler: split scan {file} into {slots} chunks",
file=os.path.basename(scan.filename), slots=self._slots_per_scan[i]))
return max(int(sum(self._slots_per_scan) / len(self._slots_per_scan)), 1)
def create_tasks(self, parent_task):
"""
generate a calculation task for each energy region of the given parent task.