public release 2.2.0 - see README.md and CHANGES.md for details
This commit is contained in:
183
pmsco/project.py
183
pmsco/project.py
@ -4,7 +4,7 @@ project-independent classes which store and handle model parameters.
|
||||
|
||||
the most important class defined here is Project.
|
||||
each calculation project needs to derive its own project class from it.
|
||||
the Domain and Params classes are typically used unchanged.
|
||||
the ModelSpace and CalculatorParams classes are typically used unchanged.
|
||||
|
||||
@note nomenclature: the term @e parameters has several meanings in the code and documentation.
|
||||
the following distinctive terms are used in updated documentation sections.
|
||||
@ -53,10 +53,10 @@ from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ParamDomain = collections.namedtuple('ParamDomain', ['start', 'min', 'max', 'step'])
|
||||
ParamSpace = collections.namedtuple('ParamSpace', ['start', 'min', 'max', 'step'])
|
||||
|
||||
|
||||
class Domain(object):
|
||||
class ModelSpace(object):
|
||||
"""
|
||||
Domain of model parameters.
|
||||
|
||||
@ -151,14 +151,14 @@ class Domain(object):
|
||||
|
||||
@param name (string) name of the parameter.
|
||||
|
||||
@return named tuple ParamDomain(start, min, max, step) of the parameter.
|
||||
@return named tuple ParamSpace(start, min, max, step) of the parameter.
|
||||
|
||||
@raise IndexError if the parameter is not defined.
|
||||
"""
|
||||
return ParamDomain(self.start[name], self.min[name], self.max[name], self.step[name])
|
||||
return ParamSpace(self.start[name], self.min[name], self.max[name], self.step[name])
|
||||
|
||||
|
||||
class Params(object):
|
||||
class CalculatorParams(object):
|
||||
"""
|
||||
calculation parameters for a single scattering calculation job.
|
||||
|
||||
@ -166,7 +166,7 @@ class Params(object):
|
||||
|
||||
the class can hold parameters for both the MSC and EDAC codes.
|
||||
some parameters are used by both codes, others are used just by one of them.
|
||||
newer features such as multiple emitters, multiple symmetries, and others are supported in EDAC mode only.
|
||||
newer features such as multiple emitters, multiple domains, and others are supported in EDAC mode only.
|
||||
MSC mode is currently not maintained.
|
||||
|
||||
objects of this class are created by the implementation of the create_params() method
|
||||
@ -253,7 +253,7 @@ class Params(object):
|
||||
|
||||
def __init__(self):
|
||||
self.title = "default parameters"
|
||||
self.comment = "set by project.Params()"
|
||||
self.comment = "set by project.CalculatorParams()"
|
||||
self.cluster_file = ""
|
||||
self.output_file = ""
|
||||
self.scan_file = ""
|
||||
@ -580,7 +580,7 @@ class Project(object):
|
||||
the results include a measure of the quality of the simulated data compared to experimental data.
|
||||
|
||||
each calculation project must derive from this class.
|
||||
it must implement the create_domain(), create_cluster(), and create_params() methods.
|
||||
it must implement the create_model_space(), create_cluster(), and create_params() methods.
|
||||
|
||||
the other methods and attributes of this class
|
||||
are for passing command line parameters to the calculation modules.
|
||||
@ -621,14 +621,14 @@ class Project(object):
|
||||
#
|
||||
# @c scans must be considered read-only. use project methods to change it.
|
||||
|
||||
## @var symmetries (list of arbitrary objects)
|
||||
# list of symmetries for which calculations are to be run.
|
||||
## @var domains (list of arbitrary objects)
|
||||
# list of domains for which calculations are to be run.
|
||||
#
|
||||
# it is up to the derived class what kind of objects are stored in the list.
|
||||
# the recommended kind of objects are dictionaries which hold parameter values,
|
||||
# similar to the model dictionaries.
|
||||
#
|
||||
# the list must be populated by calling the add_symmetry() method.
|
||||
# the list must be populated by calling the add_domain() method.
|
||||
|
||||
## @var cluster_generator (ClusterGenerator object)
|
||||
# provides the cluster generator methods.
|
||||
@ -684,6 +684,11 @@ class Project(object):
|
||||
#
|
||||
# output_dir and output_file are set at once by @ref set_output.
|
||||
|
||||
## @var db_file (string)
|
||||
# name of an sqlite3 database file where the calculation results should be stored.
|
||||
#
|
||||
# the default value is ':memory:', which creates a volatile in-memory database.
|
||||
|
||||
## @var timedelta_limit (datetime.timedelta)
|
||||
# wall time after which no new calculations should be started.
|
||||
#
|
||||
@ -715,7 +720,7 @@ class Project(object):
|
||||
#
|
||||
# @arg 0 = model level: combined results only.
|
||||
# @arg 1 = scan level: scan nodes in addition to combined results (level 0).
|
||||
# @arg 2 = symmetry level: symmetry nodes in addition to level 1.
|
||||
# @arg 2 = domain level: domain nodes in addition to level 1.
|
||||
# @arg 3 = emitter level: emitter nodes in addition to level 1.
|
||||
# @arg 4 = region level: region nodes in addition to level 1.
|
||||
|
||||
@ -738,13 +743,14 @@ class Project(object):
|
||||
def __init__(self):
|
||||
self.mode = "single"
|
||||
self.job_name = ""
|
||||
self.job_tags = {}
|
||||
self.git_hash = ""
|
||||
self.description = ""
|
||||
self.features = {}
|
||||
self.cluster_format = mc.FMT_EDAC
|
||||
self.cluster_generator = mc.LegacyClusterGenerator(self)
|
||||
self.scans = []
|
||||
self.symmetries = []
|
||||
self.domains = []
|
||||
self.optimizer_params = {
|
||||
'pop_size': 0,
|
||||
'seed_file': "",
|
||||
@ -755,6 +761,7 @@ class Project(object):
|
||||
self.data_dir = ""
|
||||
self.output_dir = ""
|
||||
self.output_file = "pmsco_data"
|
||||
self.db_file = ':memory:'
|
||||
self.timedelta_limit = datetime.timedelta(days=1)
|
||||
self.combined_scan = None
|
||||
self.combined_modf = None
|
||||
@ -764,7 +771,7 @@ class Project(object):
|
||||
self.handler_classes = {
|
||||
'model': handlers.SingleModelHandler,
|
||||
'scan': handlers.ScanHandler,
|
||||
'sym': handlers.SymmetryHandler,
|
||||
'domain': handlers.DomainHandler,
|
||||
'emit': handlers.EmitterHandler,
|
||||
'region': handlers.SingleRegionHandler
|
||||
}
|
||||
@ -773,27 +780,27 @@ class Project(object):
|
||||
self._tasks_fields = []
|
||||
self._db = database.ResultsDatabase()
|
||||
|
||||
def create_domain(self):
|
||||
def create_model_space(self):
|
||||
"""
|
||||
create a msc_project.Domain object which defines the allowed range for model parameters.
|
||||
create a project.ModelSpace object which defines the allowed range for model parameters.
|
||||
|
||||
this method must be implemented by the actual project class.
|
||||
the Domain object must declare all model parameters used in the project.
|
||||
the ModelSpace object must declare all model parameters used in the project.
|
||||
|
||||
@return Domain object
|
||||
@return ModelSpace object
|
||||
"""
|
||||
return None
|
||||
|
||||
def create_params(self, model, index):
|
||||
"""
|
||||
create a Params object given the model parameters and calculation index.
|
||||
create a CalculatorParams object given the model parameters and calculation index.
|
||||
|
||||
@param model (dictionary) model parameters to be used in the calculation.
|
||||
|
||||
@param index (named tuple CalcID) calculation index.
|
||||
the method should consider only the following attributes:
|
||||
@arg @c scan scan index (index into Project.scans)
|
||||
@arg @c sym symmetry index (index into Project.symmetries)
|
||||
@arg `scan` scan index (index into Project.scans)
|
||||
@arg `domain` domain index (index into Project.domains)
|
||||
"""
|
||||
return None
|
||||
|
||||
@ -896,35 +903,35 @@ class Project(object):
|
||||
|
||||
return scan
|
||||
|
||||
def clear_symmetries(self):
|
||||
def clear_domains(self):
|
||||
"""
|
||||
clear symmetries.
|
||||
clear domains.
|
||||
|
||||
delete all symmetries in self.symmetries and empty the list.
|
||||
delete all domains in self.domains and empty the list.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
self.symmetries = []
|
||||
self.domains = []
|
||||
|
||||
def add_symmetry(self, symmetry):
|
||||
def add_domain(self, domain):
|
||||
"""
|
||||
add a symmetry to the list of symmetries.
|
||||
add a domain to the list of domains.
|
||||
|
||||
this class declares the list of symmetries.
|
||||
it does not define what should be in the list of symmetries.
|
||||
however, there must be an entry for each symmetry to be calculated.
|
||||
this class declares the list of domains.
|
||||
it does not define what should be in the list of domains.
|
||||
however, there must be an entry for each domain to be calculated.
|
||||
if the list is empty, no calculation will be executed.
|
||||
|
||||
@attention initially, the symmetries list is empty.
|
||||
your project needs to add at least one symmetry.
|
||||
@attention initially, the domains list is empty.
|
||||
your project needs to add at least one domain.
|
||||
otherwise, no calculation will be executed.
|
||||
|
||||
@param symmetry: it is up to the derived project class to specify and interpret the data stored here.
|
||||
it is recommended to store a dictionary with symmetry parameters similar to the model parameters.
|
||||
@param domain: it is up to the derived project class to specify and interpret the data stored here.
|
||||
it is recommended to store a dictionary with domain parameters similar to the model parameters.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
self.symmetries.append(symmetry)
|
||||
self.domains.append(domain)
|
||||
|
||||
def set_output(self, filename):
|
||||
"""
|
||||
@ -938,14 +945,29 @@ class Project(object):
|
||||
self.output_file = filename
|
||||
path, name = os.path.split(filename)
|
||||
self.output_dir = path
|
||||
self.job_name = name
|
||||
|
||||
def set_timedelta_limit(self, timedelta):
|
||||
def set_timedelta_limit(self, timedelta, margin_minutes=10):
|
||||
"""
|
||||
set the walltime limit
|
||||
|
||||
timedelta (datetime.timedelta)
|
||||
set the walltime limit with a safety margin.
|
||||
|
||||
this method sets the internal self.timedelta_limit attribute.
|
||||
by default, a safety margin of 10 minutes is subtracted from the main argument
|
||||
in order to increase the probability that the process ends in time.
|
||||
if this is not wanted, the project class may override the method and provide its own margin.
|
||||
|
||||
the method is typically called with the command line time limit from the main module.
|
||||
|
||||
@note the safety margin could be applied at various levels.
|
||||
it is done here because it can easily be overridden by the project subclass.
|
||||
to keep run scripts simple, the command line can be given the same time limit
|
||||
as the job scheduler of the computing cluster.
|
||||
|
||||
@param timedelta: (datetime.timedelta) max. duration of the calculation process (wall time).
|
||||
|
||||
@param margin_minutes: (int) safety margin in minutes to subtract from timedelta.
|
||||
"""
|
||||
self.timedelta_limit = timedelta
|
||||
self.timedelta_limit = timedelta - datetime.timedelta(minutes=margin_minutes)
|
||||
|
||||
def log_project_args(self):
|
||||
"""
|
||||
@ -970,38 +992,40 @@ class Project(object):
|
||||
|
||||
logger.warning("data directory: {0}".format(self.data_dir))
|
||||
logger.warning("output file: {0}".format(self.output_file))
|
||||
logger.warning("database: {0}".format(self.db_file))
|
||||
|
||||
_files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
|
||||
logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
|
||||
|
||||
for idx, scan in enumerate(self.scans):
|
||||
logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
|
||||
for idx, sym in enumerate(self.symmetries):
|
||||
logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
|
||||
logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state}")
|
||||
for idx, dom in enumerate(self.domains):
|
||||
logger.warning(f"domain {idx}: {dom}")
|
||||
|
||||
except AttributeError:
|
||||
logger.warning("AttributeError in log_project_args")
|
||||
|
||||
def combine_symmetries(self, parent_task, child_tasks):
|
||||
def combine_domains(self, parent_task, child_tasks):
|
||||
"""
|
||||
combine results of different symmetry into one result and calculate the modulation function.
|
||||
combine results of different domains into one result and calculate the modulation function.
|
||||
|
||||
the symmetry results are read from the file system using the indices defined by the child_tasks,
|
||||
the domain results are read from the file system using the indices defined by the child_tasks,
|
||||
and the combined result is written to the file system with the index defined by parent_task.
|
||||
|
||||
by default, this method adds all symmetries with equal weight.
|
||||
weights can be defined in the model dictionary with keys 'wsym0', 'wsym1', etc.
|
||||
by default, this method adds all domains with equal weight.
|
||||
weights can be defined in the model dictionary with keys 'wdom0', 'wdom1', etc.
|
||||
missing weights default to 1.
|
||||
note: to avoid correlated parameters, one symmetry must always have a fixed weight.
|
||||
to avoid correlated parameters, one domain must always have a fixed weight.
|
||||
it is recommended to leave 'wdom0' at its default.
|
||||
|
||||
@param parent_task: (CalculationTask) parent task of the symmetry tasks.
|
||||
@param parent_task: (CalculationTask) parent task of the domain tasks.
|
||||
the method must write the results to the files indicated
|
||||
by the @c result_filename and @c modf_filename attributes.
|
||||
|
||||
@param child_tasks: (sequence of CalculationTask) tasks which identify each symmetry.
|
||||
@param child_tasks: (sequence of CalculationTask) tasks which identify each domain.
|
||||
the method must read the source data from the files
|
||||
indicated by the @c result_filename attributes.
|
||||
the sequence is sorted by task ID, i.e., essentially, by symmetry index.
|
||||
the sequence is sorted by task ID, i.e., essentially, by domain index.
|
||||
|
||||
@return: None
|
||||
|
||||
@ -1009,7 +1033,7 @@ class Project(object):
|
||||
|
||||
@raise IOError if a filename is missing
|
||||
|
||||
@note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
|
||||
@note the weights of the domains (in derived classes) can be part of the optimizable model parameters.
|
||||
the model parameters are available as the @c model attribute of the calculation tasks.
|
||||
"""
|
||||
|
||||
@ -1021,7 +1045,7 @@ class Project(object):
|
||||
result_data = data.copy()
|
||||
result_data['i'] = 0.
|
||||
try:
|
||||
weight = task.model['wsym{}'.format(task.id.sym)]
|
||||
weight = task.model['wdom{}'.format(task.id.domain)]
|
||||
except KeyError:
|
||||
weight = 1.
|
||||
result_data['i'] += weight * data['i']
|
||||
@ -1212,9 +1236,12 @@ class Project(object):
|
||||
this instance writes the header of the tasks.dat file
|
||||
that will receive sub-task evaluation results from the evaluate_result() method.
|
||||
|
||||
it also initializes the database where the task results will be stored.
|
||||
this is either a volatile in-memory database or a user-specified sqlite3 database file.
|
||||
|
||||
@param handlers: dictionary listing the initialized task handler instances.
|
||||
the dictionary keys are the attribute names of pmsco.dispatch.CalcID:
|
||||
'model', 'scan', 'sym', 'emit' and 'region'.
|
||||
'model', 'scan', 'domain', 'emit' and 'region'.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
@ -1223,8 +1250,8 @@ class Project(object):
|
||||
fields.extend(dispatch.CalcID._fields)
|
||||
fields.append("secs")
|
||||
fields = ["_" + f for f in fields]
|
||||
dom = self.create_domain()
|
||||
model_fields = list(dom.start.keys())
|
||||
mspace = self.create_model_space()
|
||||
model_fields = list(mspace.start.keys())
|
||||
model_fields.sort(key=lambda name: name.lower())
|
||||
fields.extend(model_fields)
|
||||
self._tasks_fields = fields
|
||||
@ -1234,9 +1261,10 @@ class Project(object):
|
||||
outfile.write(" ".join(fields))
|
||||
outfile.write("\n")
|
||||
|
||||
# todo : change to file-database
|
||||
self._db.connect(":memory:")
|
||||
project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
|
||||
self._db.connect(self.db_file)
|
||||
project_name = self.__class__.__name__
|
||||
project_module = self.__class__.__module__
|
||||
project_id = self._db.register_project(project_name, project_module)
|
||||
job_id = self._db.register_job(project_id,
|
||||
self.job_name,
|
||||
self.mode,
|
||||
@ -1244,6 +1272,9 @@ class Project(object):
|
||||
self.git_hash,
|
||||
datetime.datetime.now(),
|
||||
self.description)
|
||||
logger.debug(BMsg("database {db_file}, project {proj}, job {job}",
|
||||
db_file=self.db_file, proj=project_id, job=job_id))
|
||||
self._db.insert_jobtags(job_id, self.job_tags)
|
||||
self._db.register_params(model_fields)
|
||||
self._db.create_models_view()
|
||||
|
||||
@ -1283,7 +1314,8 @@ class Project(object):
|
||||
with open(self.output_file + ".tasks.dat", "a") as outfile:
|
||||
outfile.write(" ".join(format(value) for value in values_list) + "\n")
|
||||
|
||||
self._db.insert_result(parent_task.id, values_dict)
|
||||
db_id = self._db.insert_result(parent_task.id, values_dict)
|
||||
logger.debug(BMsg("model {model}, database result {db_id}", model=parent_task.id.model, db_id=db_id))
|
||||
|
||||
return None
|
||||
|
||||
@ -1529,7 +1561,7 @@ class Project(object):
|
||||
"""
|
||||
project hook before atomic scattering factors are calculated.
|
||||
|
||||
this method derives modified Params and Cluster objects for the atomic scattering calculation
|
||||
this method derives modified CalculatorParams and Cluster objects for the atomic scattering calculation
|
||||
from the original objects that will be used in the multiple scattering calculation.
|
||||
|
||||
in the basic version, the method does not change the objects
|
||||
@ -1542,7 +1574,7 @@ class Project(object):
|
||||
or None if no global scattering factors should be calculated.
|
||||
do not modify this object!
|
||||
|
||||
@param par: @ref pmsco.project.Params object representing the preliminary
|
||||
@param par: @ref pmsco.project.CalculatorParams object representing the preliminary
|
||||
multiple scattering input parameters of the current task.
|
||||
the method can make modifications to this object instance directly.
|
||||
|
||||
@ -1565,7 +1597,7 @@ class Project(object):
|
||||
"""
|
||||
project hook after atomic scattering factors are calculated.
|
||||
|
||||
this method cleans up the Params and Cluster objects from the atomic scattering calculation
|
||||
this method cleans up the CalculatorParams and Cluster objects from the atomic scattering calculation
|
||||
so that they can be used in the multiple scattering calculation.
|
||||
|
||||
in the basic version, the method just passes the input parameters for model tasks
|
||||
@ -1578,7 +1610,7 @@ class Project(object):
|
||||
(to calculate the fixed scattering factors that will be used for all models)
|
||||
or None if no global scattering factors should be calculated.
|
||||
|
||||
@param par: @ref pmsco.project.Params object representing the preliminary
|
||||
@param par: @ref pmsco.project.CalculatorParams object representing the preliminary
|
||||
multiple scattering input parameters of the current task.
|
||||
|
||||
@param clu: @ref pmsco.cluster.Cluster object representing the preliminary
|
||||
@ -1597,18 +1629,18 @@ class Project(object):
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
delete unwanted files at the end of a project.
|
||||
delete unwanted files at the end of a project and close the database.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
self.cleanup_files()
|
||||
self.cleanup_files(incomplete_models=True)
|
||||
self._db.disconnect()
|
||||
|
||||
def cleanup_files(self, keep=0):
|
||||
def cleanup_files(self, keep=0, incomplete_models=False):
|
||||
"""
|
||||
delete uninteresting files.
|
||||
delete uninteresting files (any time).
|
||||
|
||||
these are all files that
|
||||
delete all files that
|
||||
belong to one of the self.files.categories_to_delete categories or
|
||||
do not belong to one of the "best" models.
|
||||
|
||||
@ -1619,12 +1651,19 @@ class Project(object):
|
||||
this means that in total up to `n = 10 + 10 * n_scans` models may be kept,
|
||||
where n_scans is the number of scan files in the job.
|
||||
|
||||
this method can be called at any time during the calculation process.
|
||||
it executes on complete models only
|
||||
unless incomplete_models is True.
|
||||
|
||||
@param keep: minimum number of best models to keep.
|
||||
0 (default): use the project parameter self.keep_best.
|
||||
|
||||
@param incomplete_models: (bool) delete files of incomplete models as well.
|
||||
by default (False), incomplete models are not deleted.
|
||||
|
||||
@return None
|
||||
"""
|
||||
self.files.delete_files()
|
||||
self.files.delete_files(incomplete_models=incomplete_models)
|
||||
if 'rfac' in self.files.categories_to_delete:
|
||||
keep = max(keep, self.keep_best)
|
||||
keepers = self._db.query_best_task_models(self.keep_levels, keep)
|
||||
|
Reference in New Issue
Block a user