update public distribution
based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100 tagged rev-master-2.0.0
pmsco/project.py | 771 lines changed
@@ -26,16 +26,27 @@ Licensed under the Apache License, Version 2.0 (the "License");

   http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import datetime
import logging
import numpy as np
import collections
import data as md
import cluster as mc
import files
import handlers
import os.path
import socket
import sys

import pmsco.cluster as mc
from pmsco.compat import open
import pmsco.data as md
import pmsco.database as database
import pmsco.dispatch as dispatch
import pmsco.files as files
import pmsco.handlers as handlers
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

@@ -95,7 +106,7 @@ class Domain(object):
        self.max = {}
        self.step = {}

    def add_param(self, name, start, min, max, step):
    def add_param(self, name, start, min=None, max=None, step=None, width=None):
        """
        set the domain of one parameter with all necessary values at once.

@@ -107,15 +118,29 @@ class Domain(object):
        @param start (float) start value.

        @param min (float) lower bound of the parameter interval.
               must be less than or equal to start.
               if None, the field is set to start.

        @param max (float) upper bound of the parameter interval.
               must be greater than or equal to start.
               if None, the field is set to start.

        @param width (float) width of the parameter interval.
               instead of min and max, the interval can be set centered around the start value.
               this is equivalent to min = start - width/2, max = start + width/2.
               this argument overrides min and max; don't use both at the same time.

        @param step (float) step size.
               must be greater than or equal to zero.
               if None, the field is set to zero.
        """
        self.start[name] = start
        self.min[name] = min
        self.max[name] = max
        self.step[name] = step
        self.min[name] = min if min is not None else start
        self.max[name] = max if max is not None else start
        if width is not None:
            self.min[name] = start - width / 2.
            self.max[name] = start + width / 2.
        self.step[name] = step if step is not None else 0.0
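
For illustration, the new signature lets the interval be specified either by explicit bounds or by a width around the start value. A minimal sketch (the parameter names 'dz' and 'theta' are made-up examples):

    dom = Domain()
    # explicit bounds: -0.3 <= dz <= 0.3, sampled in steps of 0.1
    dom.add_param('dz', start=0.0, min=-0.3, max=0.3, step=0.1)
    # equivalent interval specified by width: min = -0.3, max = 0.3
    dom.add_param('theta', start=0.0, width=0.6, step=0.1)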

    def get_param(self, name):
        """

@@ -144,9 +169,27 @@ class Params(object):
    objects of this class are created by the implementation of the create_params() method
    of the actual project class.
    """

    ## @var angular_resolution (float)
    # FWHM angular resolution of the detector.
    #
    # maps to:
    # @arg emission angle window (EDAC)
    # @arg angular_broadening (MSC)

    ## @var phase_files (dict)
    # dictionary of phase files.
    #
    # the keys are atomic numbers, the values file names.
    # if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
    #
    # maps to:
    # @arg scatterer (EDAC)
    # @arg atomic_number, phase_file (MSC)
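
As a concrete illustration of the phase_files mapping described above (atomic numbers as keys, file names as values; the file names here are made up):

    params = Params()
    params.phase_files = {29: "cu.pha", 8: "o.pha"}   # copper and oxygen scatterers
    # an empty dict (the default) lets EDAC compute the phase shifts internally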

    def __init__(self):
        self.title = "MSC default parameters"
        self.comment = "from msc_project.Params()"
        self.title = "default parameters"
        self.comment = "set by project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
@@ -154,7 +197,7 @@ class Params(object):
        self.initial_state = "1s"
        # MSC convention: H, V, L, R, U
        self.polarization = "H"
        self.angular_broadening = 0.0
        self.angular_resolution = 1.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
        # the energy scale of EDAC is referenced to the vacuum level
@@ -167,16 +210,14 @@ class Params(object):
        self.experiment_temperature = 300.0
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        self.phase_files = {}
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
        self.fcut = 15.0
        self.cut = 15.0
        self.lattice_constant = 1.0
        self.atom_types = 0
        self.atomic_number = [1, 2, 3, 4]
        self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
        self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
        self.msq_displacement = {}
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
@@ -216,9 +257,12 @@ class Scan(object):
    # example: ['t','p']

    ## @var emitter (string)
    # chemical symbol of emitter atom
    # chemical symbol and, optionally following, further specification (chemical state, environment, ...)
    # of photo-emitting atoms.
    # the interpretation of this string is up to the project and its cluster generator.
    # it should, however, always start with a chemical element symbol.
    #
    # example: 'Cu'
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of initial state

@@ -342,142 +386,6 @@ class Scan(object):
        self.alphas = np.zeros((1))


class ClusterGenerator(object):
    """
    cluster generator class.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
               cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model.

        the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
        by default, the method returns 1, which means that there is only one emitter configuration.

        emitter configurations are mainly a way to distribute the calculations to multiple processes
        based on emitters since the resulting diffraction patterns add up incoherently.
        for this to work, the create_cluster() method must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at run-time
        based on the available resources.

        note that this function returns the number of _configurations_, not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must override this method
        and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
               the method should consider only the following attributes:
               @arg @c scan scan index (index into Project.scans)
               @arg @c sym symmetry index (index into Project.symmetries)
               @arg @c emit emitter index is -1 if called by the emitter handler.

        @return number of emitter configurations.
                this implementation returns the default value of 1.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index, symmetry index and emitter index.

        the scan index can be used to generate a different cluster for a different scan geometry,
        e.g., if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic arrangement.
        in this case, depending on the value of index.sym, the function must generate a cluster corresponding
        to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on the value of the emitter index, the method must react differently:

        1. if the value is lower than or equal to zero, return the full cluster and mark all inequivalent emitter atoms.
           emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater than zero, generate the cluster with the emitter configuration
           selected by the emitter index.
           the index is in the range between 1 and the return value of count_emitters().
           the results of the individual emitter calculations are summed up in combine_emitters().

        the code should ideally be written such that either case yields the same diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have to be implemented,
        and the method can ignore the emitter index.

        the method must ignore the model and energy index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
               the method should consider only the following attributes:
               @arg @c scan scan index (index into Project.scans)
               @arg @c sym symmetry index (index into Project.symmetries)
               @arg @c emit emitter index.
                    if lower than or equal to zero, generate the full cluster and mark all emitters.
                    if greater than zero, the value is a 1-based index of the emitter configuration.
        """
        return None


class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
    this class redirects generator calls to the project methods,
    providing compatibility with older project code.
    """

    def __init__(self, project):
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.
        """
        try:
            return self.project.count_emitters(model, index)
        except AttributeError:
            return 1

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)
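
To make the count_emitters/create_cluster contract concrete, here is a minimal sketch of a project-specific generator that splits two emitter atoms into separate parallel tasks. It assumes a Cluster.add_atom(atomtype, position, emitter) method; check pmsco.cluster for the actual signature before reusing it.

    import numpy as np
    import pmsco.cluster as mc

    class TwoEmitterGenerator(mc.ClusterGenerator):
        def count_emitters(self, model, index):
            # two emitter configurations, one atom each
            return 2

        def create_cluster(self, model, index):
            clu = mc.Cluster()
            positions = [np.array((0., 0., 0.)), np.array((0., 0., -2.0))]
            for i, pos in enumerate(positions):
                # index.emit <= 0: full cluster, mark all emitters (single process);
                # index.emit >= 1: mark only the emitter selected by the 1-based index.
                emitter = index.emit <= 0 or index.emit == i + 1
                clu.add_atom(29, pos, emitter)
            return clu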


# noinspection PyMethodMayBeStatic
class Project(object):
    """
@@ -549,39 +457,27 @@ class Project(object):
    # the initial value is a LegacyClusterGenerator object
    # which routes cluster calls back to the project for compatibility with older project code.

    ## @var pop_size (int)
    # population size (number of particles) in the particle swarm optimization.
    ## @var optimizer_params (dict)
    # optional parameters of the model optimizer.
    #
    # by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
    # you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
    # - the calculation of a model takes a long time compared to the available computing time.
    # - the calculation of a model spawns many sub-tasks due to complex symmetry.
    # - you want to increase the number of generations compared to the number of particles.
    # this is a dictionary that can have (among others) the following values.
    # for a detailed list, see the documentation of the respective model handler.
    #
    # the default value is 0.
    #
    # the value can be set by the command line.

    ## @var history_file (string)
    # name of a file containing the results from previous optimization runs.
    # this can be used to resume a swarm optimization where it was interrupted before.
    #
    # the history file is a space-delimited, multi-column, text file.
    # output files of a previous optimization run can be used as is.
    # there must be one column for each model parameter, and one column of R factors.
    # the first row must contain the names of the model parameters.
    # the name of the R factor column must be '_rfac'.
    # additional columns may be included and are ignored.
    #
    # by default, no history is loaded.

    ## @var recalc_history (bool)
    # select whether the R-factors of the historic models are calculated again.
    #
    # this is useful if the historic data was calculated for a different cluster, different set of parameters,
    # or different experimental data, and if the R-factors of the new optimization may be systematically greater.
    # set this argument to False only if the calculation is a continuation of a previous one
    # without any changes to the code.
    # @arg @c 'pop_size' (int)
    #      population size (number of particles) in the swarm or genetic optimization mode.
    #      by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
    #      you may want to override the default value in cases where the automatic choice is not appropriate.
    #      the value can be set by the command line.
    # @arg @c 'seed_file' (string)
    #      name of a file containing the results from previous optimization runs.
    #      this can be used to resume a swarm or genetic optimization where it was interrupted before.
    #      the seed file is a space-delimited, multi-column, text file,
    #      e.g., the output file of a previous optimization.
    #      by default, no seed is loaded.
    # @arg @c 'recalc_seed' (bool)
    #      select whether the R-factors of the seed models are calculated again.
    #      set this argument to False only if the calculation is a continuation of a previous one
    #      without any changes to the code.
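
For example, a derived project could override selected optimizer parameters in its __init__; a brief sketch using the keys documented above (the class and file names are made up):

    class MyProject(Project):
        def __init__(self):
            super(MyProject, self).__init__()
            self.optimizer_params['pop_size'] = 20
            self.optimizer_params['seed_file'] = "previous_run.tasks.dat"
            self.optimizer_params['recalc_seed'] = True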

    ## @var data_dir
    # directory path to experimental data.
@@ -594,9 +490,17 @@ class Project(object):
    # if the location of the files may depend on the machine or user account,
    # the user may want to specify the data path on the command line.

    ## @var output_dir (string)
    # directory path for data files produced during the calculation, including intermediate files.
    #
    # output_dir and output_file are set at once by @ref set_output.

    ## @var output_file (string)
    # file name root for data files produced during the calculation, including intermediate files.
    #
    # the file name should include the path. the path must also be set in @ref output_dir.
    #
    # output_dir and output_file are set at once by @ref set_output.

    ## @var timedelta_limit (datetime.timedelta)
    # wall time after which no new calculations should be started.
@@ -604,11 +508,11 @@ class Project(object):
    # the actual wall time may be longer by the remaining time of running calculations.
    # running calculations will not be aborted.

    ## @var _combined_scan
    ## @var combined_scan
    # combined raw data from scans.
    # updated by add_scan().

    ## @var _combined_modf
    ## @var combined_modf
    # combined modulation function from scans.
    # updated by add_scan().

@@ -618,30 +522,55 @@ class Project(object):
    #
    # files.categories_to_delete determines which files can be deleted.

    ## @var keep_best
    # number of best models for which result files should be kept.
    #
    # this attribute determines how many models are kept based on R-factor ranking at each node of the task tree
    # (up to keep_levels).

    ## @var keep_levels
    # numeric task level down to which R-factors are considered when model files are cleaned up.
    #
    # @arg 0 = model level: combined results only.
    # @arg 1 = scan level: scan nodes in addition to combined results (level 0).
    # @arg 2 = symmetry level: symmetry nodes in addition to level 1.
    # @arg 3 = emitter level: emitter nodes in addition to level 1.
    # @arg 4 = region level: region nodes in addition to level 1.

    def __init__(self):
        self.mode = "single"
        self.code = "edac"
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = LegacyClusterGenerator(self)
        self.cluster_generator = mc.LegacyClusterGenerator(self)
        self.scans = []
        self.symmetries = []
        self.pop_size = 0
        self.history_file = ""
        self.recalc_history = True
        self.optimizer_params = {
            'pop_size': 0,
            'seed_file': "",
            'seed_limit': 0,
            'recalc_seed': True,
            'table_file': ""
        }
        self.data_dir = ""
        self.output_dir = ""
        self.output_file = "pmsco_data"
        self.timedelta_limit = datetime.timedelta(days=1)
        self._combined_scan = None
        self._combined_modf = None
        self.combined_scan = None
        self.combined_modf = None
        self.files = files.FileTracker()
        self.handler_classes = {}
        self.handler_classes['model'] = handlers.SingleModelHandler
        self.handler_classes['scan'] = handlers.ScanHandler
        self.handler_classes['symmetry'] = handlers.SymmetryHandler
        self.handler_classes['emitter'] = handlers.EmitterHandler
        self.handler_classes['region'] = handlers.SingleRegionHandler
        self.keep_levels = 1
        self.keep_best = 10
        self.handler_classes = {
            'model': handlers.SingleModelHandler,
            'scan': handlers.ScanHandler,
            'sym': handlers.SymmetryHandler,
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
        }
        self.calculator_class = None
        self._tasks_fields = []
        self._db = database.ResultsDatabase()
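
A derived project can adjust these defaults after calling the base initializer; a brief sketch (the class name is made up):

    class MyProject(Project):
        def __init__(self):
            super(MyProject, self).__init__()
            self.keep_best = 25      # keep result files of the 25 best models
            self.keep_levels = 0     # rank at the model level only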

    def create_domain(self):
        """
@@ -676,8 +605,8 @@ class Project(object):
        @return: None
        """
        self.scans = []
        self._combined_scan = None
        self._combined_modf = None
        self.combined_scan = None
        self.combined_modf = None

    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
        """
@@ -695,7 +624,7 @@ class Project(object):
        * intensity vs theta and phi (hemisphere or hologram scan)

        the method calculates the modulation function if @c is_modf is @c False.
        it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.

@@ -732,22 +661,26 @@ class Project(object):
            scan.modulation = None

        if scan.raw_data is not None:
            if self._combined_scan is not None:
                dtype = md.common_dtype((self._combined_scan, scan.raw_data))
                self._combined_scan = np.hstack((self._combined_scan, md.restructure_data(scan.raw_data, dtype)))
            if self.combined_scan is not None:
                dt = md.common_dtype((self.combined_scan, scan.raw_data))
                d1 = md.restructure_data(self.combined_scan, dt)
                d2 = md.restructure_data(scan.raw_data, dt)
                self.combined_scan = np.hstack((d1, d2))
            else:
                self._combined_scan = scan.raw_data.copy()
                self.combined_scan = scan.raw_data.copy()
        else:
            self._combined_scan = None
            self.combined_scan = None

        if scan.modulation is not None:
            if self._combined_modf is not None:
                dtype = md.common_dtype((self._combined_modf, scan.modulation))
                self._combined_modf = np.hstack((self._combined_modf, md.restructure_data(scan.modulation, dtype)))
            if self.combined_modf is not None:
                dt = md.common_dtype((self.combined_modf, scan.modulation))
                d1 = md.restructure_data(self.combined_modf, dt)
                d2 = md.restructure_data(scan.modulation, dt)
                self.combined_modf = np.hstack((d1, d2))
            else:
                self._combined_modf = scan.modulation.copy()
                self.combined_modf = scan.modulation.copy()
        else:
            self._combined_modf = None
            self.combined_modf = None

        return scan
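
Typical usage from a project's setup code might look like this (a sketch; the file name and emitter specification are made up):

    # each scan contributes to combined_scan and combined_modf
    scan = project.add_scan("data/cu111_2p.etpi", emitter="Cu", initial_state="2p3/2")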

@@ -783,9 +716,16 @@ class Project(object):

    def set_output(self, filename):
        """
        set base name of output file
        set path and base name of output file.

        path and name are copied to the output_file attribute.
        path is copied to the output_dir attribute.

        if the path is missing, the destination is the current working directory.
        """
        self.output_file = filename
        path, name = os.path.split(filename)
        self.output_dir = path

    def set_timedelta_limit(self, timedelta):
        """
@@ -797,12 +737,15 @@ class Project(object):

    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine results of different symmetry into one result. calculate the modulation function.
        combine results of different symmetry into one result and calculate the modulation function.

        the symmetry results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        by default, this method adds all symmetries with equal weight.
        weights can be defined in the model dictionary with keys 'wsym0', 'wsym1', etc.
        missing weights default to 1.
        note: to avoid correlated parameters, one symmetry must always have a fixed weight.

        @param parent_task: (CalculationTask) parent task of the symmetry tasks.
               the method must write the results to the files indicated
@@ -817,19 +760,26 @@ class Project(object):

        @raise IndexError if child_tasks is empty

        @raise KeyError if a filename is missing
        @raise IOError if a filename is missing

        @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
              the model parameters are available as the @c model attribute of the calculation tasks.
        """

        result_data = None
        sum_weights = 0.
        for task in child_tasks:
            data = md.load_data(task.result_filename)
            if result_data is not None:
                result_data['i'] += data['i']
            else:
                result_data = data
            if result_data is None:
                result_data = data.copy()
                result_data['i'] = 0.
            try:
                weight = task.model['wsym{}'.format(task.id.sym)]
            except KeyError:
                weight = 1.
            result_data['i'] += weight * data['i']
            sum_weights += weight
        result_data['i'] /= sum_weights

        md.save_data(parent_task.result_filename, result_data)
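
To illustrate the weighting scheme, a project with two symmetry domains could fix the first weight and optimize the second; a sketch of the corresponding domain setup (following the 'wsym' key convention above):

    dom = Domain()
    # wsym0 is implicitly fixed at 1 by leaving it out of the model;
    # wsym1 varies between 0 and 2 during the optimization.
    dom.add_param('wsym1', start=1.0, min=0.0, max=2.0, step=0.1)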

@@ -865,7 +815,7 @@ class Project(object):

        @raise IndexError if child_tasks is empty

        @raise KeyError if a filename is missing
        @raise IOError if a filename is missing

        @note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
              the model parameters are available as the @c model attribute of the calculation tasks.
@@ -898,7 +848,7 @@ class Project(object):
        the datasets of the scans are appended.
        this is done for intensity and modulation data independently.

        @param parent_task: (CalculationTask) parent task of the symmetry tasks.
        @param parent_task: (CalculationTask) parent task of the scan tasks.
               the method must write the results to the files indicated
               by the @c result_filename and @c modf_filename attributes.

@@ -910,14 +860,12 @@ class Project(object):
        @return: None

        @raise IndexError if child_tasks is empty.

        @raise KeyError if a filename is missing.
        """

        # intensity
        try:
            stack1 = [md.load_data(task.result_filename) for task in child_tasks]
        except (KeyError, IOError):
        except IOError:
            parent_task.result_filename = ""
        else:
            dtype = md.common_dtype(stack1)
@@ -928,7 +876,7 @@ class Project(object):
        # modulation
        try:
            stack1 = [md.load_data(task.modf_filename) for task in child_tasks]
        except (KeyError, IOError):
        except IOError:
            parent_task.modf_filename = ""
        else:
            dtype = md.common_dtype(stack1)
@@ -936,6 +884,142 @@ class Project(object):
            result_modf = np.hstack(tuple(stack2))
            md.save_data(parent_task.modf_filename, result_modf)

    def combine_regions(self, parent_task, child_tasks):
        """
        combine results from different regions into one result, for intensity and modulation.

        the scan results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        the datasets of the regions are appended and sorted in the standard order of the data module.
        if the resulting length differs from the corresponding experimental scan,
        an error is printed to the logger, but the calculation continues.

        the modulation function is calculated by calling @ref calc_modulation.

        @param parent_task: (CalculationTask) parent task of the region tasks.
               the method writes the results to the file names
               given by the @c result_filename and @c modf_filename attributes.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each region.
               the method reads the source data from the files
               indicated by the @c result_filename attributes.
               the sequence is sorted by task ID, i.e., essentially, by region index.

        @return: None

        @raise IndexError if child_tasks is empty.
        """
        # intensity
        try:
            stack1 = [md.load_data(task.result_filename) for task in child_tasks]
        except IOError:
            parent_task.result_valid = False
            parent_task.result_filename = ""
        else:
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            result_data = np.hstack(tuple(stack2))
            md.sort_data(result_data)
            md.save_data(parent_task.result_filename, result_data)

            scan = self.scans[parent_task.id.scan]
            if result_data.shape[0] != scan.raw_data.shape[0]:
                logger.error(BMsg("scan length mismatch: combined result: {result}, experimental data: {expected}",
                                  result=result_data.shape[0], expected=scan.raw_data.shape[0]))

        # modulation
        try:
            data = md.load_data(parent_task.result_filename)
            modf = self.calc_modulation(data, parent_task.model)
        except IOError:
            parent_task.modf_filename = ""
        else:
            md.save_data(parent_task.modf_filename, modf)

    def setup(self, handlers):
        """
        prepare for calculations.

        this method is called in the master process before starting the task loop.
        at this point the task handlers have been created and set up.
        if the project needs to change settings of task handlers it can do so in this method.

        this instance writes the header of the tasks.dat file
        that will receive sub-task evaluation results from the evaluate_result() method.

        @param handlers: dictionary listing the initialized task handler instances.
               the dictionary keys are the attribute names of pmsco.dispatch.CalcID:
               'model', 'scan', 'sym', 'emit' and 'region'.

        @return: None
        """
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
        fields = ["_" + f for f in fields]
        dom = self.create_domain()
        model_fields = dom.start.keys()
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields

        with open(self.output_file + ".tasks.dat", "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(fields))
            outfile.write("\n")

        # todo : fill in the descriptive fields, change to file-database
        self._db.connect(":memory:")
        project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
        job_id = self._db.register_job(project_id,
                                       "job-name",
                                       self.mode,
                                       socket.gethostname(),
                                       "git-hash",
                                       datetime.datetime.now(),
                                       "description")
        self._db.register_params(model_fields)
        self._db.create_models_view()
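
With the fields assembled in this order, the tasks.dat header starts with the r-factor and index columns followed by the alphabetically sorted model parameters; for hypothetical model parameters 'dx' and 'dz' the first line would read:

    # _rfac _model _scan _sym _emit _region dx dz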

    def evaluate_result(self, parent_task, child_tasks):
        """
        evaluate the result of a calculation task.

        this method is called from the add_result of the task handlers at each level.
        it gives the project a hook to check the progress of a model at any level of the task tree.

        the method calculates the r-factor by calling the Project.calc_rfactor method.
        the result is written to the task.rfac field and to the .tasks.dat file.
        invalid and region-level results are skipped.

        this method is called in the master process only.

        @param parent_task: (CalculationTask) a calculation task.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
               the sequence must be sorted by task ID.

        @return: None
        """
        if parent_task.result_valid and parent_task.id.region == -1:
            try:
                parent_task.rfac = self.calc_rfactor(parent_task, child_tasks)
            except ValueError:
                parent_task.result_valid = False
                logger.warning(BMsg("calculation {0} resulted in an undefined R-factor.", parent_task.id))
            else:
                values_dict = parent_task.id._asdict()
                values_dict = {"_" + k: v for k, v in values_dict.items()}
                values_dict.update(parent_task.model)
                values_dict['_rfac'] = parent_task.rfac
                values_list = [values_dict[field] for field in self._tasks_fields]
                with open(self.output_file + ".tasks.dat", "a") as outfile:
                    outfile.write(" ".join(format(value) for value in values_list) + "\n")

                self._db.insert_result(parent_task.id, values_dict)

        return None

    # noinspection PyUnusedLocal
    def calc_modulation(self, data, model):
        """
@@ -965,31 +1049,246 @@ class Project(object):

        return md.calc_modfunc_loess(data)

    def calc_rfactor(self, task):
    def calc_rfactor(self, parent_task, child_tasks):
        """
        calculate the R-factor of a task.
        calculate the r-factor of a task.

        the method calculates the R-factor over the combined scans.
        the corresponding experimental data is taken from self._combined_modf.
        the r-factor is calculated on the experimental and simulated modulation functions.
        the algorithm differs for the model level and the lower task levels.
        at the model level, the calculation is delegated to Project.combine_rfactors.
        at all other levels, the r-factor is calculated by Project.rfactor,
        where the simulated modulation function is loaded from the file specified by parent_task.modf_filename
        and the experimental data is taken from Project.scan.

        this method is called by the model handler.
        this method is called by the task handlers.
        all child tasks belonging to the parent task must be complete.

        by default, the R-factor is calculated by data.rfactor() over the combined scans.
        override this method in your project to use a different R-factor algorithm.
        to select or implement a specific R-factor algorithm,
        the project sub-class should override Project.rfactor.
        to combine scan r-factors, it should override or patch Project.combine_rfactors.

        @param task: (CalculationTask) a model task.
        @version in earlier versions,
        projects had to override this method to implement their algorithm.
        this has led to duplication of common code.
        the r-factor algorithm is now distributed over several methods,
        and the method signature has changed.
        new projects should override Project.rfactor and/or Project.combine_rfactors.

        @return (int) calculated R-factor.
        @param parent_task: (CalculationTask) a calculation task.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
               the sequence must be sorted by task ID.

        @return (float) calculated R-factor.

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        task_data = md.load_data(task.modf_filename)
        result_r = md.rfactor(self._combined_modf, task_data)
        if parent_task.id.scan >= 0:
            task_data = md.load_data(parent_task.modf_filename)
            exp_data = self.scans[parent_task.id.scan].modulation
            result_r = self.rfactor(exp_data, task_data)
        else:
            result_r = self.combine_rfactors(parent_task, child_tasks)

        return result_r

    def rfactor(self, exp_data, theo_data):
        """
        calculate the r-factor of simulated diffraction data.

        in this class, the method calls the data.rfactor function to calculate the r-factor.
        override this method in your project to use a different R-factor algorithm.

        the input arrays must have the same shape,
        and the coordinate columns must be identical (they are ignored, however).
        the array elements are compared element-by-element.
        terms having NaN intensity are ignored.

        if the sigma column is present in experiment and non-zero,
        the R-factor terms are weighted.

        @param exp_data: (numpy structured array)
               ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
               if an @c s field is present and non-zero,
               the R-factor terms are weighted by 1/sigma**2.

        @param theo_data: (numpy structured array)
               ETPI or ETPAI array containing the calculated modulation functions.

        @return: (float) scalar R-factor

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        return md.rfactor(exp_data, theo_data)

    def opt_rfactor(self, exp_data, theo_data):
        """
        calculate the r-factor of simulated diffraction data, adjusting their amplitude.

        this is an alternative r-factor calculation algorithm
        using the pmsco.data.optimize_rfactor() function.

        to activate this method (replacing the default one), assign it to Project.rfactor
        in the overriding __init__ or setup method:
        @code{.py}
        self.rfactor = self.opt_rfactor
        @endcode

        @param exp_data: (numpy structured array)
               ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
               if an @c s field is present and non-zero,
               the R-factor terms are weighted by 1/sigma**2.

        @param theo_data: (numpy structured array)
               ETPI or ETPAI array containing the calculated modulation functions.

        @return: (float) scalar R-factor

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        return md.optimize_rfactor(exp_data, theo_data)

    def combine_rfactors(self, parent_task, child_tasks):
        """
        combine r-factors of child tasks.

        the r-factors are taken from the rfac attribute of the child_tasks.
        the result is an average of the child r-factors.

        to produce a balanced result, every child dataset must contain a similar amount of information.
        if this is not the case, the child r-factors must be weighted.
        weighting is currently not implemented but may be introduced in a future version.

        the method is intended to be used at the model level (children are scans),
        though it can technically be used at any level where child r-factors are available.

        @param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
               i.e. a model task.

        @param child_tasks: (sequence of CalculationTask) child tasks of parent_task
               that may be consulted for calculating the r-factor.

        @return: (float) r-factor, NaN if parent task is invalid

        @raise ValueError or IndexError if child_tasks is empty.
        """
        if parent_task.result_valid:
            rsum = 0.
            for task in child_tasks:
                rsum += task.rfac
            return rsum / len(child_tasks)
        else:
            return float('nan')

    def alt_combine_rfactors(self, parent_task, child_tasks):
        """
        combine r-factors of child tasks by explicit calculation on the combined result.

        this is an alternative implementation of combine_rfactors.
        instead of using the r-factors from child tasks,
        it re-calculates the r-factor for the combined dataset.
        this method avoids the issue of weighting
        but can introduce bias if the amplitudes of the child datasets differ substantially.

        the simulated dataset is loaded from the file specified by the parent task,
        the corresponding experimental data is taken from self.combined_modf.

        to activate this method, assign it to combine_rfactors
        in the overriding __init__ or setup method:
        @code{.py}
        self.combine_rfactors = self.alt_combine_rfactors
        @endcode

        @param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
               i.e. a model task.

        @param child_tasks: (sequence of CalculationTask) child tasks of parent_task
               that may be consulted for calculating the r-factor.

        @return: (float) r-factor, NaN if parent task is invalid
        """
        if parent_task.result_valid:
            task_data = md.load_data(parent_task.modf_filename)
            exp_data = self.combined_modf
            return self.rfactor(exp_data, task_data)
        else:
            return float('nan')

    def export_cluster(self, index, filename, cluster):
        """
        export the cluster of a calculation task in XYZ format for diagnostics and reporting.

        this method is called with the final cluster just before it is handed over to the calculator.
        it saves the atom coordinates in XYZ format for future reference (e.g. graphics).

        the method creates two files:
        @arg a file with extension '.xyz' contains the whole cluster in XYZ format.
        @arg a file with extension '.emit.xyz' contains only emitter atoms in XYZ format.

        the first part of the file name is formatted with the output name and the complete task identification.
        the file is registered with the file tracker in the 'cluster' category
        so that it will be deleted unless the cluster category is selected for keeping.

        derived project classes may override or extend this method
        to carry out further diagnostics or reporting on the cluster.

        @param index: (CalcID) calculation index to which the cluster belongs.
               region may be -1 if only one cluster is exported for all regions
               (clusters do not depend on the scan region).
               emit may be -1 if the cluster is a master from which emitter-related child clusters are derived.

        @param filename: (str) base file name for the output files.
               the filename should be formatted using pmsco.dispatch.CalculationTask.format_filename().
               extensions are appended by this method.

        @param cluster: a pmsco.cluster.Cluster() object with all atom positions and emitters.

        @return: dictionary listing the names of the created files with their category.
                 the dictionary key is the file name,
                 the value is the file category (cluster).
        """
        _files = {}
        xyz_filename = filename + ".xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        _files[xyz_filename] = 'cluster'

        xyz_filename = filename + ".emit.xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ, emitters_only=True)
        _files[xyz_filename] = 'cluster'

        return _files

    def cleanup(self):
        """
        delete unwanted files at the end of a project.

        @return: None
        """
        self.cleanup_files()
        self._db.disconnect()

    def cleanup_files(self, keep=0):
        """
        delete uninteresting files.

        these are all files that
        belong to one of the self.files.categories_to_delete categories or
        do not belong to one of the "best" models.

        "best" models are a number (self.keep_best) of models that gave the lowest R-factors
        at each task level from root to self.keep_levels.
        for example, if `keep_best = 10` and `keep_levels = 1`,
        the 10 best models at the top level and the 10 best at the scan level are kept.
        this means that in total up to `n = 10 + 10 * n_scans` models may be kept,
        where n_scans is the number of scan files in the job.

        @param keep: minimum number of best models to keep.
               0 (default): use the project parameter self.keep_best.

        @return None
        """
        self.files.delete_files()
        if 'rfac' in self.files.categories_to_delete:
            keep = max(keep, self.keep_best)
            keepers = self._db.query_best_task_models(self.keep_levels, keep)
            self.files.delete_models(keep=keepers)