996 lines
40 KiB
Python
996 lines
40 KiB
Python
"""
|
|
@package pmsco.project
|
|
project-independent classes which store and handle model parameters.
|
|
|
|
the most important class defined here is Project.
|
|
each calculation project needs to derive its own project class from it.
|
|
the Domain and Params classes are typically used unchanged.
|
|
|
|
@note nomenclature: the term @e parameters has several meanings in the code and documentation.
|
|
the following distinctive terms are used in updated documentation sections.
|
|
ambiguous terms may still be present in older code sections.
|
|
@arg <em>calculation parameters</em> set of specific parameters passed as input to the calculation programs.
|
|
the amount and meaning of these parameters depend on the calculation code used.
|
|
typically, many of these parameters remain fixed, or change very rarely in the course of the study.
|
|
@arg <em>model parameters</em> concise set of independent physical parameters
|
|
that define the system in one calculation instance.
|
|
these parameters are varied systematically by the optimization process.
|
|
they are mapped to calculation parameters and a cluster by code derived from the Project class.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
from __future__ import division
|
|
import copy
|
|
import datetime
|
|
import logging
|
|
import numpy as np
|
|
import collections
|
|
import data as md
|
|
import cluster as mc
|
|
import files
|
|
import handlers
|
|
|
|
# module-level logger; messages propagate to the application's logging configuration.
logger = logging.getLogger(__name__)

# value domain of a single model parameter:
# start value, lower bound (min), upper bound (max) and step size.
# see the Domain class for the meaning of the fields.
ParamDomain = collections.namedtuple('ParamDomain', ['start', 'min', 'max', 'step'])
|
|
|
|
|
|
class Domain(object):
    """
    value domain of the model parameters.

    the domain is described by four dictionaries (start, min, max, step),
    each of which maps a model parameter name to a physical value.
    parameter names can be defined almost freely by the project,
    except that they should contain only alphanumeric and underscore characters.
    furthermore, names starting with an underscore are reserved for the optimizers.
    """

    ## @var start (dict)
    # dictionary of start values for each model parameter.
    #
    # the start value can be the initial guess for an optimization run,
    # or the actual value for a single calculation.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var min (dict)
    # dictionary of minimum values for each model parameter.
    #
    # the minimum defines the lower bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var max (dict)
    # dictionary of maximum values for each model parameter.
    #
    # the maximum defines the upper bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var step (dict)
    # dictionary of step sizes for each model parameter.
    #
    # depending on the optimization mode, the step is a guess of how fast values should vary,
    # e.g. step size, gradient, velocity, ...
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    def __init__(self):
        """
        initialize the domain object with empty dictionaries.
        """
        self.start, self.min, self.max, self.step = {}, {}, {}, {}

    def add_param(self, name, start, min, max, step):
        """
        declare one model parameter and set its whole domain at once.

        the exact meaning of the arguments depends on the calculation mode.

        @param name (string) name of the parameter (alphanumeric and underscore characters only).
        it is recommended to use short but distinctive names.

        @param start (float) start value.

        @param min (float) lower bound of the parameter interval.

        @param max (float) upper bound of the parameter interval.

        @param step (float) step size.
        """
        for member, value in ((self.start, start), (self.min, min), (self.max, max), (self.step, step)):
            member[name] = value

    def get_param(self, name):
        """
        get all values of a model parameter in a tuple.

        @param name (string) name of the parameter.

        @return named tuple ParamDomain(start, min, max, step) of the parameter.

        @raise KeyError if the parameter is not defined.
        """
        return ParamDomain(*(member[name] for member in (self.start, self.min, self.max, self.step)))
|
|
|
|
|
|
class Params(object):
    """
    calculation parameters for a single scattering calculation job.

    this class holds all the calculation parameters that are passed via input file to the calculation program.

    the class can hold parameters for both the MSC and EDAC codes.
    some parameters are used by both codes, others are used just by one of them.
    newer features such as multiple emitters, multiple symmetries, and others are supported in EDAC mode only.
    MSC mode is currently not maintained.

    objects of this class are created by the implementation of the create_params() method
    of the actual project class.
    """
    def __init__(self):
        # free-text metadata written to the calculation input file
        self.title = "MSC default parameters"
        self.comment = "from msc_project.Params()"
        # input/output file names; typically filled in by the calculation handlers
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
        # MSC convention: H, V, L, R, U
        self.polarization = "H"
        self.angular_broadening = 0.0
        self.z_surface = 0.0
        # inner potential in eV
        self.inner_potential = 10.0
        # the energy scale of EDAC is referenced to the vacuum level
        # but data files are referenced to the Fermi level
        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC
        self.work_function = 0.0
        # angular range (degrees) covered by one symmetry unit
        self.symmetry_range = 360.0
        # incidence geometry of the exciting light (degrees)
        self.polar_incidence_angle = 60.0
        self.azimuthal_incidence_angle = 0.0
        # sample temperature and Debye model parameters for thermal vibrations
        self.experiment_temperature = 300.0
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
        self.fcut = 15.0
        self.cut = 15.0
        self.lattice_constant = 1.0
        # number of distinct atom types; the following lists hold one entry per type
        self.atom_types = 0
        self.atomic_number = [1, 2, 3, 4]
        self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
        self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
        # radial matrix elements (value and phase shift) of the two emission channels
        self.rme_minus_value = 0.5
        self.rme_minus_shift = 0.0
        self.rme_plus_value = 0.5
        self.rme_plus_shift = 0.0
        # used by EDAC only
        # emitters: list of (x, y, z, atom type) tuples
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        # maximum angular momentum quantum number of the expansion
        self.lmax = 15
        # cluster diameter parameter (presumably Angstrom -- TODO confirm against EDAC docs)
        self.dmax = 5.0
        # scattering orders to calculate
        self.orders = [20]
|
|
|
|
|
|
class Scan(object):
    """
    class to describe the scanning scheme or store the experimental data set.
    """

    ## @var filename (string)
    # file name from which a scan was loaded

    ## @var raw_data (numpy.ndarray)
    # original scan data (ETPAIS array)

    ## @var dtype (dict)
    # data type of self.raw_data.
    #
    # one of the data.DTYPE_Xxxx constants.

    ## @var modulation (numpy.ndarray)
    # modulation function calculated from original scan (ETPAIS array)

    ## @var mode (list of characters)
    # list of ETPAI column names which are scanned in self.raw_data.
    #
    # example: ['t','p']

    ## @var emitter (string)
    # chemical symbol of emitter atom
    #
    # example: 'Cu'

    ## @var initial_state (string)
    # nl term of initial state
    #
    # in the form expected by EDAC, for example: '1s'

    ## @var energies (numpy.ndarray)
    # kinetic energy referenced to Fermi level.
    #
    # one-dimensional array.

    ## @var thetas (numpy.ndarray)
    # polar angle referenced to normal emission
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique.

    ## @var phis (numpy.ndarray)
    # azimuthal angle referenced to arbitrary origin
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique, and not monotonic.

    ## @var alphas (numpy.ndarray)
    # polar angle referenced to normal emission
    #
    # one-dimensional array.

    def __init__(self):
        self.filename = ""
        self.raw_data = None
        self.dtype = None
        self.modulation = None
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
        self.energies = np.zeros((0))
        self.thetas = np.zeros((0))
        self.phis = np.zeros((0))
        self.alphas = np.zeros((0))

    def copy(self):
        """
        create a copy of the scan.

        @return: new independent scan object with the same attributes as the original one.
        """
        return copy.deepcopy(self)

    def _angle_positions(self, positions, axis, label):
        """
        determine the scan positions of one angle axis of the loaded scan.

        helper for set_scan(); factors out the identical handling of the
        theta, phi and alpha axes.

        @param positions: (dict) positions by axis as returned by md.detect_scan_mode.

        @param axis: (string) ETPAI column name of the axis, e.g. 't'.

        @param label: (string) human-readable axis name for log messages, e.g. 'theta'.

        @return: (numpy.ndarray) one-dimensional array of scan positions.
        if the axis is not scanned, the (single) fixed value from the data file,
        or 0.0 if the data file does not contain the axis column at all.
        """
        if axis in self.mode:
            return positions[axis]
        try:
            return np.asarray((self.raw_data[axis][0], ))
        except (KeyError, ValueError):
            # a missing field of a structured array raises KeyError in current numpy
            # versions and ValueError in old ones; both mean the axis is absent.
            logger.info("missing %s in scan file %s, defaulting to 0.0", label, self.filename)
            return np.zeros((1))

    def set_scan(self, filename, emitter, initial_state):
        """
        set file name of reference experiment and load it.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.

        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
        this version introduces holo scans as an experimental feature.
        for all other scan types, the scan file must exactly conform with a rectangular scan.
        the following scans are currently supported:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
        * intensity vs theta, phi, or alpha
        * holo scan (theta,phi)

        @param filename: (string) file name of the experimental data, possibly including a path.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

        @raise ValueError (propagated) if the scan file does not contain an energy column.
        """
        self.filename = filename
        self.emitter = emitter
        self.initial_state = initial_state

        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
            self.mode, positions = md.detect_scan_mode(self.raw_data)

            # the energy axis is mandatory: a missing energy is an error, not a default.
            if 'e' in self.mode:
                self.energies = positions['e']
            else:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except (KeyError, ValueError):
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

            # the angle axes default to 0.0 if absent from the file.
            self.thetas = self._angle_positions(positions, 't', 'theta')
            self.phis = self._angle_positions(positions, 'p', 'phi')
            self.alphas = self._angle_positions(positions, 'a', 'alpha')
|
|
|
|
|
class ClusterGenerator(object):
    """
    base class for cluster generators.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
        cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model.

        the number of emitter configurations may depend on the model parameters,
        scan index and symmetry index.

        emitter configurations are mainly a way to distribute the calculations to multiple
        processes based on emitters since the resulting diffraction patterns add up incoherently.
        for this to work, create_cluster() must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index
        (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at
        run-time based on the available resources.

        note that this function returns the number of _configurations_ not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project, what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must
        override this method and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index is -1 if called by the emitter handler.

        @return number of emitter configurations.
        this default implementation always returns 1,
        i.e. a single emitter configuration calculated in a single process.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index,
        symmetry index and emitter index.

        the scan index can be used to generate a different cluster for a different
        scan geometry, e.g. if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic
        arrangement. in this case, depending on the value of index.sym, the method must
        generate a cluster corresponding to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on its value, the method must react differently:

        1. if the value is lower than or equal to zero, return the full cluster and
           mark all inequivalent emitter atoms.
           emitters which are reproduced by a symmetry expansion in combine_emitters()
           should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater than zero, generate the cluster with the emitter
           configuration selected by the emitter index.
           the index is in the range between 1 and the return value of count_emitters().
           the results of the individual emitter calculations are summed up
           in combine_emitters().

        the code should ideally be written such that either case yields the same
        diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have
        to be implemented, and the method can ignore the emitter index.

        the method must ignore the model and energy index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index.
        if lower or equal to zero, generate the full cluster and mark all emitters.
        if greater than zero, the value is a 1-based index of the emitter configuration.

        @return None in this base implementation.
        derived classes must return a Cluster object.
        """
        return None
|
|
|
|
|
|
class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented
    by the project class.
    this class redirects generator calls to the project methods
    providing compatibility to older project code.
    """

    def __init__(self, project):
        """
        initialize the generator with a reference to the legacy project.

        @param project: project object whose create_cluster (and optionally
        count_emitters) methods are to be called.
        """
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.

        @return the result of project.count_emitters(model, index),
        or 1 if the project does not implement count_emitters.
        """
        # look the method up instead of wrapping the call in try/except AttributeError:
        # the previous implementation silently returned 1 when an AttributeError was
        # raised *inside* a buggy project.count_emitters, masking the error.
        project_count = getattr(self.project, "count_emitters", None)
        if project_count is None:
            return 1
        return project_count(model, index)

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.

        @return the Cluster object returned by project.create_cluster(model, index).
        """
        return self.project.create_cluster(model, index)
|
|
|
|
|
|
# noinspection PyMethodMayBeStatic
|
|
class Project(object):
|
|
"""
|
|
base class of a calculation project.
|
|
|
|
a 'calculation project' is a coded set of prescriptions
|
|
on how to get from a set of model parameters to simulated data
|
|
which correspond to provided experimental data.
|
|
the results include a measure of the quality of the simulated data compared to experimental data.
|
|
|
|
each calculation project must derive from this class.
|
|
it must implement the create_domain(), create_cluster(), and create_params() methods.
|
|
|
|
the other methods and attributes of this class
|
|
are for passing command line parameters to the calculation modules.
|
|
the attributes should be populated in the constructor of the derived class,
|
|
or (recommended) in the create_project() function of the module.
|
|
it is essential that the attributes are set correctly before calculation.
|
|
"""
|
|
|
|
## @var features (dictionary)
|
|
#
|
|
# calculation features and versions supported by the project.
|
|
#
|
|
# the dictionary contains key-value pairs where the key is the name of the feature and value is a version number.
|
|
# this field conditionally enables new software features that may break backward compatibility.
|
|
# derived projects should fill this field with the supported version
|
|
# upon creation (in their __init__ method or create_project() factory).
|
|
# version 0 (default) means that the feature is disabled.
|
|
#
|
|
# the following features can be enabled (list may be incomplete):
|
|
# as of this version, no optional features are defined.
|
|
#
|
|
# @note rather than introducing new features and, particularly, new versions that rely on this mechanism,
|
|
# developers of generic code should check whether backward compatibility could be achieved in a simpler way,
|
|
# e.g. by implementing additional methods whose default behaviour is the same as of the previous version.
|
|
# in some cases it may be better to refactor all current project code.
|
|
#
|
|
|
|
## @var scans (list of Scan objects)
|
|
# list of experimental or scan files for which calculations are to be run.
|
|
#
|
|
# the list must be populated by calling the add_scan() method.
|
|
# this should be done in the create_project() function, or through the command line arguments.
|
|
#
|
|
# the modulation function is calculated internally.
|
|
# if your scan files contain the modulation function (as opposed to intensity),
|
|
# you must add the files in the create_project() function.
|
|
# the command line does not support loading modulation functions.
|
|
#
|
|
# @c scans must be considered read-only. use project methods to change it.
|
|
|
|
## @var symmetries (list of arbitrary objects)
|
|
# list of symmetries for which calculations are to be run.
|
|
#
|
|
# it is up to the derived class what kind of objects are stored in the list.
|
|
# the recommended kind of objects are dictionaries which hold parameter values,
|
|
# similar to the model dictionaries.
|
|
#
|
|
# the list must be populated by calling the add_symmetry() method.
|
|
|
|
## @var cluster_generator (ClusterGenerator object)
|
|
# provides the cluster generator methods.
|
|
#
|
|
# a project must provide a cluster generator object that is derived from ClusterGenerator.
|
|
# at least the ClusterGenerator.create_cluster method must be implemented.
|
|
# if emitters should be run in parallel, the ClusterGenerator.count_emitters must be implemented as well.
|
|
#
|
|
# the initial value is a LegacyClusterGenerator object
|
|
# which routes cluster calls back to the project for compatibility with older project code.
|
|
|
|
## @var pop_size (int)
|
|
# population size (number of particles) in the particle swarm optimization.
|
|
#
|
|
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
|
|
# you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
|
|
# - the calculation of a model takes a long time compared to the available computing time.
|
|
# - the calculation of a model spawns many sub-tasks due to complex symmetry.
|
|
# - you want to increase the number of generations compared to the number of particles.
|
|
#
|
|
# the default value is 0.
|
|
#
|
|
# the value can be set by the command line.
|
|
|
|
## @var history_file (string)
|
|
# name of a file containing the results from previous optimization runs.
|
|
# this can be used to resume a swarm optimization where it was interrupted before.
|
|
#
|
|
# the history file is a space-delimited, multi-column, text file.
|
|
# output files of a previous optimization run can be used as is.
|
|
# there must be one column for each model parameter, and one column of R factors.
|
|
# the first row must contain the names of the model parameters.
|
|
# the name of the R factor column must be '_rfac'.
|
|
# additional columns may be included and are ignored.
|
|
#
|
|
# by default, no history is loaded.
|
|
|
|
## @var recalc_history (bool)
|
|
# select whether the R-factors of the historic models are calculated again.
|
|
#
|
|
# this is useful if the historic data was calculated for a different cluster, different set of parameters,
|
|
# or different experimental data, and if the R-factors of the new optimization may be systematically greater.
|
|
# set this argument to False only if the calculation is a continuation of a previous one
|
|
# without any changes to the code.
|
|
|
|
## @var data_dir
|
|
# directory path to experimental data.
|
|
#
|
|
# the project should load experimental data (scan files) from this path.
|
|
# this attribute receives the --data-dir argument from the command line
|
|
# if the project parses the common arguments (pmsco.set_common_args).
|
|
#
|
|
# it is up to the project to define where to load scan files from.
|
|
# if the location of the files may depend on the machine or user account,
|
|
# the user may want to specify the data path on the command line.
|
|
|
|
## @var output_file (string)
|
|
# file name root for data files produced during the calculation, including intermediate files.
|
|
#
|
|
|
|
## @var timedelta_limit (datetime.timedelta)
|
|
# wall time after which no new calculations should be started.
|
|
#
|
|
# the actual wall time may be longer by the remaining time of running calculations.
|
|
# running calculations will not be aborted.
|
|
|
|
## @var _combined_scan
|
|
# combined raw data from scans.
|
|
# updated by add_scan().
|
|
|
|
## @var _combined_modf
|
|
# combined modulation function from scans.
|
|
# updated by add_scan().
|
|
|
|
## @var files
|
|
# list of all generated data files with metadata.
|
|
# the list is used by model handlers to decide which files can be deleted at run time to save disk space.
|
|
#
|
|
# files.categories_to_delete determines which files can be deleted.
|
|
|
|
def __init__(self):
    # calculation mode and calculator selection
    self.mode = "single"
    self.code = "edac"
    # supported optional features (empty by default, see class documentation)
    self.features = {}
    self.cluster_format = mc.FMT_EDAC
    # default generator routes cluster calls back to the project (legacy interface)
    self.cluster_generator = LegacyClusterGenerator(self)
    # experimental scans and structural symmetries; populated via add_scan()/add_symmetry()
    self.scans = []
    self.symmetries = []
    # optimizer settings
    self.pop_size = 0
    self.history_file = ""
    self.recalc_history = True
    # file locations and run-time limits
    self.data_dir = ""
    self.output_file = "pmsco_data"
    self.timedelta_limit = datetime.timedelta(days=1)
    # caches of combined scan data, maintained by add_scan()/clear_scans()
    self._combined_scan = None
    self._combined_modf = None
    # tracker of generated data files (used for run-time clean-up)
    self.files = files.FileTracker()
    # task handler class per task level
    self.handler_classes = {
        'model': handlers.SingleModelHandler,
        'scan': handlers.ScanHandler,
        'symmetry': handlers.SymmetryHandler,
        'emitter': handlers.EmitterHandler,
        'region': handlers.SingleRegionHandler,
    }
    self.calculator_class = None
|
|
|
|
def create_domain(self):
    """
    create a msc_project.Domain object which defines the allowed range for model parameters.

    this method must be implemented by the actual project class.
    the Domain object must declare all model parameters used in the project.

    @return Domain object.
    this abstract base implementation returns None.
    """
    return None
|
|
|
|
def create_params(self, model, index):
    """
    create a Params object given the model parameters and calculation index.

    this method must be implemented by the actual project class.

    @param model (dictionary) model parameters to be used in the calculation.

    @param index (named tuple CalcID) calculation index.
    the method should consider only the following attributes:
    @arg @c scan scan index (index into Project.scans)
    @arg @c sym symmetry index (index into Project.symmetries)

    @return Params object.
    this abstract base implementation returns None.
    """
    return None
|
|
|
|
def clear_scans(self):
|
|
"""
|
|
clear scans.
|
|
|
|
delete all scans in self.scans and empty the list.
|
|
|
|
@return: None
|
|
"""
|
|
self.scans = []
|
|
self._combined_scan = None
|
|
self._combined_modf = None
|
|
|
|
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
    """
    add the file name of reference experiment and load it.

    the extension must be one of msc_data.DATATYPES (case insensitive)
    corresponding to the meaning of the columns in the file.

    caution: EDAC can only calculate equidistant, rectangular scans.
    the following scans are currently supported:

    * intensity vs energy at fixed theta, phi
    * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
    * intensity vs theta, phi, or alpha
    * intensity vs theta and phi (hemisphere or hologram scan)

    the method calculates the modulation function if @c is_modf is @c False.
    it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.

    @param filename: (string) file name of the experimental data, possibly including a path.

    @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

    @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

    @param is_modf: (bool) declares whether the file contains the modulation function (True),
    or intensity (False, default). In the latter case, the modulation function is calculated internally.

    @param modf_model: (dict) model parameters to be passed to the modulation function.

    @return (Scan) the new scan object (which is also a member of self.scans).

    @todo the accepted scanning schemes should be generalized.
    """
    scan = Scan()
    scan.set_scan(filename, emitter, initial_state)
    self.scans.append(scan)

    if modf_model is None:
        modf_model = {}

    # determine the modulation function of the new scan
    if scan.raw_data is not None:
        if is_modf:
            scan.modulation = scan.raw_data
        else:
            try:
                scan.modulation = self.calc_modulation(scan.raw_data, modf_model)
            except ValueError:
                logger.error("error calculating the modulation function of experimental data.")
                scan.modulation = None
    else:
        scan.modulation = None

    # update the combined comparison targets
    self._combined_scan = self._append_combined(self._combined_scan, scan.raw_data)
    self._combined_modf = self._append_combined(self._combined_modf, scan.modulation)

    return scan

def _append_combined(self, combined, addition):
    """
    append the data of one scan to a combined data array.

    helper for add_scan(); factors out the identical combination logic
    for the raw data and the modulation function.

    @param combined: (numpy.ndarray or None) previously combined data.

    @param addition: (numpy.ndarray or None) data of the new scan.

    @return (numpy.ndarray or None) the new combined array.
    None if @c addition is None (a scan without data invalidates the combined array,
    preserving the original behaviour);
    a copy of @c addition if @c combined is None;
    otherwise @c addition restructured to a common dtype and stacked onto @c combined.
    """
    if addition is None:
        return None
    if combined is None:
        return addition.copy()
    dtype = md.common_dtype((combined, addition))
    return np.hstack((combined, md.restructure_data(addition, dtype)))
|
|
|
|
def clear_symmetries(self):
|
|
"""
|
|
clear symmetries.
|
|
|
|
delete all symmetries in self.symmetries and empty the list.
|
|
|
|
@return: None
|
|
"""
|
|
self.symmetries = []
|
|
|
|
def add_symmetry(self, symmetry):
|
|
"""
|
|
add a symmetry to the list of symmetries.
|
|
|
|
this class declares the list of symmetries.
|
|
it does not define what should be in the list of symmetries.
|
|
however, there must be an entry for each symmetry to be calculated.
|
|
if the list is empty, no calculation will be executed.
|
|
|
|
@attention initially, the symmetries list is empty.
|
|
your project needs to add at least one symmetry.
|
|
otherwise, no calculation will be executed.
|
|
|
|
@param symmetry: it is up to the derived project class to specify and interpret the data stored here.
|
|
it is recommended to store a dictionary with symmetry parameters similar to the model parameters.
|
|
|
|
@return: None
|
|
"""
|
|
self.symmetries.append(symmetry)
|
|
|
|
def set_output(self, filename):
|
|
"""
|
|
set base name of output file
|
|
"""
|
|
self.output_file = filename
|
|
|
|
def set_timedelta_limit(self, timedelta):
|
|
"""
|
|
set the walltime limit
|
|
|
|
timedelta (datetime.timedelta)
|
|
"""
|
|
self.timedelta_limit = timedelta
|
|
|
|
def combine_symmetries(self, parent_task, child_tasks):
    """
    combine results of different symmetry into one result. calculate the modulation function.

    the symmetry results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    by default, this method adds all symmetries with equal weight.

    @param parent_task: (CalculationTask) parent task of the symmetry tasks.
    the method must write the results to the files indicated
    by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each symmetry.
    the method must read the source data from the files
    indicated by the @c result_filename attributes.
    the sequence is sorted by task ID, i.e., essentially, by symmetry index.

    @return: None

    @raise IndexError if child_tasks is empty

    @raise KeyError if a filename is missing

    @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
    the model parameters are available as the @c model attribute of the calculation tasks.
    """
    # sum the intensity columns of all symmetries with equal weight.
    # the first child's array is used as the accumulator.
    result_data = None
    for task in child_tasks:
        data = md.load_data(task.result_filename)
        if result_data is None:
            result_data = data
        else:
            result_data['i'] += data['i']

    if result_data is None:
        # enforce the documented contract explicitly rather than
        # passing None on to md.save_data with an obscure error.
        raise IndexError("combine_symmetries called with empty child_tasks")

    md.save_data(parent_task.result_filename, result_data)

    # todo : the handling of missing modulation functions may need some cleanup
    if self.scans[parent_task.id.scan].modulation is not None:
        result_modf = self.calc_modulation(result_data, parent_task.model)
        md.save_data(parent_task.modf_filename, result_modf)
    else:
        parent_task.modf_filename = ""
|
|
|
|
def combine_emitters(self, parent_task, child_tasks):
    """
    combine results of different emitters into one result. calculate the modulation function.

    the emitter results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    by default, this method adds all emitters with equal weight.

    sub-classes may override this method and implement expansion of equivalent emitters,
    unequal weights, etc.

    @param parent_task: (CalculationTask) parent task of the emitter tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each emitter.
        the method must read the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by the emitter index.

    @return: None

    @raise IndexError if child_tasks is empty

    @raise KeyError if a filename is missing

    @note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
    """

    # enforce the documented contract up front:
    # without this guard, an empty sequence would leave result_data = None
    # and fail obscurely inside md.save_data.
    if len(child_tasks) == 0:
        raise IndexError("child_tasks is empty")

    # sum the intensities of all emitters with equal weight.
    # the first loaded dataset serves as accumulator and output template.
    result_data = None
    for task in child_tasks:
        data = md.load_data(task.result_filename)
        if result_data is None:
            result_data = data
        else:
            result_data['i'] += data['i']

    md.save_data(parent_task.result_filename, result_data)

    # todo : the handling of missing modulation functions may need some cleanup
    if self.scans[parent_task.id.scan].modulation is not None:
        result_modf = self.calc_modulation(result_data, parent_task.model)
        md.save_data(parent_task.modf_filename, result_modf)
    else:
        # signal downstream handlers that no modulation file was produced
        parent_task.modf_filename = ""
|
|
|
|
def combine_scans(self, parent_task, child_tasks):
    """
    combine results of different scans into one result, for intensity and modulation.

    the scan results are read from the file system using the indices defined by the child_tasks,
    and the combined result is written to the file system with the index defined by parent_task.

    the datasets of the scans are appended.
    this is done for intensity and modulation data independently.

    @param parent_task: (CalculationTask) parent task of the scan tasks.
        the method writes the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

    @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
        the method reads the source data from the files
        indicated by the @c result_filename and @c modf_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by scan index.

    @return: None

    @raise IndexError if child_tasks is empty.

    @raise KeyError if a filename is missing.
    """

    # intensity and modulation data are concatenated independently.
    # the processing is identical except for the filename attribute involved,
    # so it is delegated to a shared helper.
    self._combine_scan_data(parent_task, child_tasks, 'result_filename')
    self._combine_scan_data(parent_task, child_tasks, 'modf_filename')

def _combine_scan_data(self, parent_task, child_tasks, filename_attr):
    """
    concatenate one kind of data file (intensity or modulation) of all scans.

    loads the files named by the @c filename_attr attribute of each child task,
    converts them to a common dtype, concatenates the datasets,
    and saves the result under the file named by the same attribute of parent_task.

    if any file cannot be loaded (KeyError or IOError), no output is written and
    the attribute of parent_task is set to the empty string
    to signal downstream handlers that the file is missing.

    @param parent_task: (CalculationTask) task that receives the combined data.

    @param child_tasks: (sequence of CalculationTask) tasks which identify the scans.

    @param filename_attr: (str) name of the filename attribute to process,
        either 'result_filename' or 'modf_filename'.

    @return: None
    """
    try:
        stack1 = [md.load_data(getattr(task, filename_attr)) for task in child_tasks]
    except (KeyError, IOError):
        setattr(parent_task, filename_attr, "")
    else:
        dtype = md.common_dtype(stack1)
        stack2 = [md.restructure_data(data, dtype) for data in stack1]
        combined = np.hstack(tuple(stack2))
        md.save_data(getattr(parent_task, filename_attr), combined)
|
|
|
|
# noinspection PyUnusedLocal
def calc_modulation(self, data, model):
    """
    calculate the project-dependent modulation function.

    the modulation function of I(x) is (I(x) - S(x)) / S(x)
    where S(x) is a smooth copy of I(x).

    by default, the modulation function is calculated by data.calc_modfunc_loess().
    override this method in your project to use a different modulation function.

    @param data structured numpy.ndarray in EI, ETPI, or ETPAI format.
        can contain a one- or multi-dimensional scan.
        the scan coordinates must be on a rectangular or hemispherical grid.
        for maximum compatibility, the array should be sorted,
        though for the default calc_modfunc_loess() function this is not required.

        if data contains a hemispherical scan, the phi dimension is ignored,
        i.e. the modulation function is calculated on a phi-average.

    @param model: (dict) model parameters of the calculation task.
        unused by the default implementation;
        available so that overriding projects can make the
        modulation function depend on the model.

    @return copy of the data array with the modulation function in the 'i' column.
    """

    return md.calc_modfunc_loess(data)
|
|
|
|
def calc_rfactor(self, task):
    """
    calculate the R-factor of a task.

    the R-factor is evaluated over the combined scans:
    the calculated modulation function is loaded from the task's modf file
    and compared to the combined experimental modulation in self._combined_modf.

    this method is called by the model handler.

    by default, the R-factor is calculated by data.rfactor() over the combined scans.
    override this method in your project to use a different R-factor algorithm.

    @param task: (CalculationTask) a model task.

    @return the scalar R-factor as returned by data.rfactor().
    """
    calc_modf = md.load_data(task.modf_filename)
    return md.rfactor(self._combined_modf, calc_modf)
|
|
|
|
def cleanup(self):
    """
    delete unwanted files at the end of a project.

    delegates to self.files.delete_files().

    @return: None
    """
    self.files.delete_files()
|