public distro 2.1.0

This commit is contained in:
2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions

View File

@ -33,12 +33,15 @@ from __future__ import print_function
import collections
import copy
import datetime
import git
import logging
import numpy as np
import os.path
import socket
import sys
from pmsco.calculators.calculator import InternalAtomicCalculator
from pmsco.calculators.edac import EdacCalculator
import pmsco.cluster as mc
from pmsco.compat import open
import pmsco.data as md
@ -177,32 +180,89 @@ class Params(object):
# @arg emission angle window (EDAC)
# @arg angular_broadening (MSC)
## @var binding_energy (float)
# initial state binding energy with respect to the Fermi level in eV
#
## @var initial_state (str)
# initial state
#
# 1s, 2p, 2p1/2, etc.
#
## @var phase_files (dict)
# dictionary of phase files.
# dictionary of phase or scattering matrix element files.
#
# the keys are atomic numbers, the values file names.
# if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
# whether the files contain phase shifts or matrix elements depends on the calculator.
# EDAC determines the kind of information from the first line in the file.
#
# if the dictionary is empty or the files don't exist,
# the scattering matrix is computed by the calculator (if supported).
#
# maps to:
# @arg scatterer (EDAC)
# @arg atomic_number, phase_file (MSC)
## @var phase_output_classes (int or iterable of int)
# atom classes for which to output phase files
#
# if the atomic scattering factors are calculated internally,
# EDAC can export them to scattering files.
#
# this parameter can be one of
# @arg None (default) no phase output,
# @arg integer number defining a range 0:N-1 of atom classes,
# @arg iterable (e.g., set or sequence) of atom classes to export.
#
# the problem is that EDAC expects the user to list each atom class to export,
# though it is not possible to know how many classes there will be
# or which atoms belong to which class before the calculation is actually done.
# the number of classes will be between the number of different elements and the number of atoms.
#
# thus, this parameter should normally be left at its default value
# and used only in specific situations that can be processed manually.
# if the parameter is non-default, EDAC will also produce a cluster output
# that includes a mapping between atomic coordinates and atom classes.
#
# @note the files generated belong to the category "output".
# you need to specify `--keep-files output` to prevent them from getting cleaned up.
## @var polarization (str)
# photon polarization
#
# 'H', 'V', 'L', 'R', 'U'
#
## @var rme_files (dict)
# dictionary of radial matrix element files.
#
# if the dictionary is empty or the files don't exist,
# the radial matrix elements default to the rme_xxx_xxx attributes.
#
# in EDAC, RME files or constants are considered only if @ref phase_files are specified.
#
## @var work_function (float)
# work function in eV
#
# the energy scale of EDAC is referenced to the vacuum level
# but data files are referenced to the Fermi level.
# the @ref pmsco.calculators.edac module adds the work function to the kinetic energy before it calls EDAC.
#
def __init__(self):
self.title = "default parameters"
self.comment = "set by project.Params()"
self.cluster_file = ""
self.output_file = ""
self.scan_file = ""
# EDAC convention: 1s, 2p, 2p1/2, etc.
self.initial_state = "1s"
# MSC convention: H, V, L, R, U
self.binding_energy = 0.0
self.polarization = "H"
self.angular_resolution = 1.0
self.z_surface = 0.0
self.inner_potential = 10.0
# the energy scale of EDAC is referenced to the vacuum level
# but data files are referenced to the Fermi level
# the msc_edac module adds the work function to the kinetic energy before it calls EDAC
self.work_function = 0.0
self.symmetry_range = 360.0
self.polar_incidence_angle = 60.0
@ -211,6 +271,11 @@ class Params(object):
self.debye_temperature = 400.0
self.debye_wavevector = 1.0
self.phase_files = {}
self.rme_files = {}
self.rme_minus_value = 0.1
self.rme_minus_shift = 0.0
self.rme_plus_value = 1.0
self.rme_plus_shift = 0.0
# used by MSC only
self.spherical_order = 2
self.scattering_level = 5
@ -221,15 +286,23 @@ class Params(object):
self.planewave_attenuation = 1.0
self.vibration_model = "N"
self.substrate_atomic_mass = 1.0
self.rme_minus_value = 0.5
self.rme_minus_shift = 0.0
self.rme_plus_value = 0.5
self.rme_plus_shift = 0.0
# used by EDAC only
self.emitters = [(0.0, 0.0, 0.0, 0)]
self.lmax = 15
self.dmax = 5.0
self.orders = [20]
self.phase_output_classes = None
@property
def l_init(self):
    """
    orbital quantum number l of the initial state.

    the value is derived from the second character of the initial_state attribute
    (e.g. the "p" in "2p" or "2p1/2"), which is looked up in the sequence "spdf".

    @return: (int) 0..3, corresponding to s, p, d, f.

    @raise ValueError if the second character is not one of s, p, d, f.

    @raise IndexError if initial_state has fewer than two characters.
    """
    orbital_letter = self.initial_state[1]
    return "spdf".index(orbital_letter)
class Scan(object):
@ -301,11 +374,45 @@ class Scan(object):
self.mode = []
self.emitter = ""
self.initial_state = "1s"
self.energies = np.zeros((0))
self.thetas = np.zeros((0))
self.phis = np.zeros((0))
self.alphas = np.zeros((0))
self.positions = {
'e': np.empty(0),
't': np.empty(0),
'p': np.empty(0),
'a': np.empty(0),
}
@property
def energies(self):
    """
    scan positions of the energy dimension.

    reading and writing this property accesses the 'e' item of the positions dictionary.
    """
    return self.positions['e']

@energies.setter
def energies(self, values):
    # replace the 'e' item only, the other scan dimensions are left untouched.
    self.positions['e'] = values
@property
def thetas(self):
    """
    scan positions of the theta dimension.

    reading and writing this property accesses the 't' item of the positions dictionary.
    """
    return self.positions['t']

@thetas.setter
def thetas(self, values):
    # replace the 't' item only, the other scan dimensions are left untouched.
    self.positions['t'] = values
@property
def phis(self):
    """
    scan positions of the phi dimension.

    reading and writing this property accesses the 'p' item of the positions dictionary.
    """
    return self.positions['p']

@phis.setter
def phis(self, values):
    # replace the 'p' item only, the other scan dimensions are left untouched.
    self.positions['p'] = values
@property
def alphas(self):
    """
    scan positions of the alpha dimension.

    reading and writing this property accesses the 'a' item of the positions dictionary.
    """
    return self.positions['a']

@alphas.setter
def alphas(self, values):
    # replace the 'a' item only, the other scan dimensions are left untouched.
    self.positions['a'] = values
def copy(self):
"""
create a copy of the scan.
@ -314,9 +421,9 @@ class Scan(object):
"""
return copy.deepcopy(self)
def set_scan(self, filename, emitter, initial_state):
def import_scan_file(self, filename, emitter, initial_state):
"""
set file name of reference experiment and load it.
import the reference experiment.
the extension must be one of msc_data.DATATYPES (case insensitive)
corresponding to the meaning of the columns in the file.
@ -324,9 +431,8 @@ class Scan(object):
this method does not calculate the modulation function.
@attention EDAC can only calculate equidistant, rectangular scans.
this version introduces holo scans as an experimental feature.
for all other scan types, the scan file must exactly conform with a rectangular scan.
the following scans are currently supported:
holo scans are transparently mapped to rectangular scans by pmsco.
this method accepts the following scans:
* intensity vs energy at fixed theta, phi
* intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
@ -347,43 +453,120 @@ class Scan(object):
if self.filename:
self.raw_data = md.load_data(self.filename)
self.dtype = self.raw_data.dtype
self.mode, positions = md.detect_scan_mode(self.raw_data)
self.mode, self.positions = md.detect_scan_mode(self.raw_data)
if 'e' in self.mode:
self.energies = positions['e']
else:
if 'e' not in self.mode:
try:
self.energies = np.asarray((self.raw_data['e'][0], ))
except ValueError:
logger.error("missing energy in scan file %s", self.filename)
raise
if 't' in self.mode:
self.thetas = positions['t']
else:
if 't' not in self.mode:
try:
self.thetas = np.asarray((self.raw_data['t'][0], ))
except ValueError:
logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
self.thetas = np.zeros((1))
self.thetas = np.zeros(1)
if 'p' in self.mode:
self.phis = positions['p']
else:
if 'p' not in self.mode:
try:
self.phis = np.asarray((self.raw_data['p'][0], ))
except ValueError:
logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
self.phis = np.zeros((1))
self.phis = np.zeros(1)
if 'a' in self.mode:
self.alphas = positions['a']
else:
if 'a' not in self.mode:
try:
self.alphas = np.asarray((self.raw_data['a'][0], ))
except ValueError:
logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
self.alphas = np.zeros((1))
self.alphas = np.zeros(1)
def define_scan(self, positions, emitter, initial_state):
    """
    define a cartesian (rectangular/grid) scan.

    this method initializes the scan with a one- or two-dimensional cartesian scan
    of the four possible scan dimensions 'e', 't', 'p' and 'a'.
    the scan positions are copied from the arguments,
    the raw data array is rebuilt on the full grid and its intensity column 'i' is set to 1.
    the file name is reset to an empty string.

    NOTE(review): earlier documentation stated that the modulation function is reset to None.
    this method does not contain such an assignment - confirm whether it is needed.

    the method is intended for the following scan schemes:

    * intensity vs energy at fixed theta, phi
    * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
    * intensity vs theta, phi, or alpha
    * intensity vs theta and phi (rectangular holo scan)

    the method itself only checks that one or two dimensions vary
    and that theta and alpha do not vary at the same time.

    @param positions: (dictionary of numpy arrays)
        the dictionary must contain a one-dimensional array for each scan dimension 'e', 't', 'p' and 'a'.
        these arrays must contain unique, equidistant positions.
        constant dimensions must contain exactly one value (a plain scalar is accepted as well).
        missing angle dimensions default to 0,
        a missing energy dimension results in a KeyError.

    @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

    @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

    @raise KeyError if the positions dictionary does not contain the energy dimension.

    @raise AssertionError if the number of varying dimensions is not 1 or 2,
        or if both theta and alpha vary.
    """
    self.filename = ""
    self.emitter = emitter
    self.initial_state = initial_state
    self.mode = []

    # scan dimensions in the order of the raw data columns, with the names used in log messages.
    dimensions = (('e', "energy"), ('t', "theta"), ('p', "phi"), ('a', "alpha"))

    shape = 1
    for dim, label in dimensions:
        try:
            values = positions[dim]
        except KeyError:
            if dim == 'e':
                # there is no sensible default for the energy.
                logger.error("missing energy in define_scan arguments")
                raise
            logger.info("missing %s in define_scan arguments, defaulting to 0.0", label)
            values = np.zeros(1)
        else:
            # copy so that the scan does not share its positions with the caller.
            # atleast_1d turns a scalar constant into an array of one element.
            values = np.atleast_1d(np.copy(values))

        self.positions[dim] = values
        # a dimension belongs to the scan mode only if it actually varies.
        if values.shape[0] > 1:
            self.mode.append(dim)
        shape *= values.shape[0]

    # explicit raise instead of an assert statement: the checks validate caller input
    # and must not disappear when python runs with -O. the exception type is unchanged.
    if not 0 < len(self.mode) <= 2:
        raise AssertionError("unacceptable number of dimensions in define_scan")
    if 't' in self.mode and 'a' in self.mode:
        raise AssertionError("unacceptable combination of dimensions in define_scan")

    self.dtype = md.DTYPE_ETPAI
    self.raw_data = np.zeros(shape, self.dtype)
    # expand the axes to the full grid and store one flattened coordinate column per dimension.
    grid = np.meshgrid(*[self.positions[dim] for dim, __ in dimensions])
    for (dim, __), coordinates in zip(dimensions, grid):
        self.raw_data[dim] = coordinates.reshape(-1)
    self.raw_data['i'] = 1
# noinspection PyMethodMayBeStatic
@ -465,9 +648,8 @@ class Project(object):
#
# @arg @c 'pop_size' (int)
# population size (number of particles) in the swarm or genetic optimization mode.
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
# by default, the population size is set to the number of parallel processes or 4, whichever is greater.
# you may want to override the default value in cases where the automatic choice is not appropriate.
# the value can be set by the command line.
# @arg @c 'seed_file' (string)
# name of a file containing the results from previous optimization runs.
# this can be used to resume a swarm or genetic optimization where it was interrupted before.
@ -537,9 +719,27 @@ class Project(object):
# @arg 3 = emitter level: emitter nodes in addition to level 1.
# @arg 4 = region level: region nodes in addition to level 1.
## @var atomic_scattering_factory
# factory function to create an atomic scattering calculator
#
# this can also be the name of a class.
# the calculator must inherit from pmsco.calculators.calculator.AtomicCalculator.
# the name of atomic scattering calculator classes should end in AtomicCalculator.
## @var multiple_scattering_factory
# factory function to create a multiple scattering calculator
#
# this can also be the name of a class.
# the calculator must inherit from pmsco.calculators.calculator.Calculator
#
# example: pmsco.calculators.edac.EdacCalculator
#
def __init__(self):
self.mode = "single"
self.code = "edac"
self.job_name = ""
self.git_hash = ""
self.description = ""
self.features = {}
self.cluster_format = mc.FMT_EDAC
self.cluster_generator = mc.LegacyClusterGenerator(self)
@ -568,7 +768,8 @@ class Project(object):
'emit': handlers.EmitterHandler,
'region': handlers.SingleRegionHandler
}
self.calculator_class = None
self.atomic_scattering_factory = InternalAtomicCalculator
self.multiple_scattering_factory = EdacCalculator
self._tasks_fields = []
self._db = database.ResultsDatabase()
@ -608,7 +809,7 @@ class Project(object):
self.combined_scan = None
self.combined_modf = None
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
"""
add the file name of reference experiment and load it.
@ -627,6 +828,15 @@ class Project(object):
it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.
@param filename: (string) file name of the experimental data, possibly including a path.
the file is not loaded when the optional positions argument is present,
but the filename may serve as basename for output files (e.g. modulation function).
@param positions: (optional, dictionary of numpy arrays) scan positions.
if specified, the file given by filename is _not_ loaded,
and the scan positions are initialized from this dictionary.
the dictionary keys are the possible scan dimensions: 'e', 't', 'p', 'a'.
the arrays are one-dimensional and contain unique, equidistant positions.
constant dimensions have shape 1. see @ref Scan.define_scan.
@param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
@ -638,11 +848,13 @@ class Project(object):
@param modf_model: (dict) model parameters to be passed to the modulation function.
@return (Scan) the new scan object (which is also a member of self.scans).
@todo the accepted scanning schemes should be generalized.
"""
scan = Scan()
scan.set_scan(filename, emitter, initial_state)
if positions is not None:
scan.define_scan(positions, emitter, initial_state)
scan.filename = filename
else:
scan.import_scan_file(filename, emitter, initial_state)
self.scans.append(scan)
if modf_model is None:
@ -735,6 +947,41 @@ class Project(object):
"""
self.timedelta_limit = timedelta
def log_project_args(self):
    """
    send some common project attributes to the log.

    the attributes are normally logged at WARNING level.
    optimizer parameters that evaluate to False are logged at DEBUG level only.

    this method is called by the main pmsco module after creating the project and assigning command line arguments.
    it may be overridden to add logs of attributes of the sub-class.

    the method is best-effort: if one of the attributes is missing,
    the remaining items are skipped and a warning that names the missing attribute is logged instead.

    @return: None
    """
    try:
        logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
        logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
        logger.warning("optimization mode: {0}".format(self.mode))

        for key in sorted(self.optimizer_params):
            val = self.optimizer_params[key]
            # unset (false) parameters are of minor interest.
            lev = logging.WARNING if val else logging.DEBUG
            logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))

        logger.warning("data directory: {0}".format(self.data_dir))
        logger.warning("output file: {0}".format(self.output_file))

        # presumably both operands are sets of category names.
        # sorting makes the log message reproducible between runs.
        _files_to_keep = sorted(files.FILE_CATEGORIES - self.files.categories_to_delete)
        logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))

        for idx, scan in enumerate(self.scans):
            logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
        for idx, sym in enumerate(self.symmetries):
            logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
    except AttributeError as err:
        # include the exception text so that the log shows which attribute is missing.
        logger.warning("AttributeError in log_project_args: {0}".format(err))
def combine_symmetries(self, parent_task, child_tasks):
"""
combine results of different symmetry into one result and calculate the modulation function.
@ -937,6 +1184,23 @@ class Project(object):
else:
md.save_data(parent_task.modf_filename, modf)
def get_git_hash(self):
    """
    get the git commit (hash) of the running code (HEAD)

    the method looks for a git repository in the source tree of this module.
    if successful, it returns the hash string of the HEAD commit.

    the hash is informative only.
    the method returns an empty string rather than raising an exception if it cannot be determined.

    @return: hexadecimal hash string.
        empty string if the file is not in a git repository,
        if the path of the module file does not exist,
        or if HEAD does not resolve to a commit.
    """
    try:
        repo = git.Repo(__file__, search_parent_directories=True)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # not under version control, or the module path does not exist on disk.
        return ""

    try:
        return repo.head.commit.hexsha
    except ValueError:
        # presumably raised by GitPython if HEAD does not point to a commit yet
        # (e.g. a freshly initialized repository) - verify against the GitPython version in use.
        return ""
def setup(self, handlers):
"""
prepare for calculations.
@ -954,11 +1218,13 @@ class Project(object):
@return: None
"""
self.git_hash = self.get_git_hash()
fields = ["rfac"]
fields.extend(dispatch.CalcID._fields)
fields.append("secs")
fields = ["_" + f for f in fields]
dom = self.create_domain()
model_fields = dom.start.keys()
model_fields = list(dom.start.keys())
model_fields.sort(key=lambda name: name.lower())
fields.extend(model_fields)
self._tasks_fields = fields
@ -968,16 +1234,16 @@ class Project(object):
outfile.write(" ".join(fields))
outfile.write("\n")
# todo : fill in the descriptive fields, change to file-database
# todo : change to file-database
self._db.connect(":memory:")
project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
job_id = self._db.register_job(project_id,
"job-name",
self.job_name,
self.mode,
socket.gethostname(),
"git-hash",
self.git_hash,
datetime.datetime.now(),
"description")
self.description)
self._db.register_params(model_fields)
self._db.create_models_view()
@ -1012,6 +1278,7 @@ class Project(object):
values_dict = {"_" + k: v for k, v in values_dict.items()}
values_dict.update(parent_task.model)
values_dict['_rfac'] = parent_task.rfac
values_dict['_secs'] = parent_task.time.total_seconds()
values_list = [values_dict[field] for field in self._tasks_fields]
with open(self.output_file + ".tasks.dat", "a") as outfile:
outfile.write(" ".join(format(value) for value in values_list) + "\n")
@ -1258,6 +1525,76 @@ class Project(object):
return _files
def before_atomic_scattering(self, task, par, clu):
    """
    project hook before atomic scattering factors are calculated.

    the hook provides the Params and Cluster objects for the atomic scattering calculation,
    derived from the objects that will be used in the multiple scattering calculation.

    this basic version passes par and clu through unchanged if the model index of the task is 0 or greater.
    for the root task (negative model index, reference cluster) it returns None for both.
    subclasses may override the method to modify or replace the cluster.

    @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
        if the model index is -1, the project can return the global reference cluster
        (to calculate the fixed scattering factors that will be used for all models)
        or None if no global scattering factors should be calculated.
        do not modify this object!

    @param par: @ref pmsco.project.Params object representing the preliminary
        multiple scattering input parameters of the current task.
        the method can make modifications to this object instance directly.

    @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
        multiple scattering cluster of the current task.
        the method can make modifications to this object instance directly.

    @return: a tuple (par, clu) where par and clu are the input parameters and cluster
        to be used for the calculation of atomic scattering factors.
        these should either be the original function arguments,
        or copies of the original arguments.
        if atomic scattering factors should not be calculated, the return values should be None.
    """
    is_model_task = task.id.model >= 0
    return (par, clu) if is_model_task else (None, None)
def after_atomic_scattering(self, task, par, clu):
    """
    project hook after atomic scattering factors are calculated.

    the hook cleans up the Params and Cluster objects from the atomic scattering calculation
    so that they can be used in the multiple scattering calculation.

    this basic version passes par and clu through unchanged if the model index of the task is 0 or greater.
    for the root task (negative model index) it returns None for both.
    subclasses may override the method and modify the cluster and/or input parameters
    so that the desired atomic scattering factors are used.

    @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
        a model index of -1 denotes the root task (global reference cluster).

    @param par: @ref pmsco.project.Params object representing the preliminary
        multiple scattering input parameters of the current task.

    @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
        multiple scattering cluster of the current task.
        do not modify this object, make a copy!

    @return: a tuple (par, clu) where par and clu are the input parameters and cluster
        to be used in the multiple scattering calculation.
        these should either be the original function arguments,
        or copies of the original arguments.
        (None, None) for the root task.
    """
    is_model_task = task.id.model >= 0
    return (par, clu) if is_model_task else (None, None)
def cleanup(self):
"""
delete unwanted files at the end of a project.