public distro 2.1.0

2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions
--- a/pmsco/project.py
+++ b/pmsco/project.py
@ -33,12 +33,15 @@ from __future__ import print_function
 import collections
 import copy
 import datetime
+import git
 import logging
 import numpy as np
 import os.path
 import socket
 import sys

+from pmsco.calculators.calculator import InternalAtomicCalculator
+from pmsco.calculators.edac import EdacCalculator
 import pmsco.cluster as mc
 from pmsco.compat import open
 import pmsco.data as md
@ -177,32 +180,89 @@ class Params(object):
    # @arg emission angle window (EDAC)
    # @arg angular_broadening (MSC)

+    ## @var binding_energy (float)
+    # initial state binding energy with respect to the Fermi level in eV
+    #
+
+    ## @var initial_state (str)
+    # initial state
+    #
+    # 1s, 2p, 2p1/2, etc.
+    #
+
    ## @var phase_files (dict)
-    # dictionary of phase files.
+    # dictionary of phase or scattering matrix element files.
    #
    # the keys are atomic numbers, the values file names.
-    # if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
+    # whether the files contain phase shifts or matrix elements depends on the calculator.
+    # EDAC determines the kind of information from the first line in the file.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the scattering matrix is computed by the calculator (if supported).
    #
    # maps to:
    # @arg scatterer (EDAC)
    # @arg atomic_number, phase_file (MSC)

+    ## @var phase_output_classes (int or iterable of int)
+    # atom classes for which to output phase files
+    #
+    # if the atomic scattering factors are calculated internally,
+    # EDAC can export them to scattering files.
+    #
+    # this parameter can be one of
+    # @arg None (default) no phase output,
+    # @arg integer number defining a range 0:N-1 of atom classes,
+    # @arg iterable (e.g., set or sequence) of atom classes to export.
+    #
+    # the problem is that EDAC expects the user to list each atom class to export,
+    # though it is not possible to know how many classes there will be
+    # or which atoms belong to which class before the calculation is actually done.
+    # the number of classes will be between the number of different elements and the number of atoms.
+    #
+    # thus, this parameter should normally be left at its default value
+    # and used only in specific situations that can be processed manually.
+    # if the parameter is non-default, EDAC will also produce a cluster output
+    # that includes a mapping between atomic coordinates and atom classes.
+    #
+    # @note the files generated belong to the category "output".
+    # you need to specify `--keep-files output` to prevent them from getting cleaned up.
+
+    ## @var polarization (str)
+    # photon polarization
+    #
+    # 'H', 'V', 'L', 'R', 'U'
+    #
+
+    ## @var rme_files (dict)
+    # dictionary of radial matrix element files.
+    #
+    # if the dictionary is empty or the files don't exist,
+    # the radial matrix defaults to the rme_xxx_xxx attributes.
+    #
+    # in EDAC, RME files or constants are considered only if @ref phase_files are specified.
+    #
+
+    ## @var work_function (float)
+    # work function in eV
+    #
+    # the energy scale of EDAC is referenced to the vacuum level
+    # but data files are referenced to the Fermi level.
+    # the @ref pmsco.calculators.edac module adds the work function to the kinetic energy before it calls EDAC.
+    #
+
    def __init__(self):
        self.title = "default parameters"
        self.comment = "set by project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
-        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
-        # MSC convention: H, V, L, R, U
+        self.binding_energy = 0.0
        self.polarization = "H"
        self.angular_resolution = 1.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
-        # the energy scale of EDAC is referenced to the vacuum level
-        # but data files are referenced to the Fermi level
-        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC
        self.work_function = 0.0
        self.symmetry_range = 360.0
        self.polar_incidence_angle = 60.0
@ -211,6 +271,11 @@ class Params(object):
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        self.phase_files = {}
+        self.rme_files = {}
+        self.rme_minus_value = 0.1
+        self.rme_minus_shift = 0.0
+        self.rme_plus_value = 1.0
+        self.rme_plus_shift = 0.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
@ -221,15 +286,23 @@ class Params(object):
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
-        self.rme_minus_value = 0.5
-        self.rme_minus_shift = 0.0
-        self.rme_plus_value = 0.5
-        self.rme_plus_shift = 0.0
        # used by EDAC only
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        self.lmax = 15
        self.dmax = 5.0
        self.orders = [20]
+        self.phase_output_classes = None
+
+    @property
+    def l_init(self):
+        """
+        initial state l quantum number.
+
+        this is converted from the initial_state property.
+
+        @return: (int) 0..3
+        """
+        return "spdf".index(self.initial_state[1])


 class Scan(object):
@ -301,11 +374,45 @@ class Scan(object):
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
-        self.energies = np.zeros((0))
-        self.thetas = np.zeros((0))
-        self.phis = np.zeros((0))
-        self.alphas = np.zeros((0))
-        
+        self.positions = {
+            'e': np.empty(0),
+            't': np.empty(0),
+            'p': np.empty(0),
+            'a': np.empty(0),
+        }
+
+    @property
+    def energies(self):
+        return self.positions['e']
+
+    @energies.setter
+    def energies(self, value):
+        self.positions['e'] = value
+
+    @property
+    def thetas(self):
+        return self.positions['t']
+
+    @thetas.setter
+    def thetas(self, value):
+        self.positions['t'] = value
+
+    @property
+    def phis(self):
+        return self.positions['p']
+
+    @phis.setter
+    def phis(self, value):
+        self.positions['p'] = value
+
+    @property
+    def alphas(self):
+        return self.positions['a']
+
+    @alphas.setter
+    def alphas(self, value):
+        self.positions['a'] = value
+
    def copy(self):
        """
        create a copy of the scan.
@ -314,9 +421,9 @@ class Scan(object):
        """
        return copy.deepcopy(self)

-    def set_scan(self, filename, emitter, initial_state):
+    def import_scan_file(self, filename, emitter, initial_state):
        """
-        set file name of reference experiment and load it.
+        import the reference experiment.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.
@ -324,9 +431,8 @@ class Scan(object):
        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
-        this version introduces holo scans as an experimental feature.
-        for all other scan types, the scan file must exactly conform with a rectangular scan.
-        the following scans are currently supported:
+        holo scans are transparently mapped to rectangular scans by pmsco.
+        this method accepts the following scans:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
@ -347,43 +453,120 @@ class Scan(object):
        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
-            self.mode, positions = md.detect_scan_mode(self.raw_data)
+            self.mode, self.positions = md.detect_scan_mode(self.raw_data)

-            if 'e' in self.mode:
-                self.energies = positions['e']
-            else:
+            if 'e' not in self.mode:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except ValueError:
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

-            if 't' in self.mode:
-                self.thetas = positions['t']
-            else:
+            if 't' not in self.mode:
                try:
                    self.thetas = np.asarray((self.raw_data['t'][0], ))
                except ValueError:
                    logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
-                    self.thetas = np.zeros((1))
+                    self.thetas = np.zeros(1)

-            if 'p' in self.mode:
-                self.phis = positions['p']
-            else:
+            if 'p' not in self.mode:
                try:
                    self.phis = np.asarray((self.raw_data['p'][0], ))
                except ValueError:
                    logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
-                    self.phis = np.zeros((1))
+                    self.phis = np.zeros(1)

-            if 'a' in self.mode:
-                self.alphas = positions['a']
-            else:
+            if 'a' not in self.mode:
                try:
                    self.alphas = np.asarray((self.raw_data['a'][0], ))
                except ValueError:
                    logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
-                    self.alphas = np.zeros((1))
+                    self.alphas = np.zeros(1)
+
+    def define_scan(self, positions, emitter, initial_state):
+        """
+        define a cartesian (rectangular/grid) scan.
+
+        this method initializes the scan with a one- or two-dimensional cartesian scan
+        of the four possible scan dimensions.
+        the scan range is given as arguments, the intensity values are initialized as 1.
+        the file name and modulation functions are reset to empty and None, respectively.
+
+        the method can create the following scan schemes:
+
+        * intensity vs energy at fixed theta, phi
+        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
+        * intensity vs theta, phi, or alpha
+        * intensity vs theta and phi (rectangular holo scan)
+
+        @param positions: (dictionary of numpy arrays)
+            the dictionary must contain a one-dimensional array for each scan dimension 'e', 't', 'p' and 'a'.
+            these arrays must contain unique, equidistant positions.
+            constant dimensions must contain exactly one value.
+            missing angle dimensions default to 0,
+            a missing energy dimension results in a KeyError.
+
+        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
+
+        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".
+
+        """
+        self.filename = ""
+        self.emitter = emitter
+        self.initial_state = initial_state
+        self.mode = []
+        shape = 1
+
+        try:
+            self.energies = np.copy(positions['e'])
+        except KeyError:
+            logger.error("missing energy in define_scan arguments")
+            raise
+        else:
+            if self.energies.shape[0] > 1:
+                self.mode.append('e')
+                shape *= self.energies.shape[0]
+
+        try:
+            self.thetas = np.copy(positions['t'])
+        except KeyError:
+            logger.info("missing theta in define_scan arguments, defaulting to 0.0")
+            self.thetas = np.zeros(1)
+        else:
+            if self.thetas.shape[0] > 1:
+                self.mode.append('t')
+                shape *= self.thetas.shape[0]
+
+        try:
+            self.phis = np.copy(positions['p'])
+        except KeyError:
+            logger.info("missing phi in define_scan arguments, defaulting to 0.0")
+            self.phis = np.zeros(1)
+        else:
+            if self.phis.shape[0] > 1:
+                self.mode.append('p')
+                shape *= self.phis.shape[0]
+
+        try:
+            self.alphas = np.copy(positions['a'])
+        except KeyError:
+            logger.info("missing alpha in define_scan arguments, defaulting to 0.0")
+            self.alphas = np.zeros(1)
+        else:
+            if self.alphas.shape[0] > 1:
+                self.mode.append('a')
+                shape *= self.alphas.shape[0]
+
+        assert 0 < len(self.mode) <= 2, "unacceptable number of dimensions in define_scan"
+        assert not ('t' in self.mode and 'a' in self.mode), "unacceptable combination of dimensions in define_scan"
+
+        self.dtype = md.DTYPE_ETPAI
+        self.raw_data = np.zeros(shape, self.dtype)
+        dimensions = [self.positions[dim] for dim in ['e', 't', 'p', 'a']]
+        grid = np.meshgrid(*dimensions)
+        for i, dim in enumerate(['e', 't', 'p', 'a']):
+            self.raw_data[dim] = grid[i].reshape(-1)
+        self.raw_data['i'] = 1


 # noinspection PyMethodMayBeStatic
@ -465,9 +648,8 @@ class Project(object):
    #
    # @arg @c 'pop_size' (int)
    #   population size (number of particles) in the swarm or genetic optimization mode.
-    #   by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
+    #   by default, the population size is set to the number of parallel processes or 4, whichever is greater.
    #   you may want to override the default value in cases where the automatic choice is not appropriate.
-    #   the value can be set by the command line.
    # @arg @c 'seed_file' (string)
    #   name of a file containing the results from previous optimization runs.
    #   this can be used to resume a swarm or genetic optimization where it was interrupted before.
@ -537,9 +719,27 @@ class Project(object):
    # @arg 3 = emitter level: emitter nodes in addition to level 1.
    # @arg 4 = region level: region nodes in addition to level 1.

+    ## @var atomic_scattering_factory
+    # factory function to create an atomic scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.AtomicCalculator.
+    # the name of atomic scattering calculator classes should end in AtomicCalculator.
+
+    ## @var multiple_scattering_factory
+    # factory function to create a multiple scattering calculator
+    #
+    # this can also be the name of a class.
+    # the calculator must inherit from pmsco.calculators.calculator.Calculator
+    #
+    # example: pmsco.calculators.edac.EdacCalculator
+    #
+
    def __init__(self):
        self.mode = "single"
-        self.code = "edac"
+        self.job_name = ""
+        self.git_hash = ""
+        self.description = ""
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = mc.LegacyClusterGenerator(self)
@ -568,7 +768,8 @@ class Project(object):
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
        }
-        self.calculator_class = None
+        self.atomic_scattering_factory = InternalAtomicCalculator
+        self.multiple_scattering_factory = EdacCalculator
        self._tasks_fields = []
        self._db = database.ResultsDatabase()

@ -608,7 +809,7 @@ class Project(object):
        self.combined_scan = None
        self.combined_modf = None

-    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
+    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
        """
        add the file name of reference experiment and load it.
        
@ -627,6 +828,15 @@ class Project(object):
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.
+            the file is not loaded when the optional positions argument is present,
+            but the filename may serve as basename for output files (e.g. modulation function).
+
+        @param positions: (optional, dictionary of numpy arrays) scan positions.
+            if specified, the file given by filename is _not_ loaded,
+            and the scan positions are initialized from this dictionary.
+            the dictionary keys are the possible scan dimensions: 'e', 't', 'p', 'a'.
+            the arrays are one-dimensional and contain unique, equidistant positions.
+            constant dimensions have shape 1. see @ref Scan.define_scan.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

@ -638,11 +848,13 @@ class Project(object):
        @param modf_model: (dict) model parameters to be passed to the modulation function.

        @return (Scan) the new scan object (which is also a member of self.scans).
-
-        @todo the accepted scanning schemes should be generalized.
        """
        scan = Scan()
-        scan.set_scan(filename, emitter, initial_state)
+        if positions is not None:
+            scan.define_scan(positions, emitter, initial_state)
+            scan.filename = filename
+        else:
+            scan.import_scan_file(filename, emitter, initial_state)
        self.scans.append(scan)

        if modf_model is None:
@ -735,6 +947,41 @@ class Project(object):
        """
        self.timedelta_limit = timedelta

+    def log_project_args(self):
+        """
+        send some common project attributes to the log.
+
+        the attributes are normally logged at WARNING level.
+
+        this method is called by the main pmsco module after creating the project and assigning command line arguments.
+        it may be overridden to add logs of attributes of the sub-class.
+
+        @return: None
+        """
+        try:
+            logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
+            logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
+            logger.warning("optimization mode: {0}".format(self.mode))
+
+            for key in sorted(self.optimizer_params):
+                val = self.optimizer_params[key]
+                lev = logging.WARNING if val else logging.DEBUG
+                logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))
+
+            logger.warning("data directory: {0}".format(self.data_dir))
+            logger.warning("output file: {0}".format(self.output_file))
+
+            _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
+            logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
+
+            for idx, scan in enumerate(self.scans):
+                logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
+            for idx, sym in enumerate(self.symmetries):
+                logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
+
+        except AttributeError:
+            logger.warning("AttributeError in log_project_args")
+
    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine results of different symmetry into one result and calculate the modulation function.
@ -937,6 +1184,23 @@ class Project(object):
        else:
            md.save_data(parent_task.modf_filename, modf)

+    def get_git_hash(self):
+        """
+        get the git commit (hash) of the running code (HEAD)
+
+        the method looks for a git repository in the source tree of this module.
+        if successful, it returns the hash string of the HEAD commit.
+
+        @return: hexadecimal hash string.
+            empty string if the file is not in a git repository.
+        """
+        try:
+            repo = git.Repo(__file__, search_parent_directories=True)
+        except git.exc.InvalidGitRepositoryError:
+            return ""
+        else:
+            return repo.head.commit.hexsha
+
    def setup(self, handlers):
        """
        prepare for calculations.
@ -954,11 +1218,13 @@ class Project(object):

        @return: None
        """
+        self.git_hash = self.get_git_hash()
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
+        fields.append("secs")
        fields = ["_" + f for f in fields]
        dom = self.create_domain()
-        model_fields = dom.start.keys()
+        model_fields = list(dom.start.keys())
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields
@ -968,16 +1234,16 @@ class Project(object):
            outfile.write(" ".join(fields))
            outfile.write("\n")

-        # todo : fill in the descriptive fields, change to file-database
+        # todo : change to file-database
        self._db.connect(":memory:")
        project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
        job_id = self._db.register_job(project_id,
-                                       "job-name",
+                                       self.job_name,
                                       self.mode,
                                       socket.gethostname(),
-                                       "git-hash",
+                                       self.git_hash,
                                       datetime.datetime.now(),
-                                       "description")
+                                       self.description)
        self._db.register_params(model_fields)
        self._db.create_models_view()

@ -1012,6 +1278,7 @@ class Project(object):
                values_dict = {"_" + k: v for k, v in values_dict.items()}
                values_dict.update(parent_task.model)
                values_dict['_rfac'] = parent_task.rfac
+                values_dict['_secs'] = parent_task.time.total_seconds()
                values_list = [values_dict[field] for field in self._tasks_fields]
                with open(self.output_file + ".tasks.dat", "a") as outfile:
                    outfile.write(" ".join(format(value) for value in values_list) + "\n")
@ -1258,6 +1525,76 @@ class Project(object):

        return _files

+    def before_atomic_scattering(self, task, par, clu):
+        """
+        project hook before atomic scattering factors are calculated.
+
+        this method derives modified Params and Cluster objects for the atomic scattering calculation
+        from the original objects that will be used in the multiple scattering calculation.
+
+        in the basic version, the method does not change the objects
+        except that it returns None for the root task (reference cluster).
+        subclasses may override it to modify or replace the cluster.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            if the model index is -1, the project can return the global reference cluster
+            (to calculate the fixed scattering factors that will be used for all models)
+            or None if no global scattering factors should be calculated.
+            do not modify this object!
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+            the method can make modifications to this object instance directly.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            the method can make modifications to this object instance directly.
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used for the calculation of atomic scattering factors.
+            these should either be the original function arguments,
+            or copies of the original arguments.
+            if atomic scattering factors should not be calculated, the return values should be None.
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
+    def after_atomic_scattering(self, task, par, clu):
+        """
+        project hook after atomic scattering factors are calculated.
+
+        this method cleans up the Params and Cluster objects from the atomic scattering calculation
+        so that they can be used in the multiple scattering calculation.
+
+        in the basic version, the method just passes the input parameters for model tasks
+        and returns None for the root task.
+        subclasses may override it and modify the cluster and/or input parameters
+        so that the desired atomic scattering factors are used.
+
+        @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
+            if the model index is -1, the project should return the global reference cluster
+            (to calculate the fixed scattering factors that will be used for all models)
+            or None if no global scattering factors should be calculated.
+
+        @param par: @ref pmsco.project.Params object representing the preliminary
+            multiple scattering input parameters of the current task.
+
+        @param clu: @ref pmsco.cluster.Cluster object representing the preliminary
+            multiple scattering cluster of the current task.
+            do not modify this object, make a copy!
+
+        @return: a tuple (par, clu) where par and clu are the input parameters and cluster
+            to be used in the multiple scattering calculation.
+            these should either be the original function arguments,
+            or copies of the original arguments.
+        """
+        if task.id.model >= 0:
+            return par, clu
+        else:
+            return None, None
+
    def cleanup(self):
        """
        delete unwanted files at the end of a project.