update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions

View File

@ -8,10 +8,17 @@ python pmsco [pmsco-arguments]
@endverbatim
"""
import pmsco
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os.path
file_dir = os.path.dirname(__file__) or '.'
root_dir = os.path.join(file_dir, '..')
root_dir = os.path.abspath(root_dir)
sys.path[0] = root_dir
if __name__ == '__main__':
args, unknown_args = pmsco.parse_cli()
pmsco.main_pmsco(args, unknown_args)
sys.exit(0)
import pmsco.pmsco
pmsco.pmsco.main()

View File

@ -1,5 +1,5 @@
"""
@package pmsco.calculator
@package pmsco.calculators.calculator
abstract scattering program interface.
this module declares the basic interface to scattering programs.
@ -11,17 +11,21 @@ TestCalcInterface is provided for testing the PMSCO code quickly without calling
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import numpy as np
import data as md
import cluster as mc
import pmsco.data as md
__author__ = 'matthias muntwiler'
@ -38,57 +42,24 @@ class Calculator(object):
or <code>output_file + '.etpai'</code> depending on scan mode.
all other intermediate files are deleted unless keep_temp_files is True.
@param params: a msco_project.Params() object with all necessary values except cluster and output files set.
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param cluster: a msco_cluster.Cluster() object with all atom positions set.
@param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.
@param scan: a msco_project.Scan() object describing the experimental scanning scheme.
@param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.
@param output_file: base name for all intermediate and output files
@return: result_file, files_cats
@arg result_file is the name of the main ETPI or ETPAI result file to be further processed.
@arg files_cats is a dictionary that lists the names of all created data files with their category.
@return: (str, dict) result_file, and dictionary of created files {filename: category}
@return: (str, dict) result_file, and dictionary of created files.
@arg the first element is the name of the main ETPI or ETPAI result file to be further processed.
@arg the second element is a dictionary that lists the names of all created data files with their category.
the dictionary key is the file name,
the value is the file category (cluster, phase, etc.).
"""
return None, None
def check_cluster(self, cluster, output_file):
"""
export the cluster in XYZ format for reference.
along with the complete cluster, the method also saves cuts in the xz (extension .y.xyz) and yz (.x.xyz) plane.
@param cluster: a pmsco.cluster.Cluster() object with all atom positions set.
@param output_file: base name for all intermediate and output files
@return: dictionary listing the names of the created files with their category.
the dictionary key is the file name,
the value is the file category (cluster).
@warning experimental: this method may be moved elsewhere in a future version.
"""
xyz_filename = output_file + ".xyz"
cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
files = {xyz_filename: 'cluster'}
clucut = mc.Cluster()
clucut.copy_from(cluster)
clucut.trim_slab("x", 0.0, 0.1)
xyz_filename = output_file + ".x.xyz"
clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
files[xyz_filename] = 'cluster'
clucut.copy_from(cluster)
clucut.trim_slab("y", 0.0, 0.1)
xyz_filename = output_file + ".y.xyz"
clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
files[xyz_filename] = 'cluster'
return files
class TestCalculator(Calculator):
"""
@ -127,5 +98,5 @@ class TestCalculator(Calculator):
md.save_data(etpi_filename, result_etpi)
files = {clu_filename: 'cluster', etpi_filename: 'energy'}
files = {clu_filename: 'cluster', etpi_filename: 'region'}
return etpi_filename, files

View File

@ -1,25 +1,30 @@
"""
@package pmsco.edac_calculator
@package pmsco.calculators.edac
Garcia de Abajo EDAC program interface.
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
import os
from __future__ import print_function
import logging
import math
import numpy as np
import calculator
import data as md
import cluster as mc
import edac.edac as edac
import os
import pmsco.calculators.calculator as calculator
from pmsco.compat import open
import pmsco.data as md
import pmsco.cluster as mc
import pmsco.edac.edac as edac
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -44,18 +49,19 @@ class EdacCalculator(calculator.Calculator):
if alpha is defined, theta is implicitly set to normal emission! (to be generalized)
TODO: some parameters are still hard-coded.
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.
@param filepath: (str) name and path of the file to be created.
"""
with open(filepath, "w") as f:
f.write("verbose off\n")
f.write("cluster input %s\n" % (params.cluster_file))
f.write("emitters %u l(A)\n" % (len(params.emitters)))
f.write("cluster input {0}\n".format(params.cluster_file))
f.write("emitters {0:d} l(A)\n".format(len(params.emitters)))
for em in params.emitters:
f.write("%g %g %g %u\n" % em)
#for iat in range(params.atom_types):
#pf = params.phase_file[iat]
#pf = pf.replace(".pha", ".edac.pha")
#f.write("scatterer %u %s\n" % (params.atomic_number[iat], pf))
f.write("{0:f} {1:f} {2:f} {3:d}\n".format(*em))
en = scan.energies + params.work_function
en_min = en.min()
@ -106,7 +112,7 @@ class EdacCalculator(calculator.Calculator):
assert th_num < th.shape[0] * 10, \
"linearization of theta scan causes excessive oversampling {0}/{1}".format(th_num, th.shape[0])
f.write("beta {0}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
f.write("beta {0}\n".format(params.polar_incidence_angle))
f.write("incidence {0} {1}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
f.write("emission angle theta {th0:f} {th1:f} {nth:d}\n".format(th0=th_min, th1=th_max, nth=th_num))
@ -136,35 +142,49 @@ class EdacCalculator(calculator.Calculator):
f.write("initial state {0}\n".format(params.initial_state))
polarizations = {'H': 'LPx', 'V': 'LPy', 'L': 'LCP', 'R': 'RCP'}
f.write("polarization {0}\n".format(polarizations[params.polarization]))
f.write("muffin-tin\n")
f.write("V0 E(eV) {0}\n".format(params.inner_potential))
f.write("cluster surface l(A) {0}\n".format(params.z_surface))
scatterers = ["scatterer {at} {fi}\n".format(at=at, fi=fi)
for (at, fi) in params.phase_files.items()
if os.path.isfile(fi)]
if scatterers:
for scat in scatterers:
f.write(scat)
else:
f.write("muffin-tin\n")
f.write("V0 E(eV) {0:f}\n".format(params.inner_potential))
f.write("cluster surface l(A) {0:f}\n".format(params.z_surface))
f.write("imfp SD-UC\n")
f.write("temperature %g %g\n" % (params.experiment_temperature, params.debye_temperature))
f.write("temperature {0:f} {1:f}\n".format(params.experiment_temperature, params.debye_temperature))
f.write("iteration recursion\n")
f.write("dmax l(A) %g\n" % (params.dmax))
f.write("lmax %u\n" % (params.lmax))
f.write("orders %u " % (len(params.orders)))
for order in params.orders:
f.write("%u " % (order))
f.write("\n")
f.write("emission angle window 1\n")
f.write("scan pd %s\n" % (params.output_file))
f.write("dmax l(A) {0:f}\n".format(params.dmax))
f.write("lmax {0:d}\n".format(params.lmax))
f.write("orders {0:d} ".format(len(params.orders)))
f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
# f.write("cluster output l(A) out.clu")
# problems:
# - muffin-tin relabels atoms
# - there can be multiple atom types for the same chemical element
# - we have to compare coordinates to find the mapping between input and output cluster
# f.write("scan scatterer i phase-shifts i.pha")
# f.write("scan scatterer i potential i.pot")
f.write("scan pd {0}\n".format(params.output_file))
f.write("end\n")
def run(self, params, cluster, scan, output_file):
"""
run EDAC with the given parameters and cluster.
@param params: a msc_param.Params() object with all necessary values except cluster and output files set.
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param cluster: a msc_cluster.Cluster(format=FMT_EDAC) object with all atom positions set.
@param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.
@param scan: a msco_project.Scan() object describing the experimental scanning scheme.
@param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.
@param output_file: base name for all intermediate and output files
@return: result_file, files_cats
@return: (str, dict) result_file, and dictionary of created files {filename: category}
"""
# set up scan
@ -213,8 +233,14 @@ class EdacCalculator(calculator.Calculator):
pass
result_etpi = md.interpolate_hemi_scan(result_etpi, hemi_tpi)
if result_etpi.shape[0] != scan.raw_data.shape[0]:
logger.error("scan length mismatch: EDAC result: %u, scan data: %u", result_etpi.shape[0], scan.raw_data.shape[0])
if params.fixed_cluster:
expected_shape = max(scan.energies.shape[0], 1) * max(scan.alphas.shape[0], 1)
else:
expected_shape = max(scan.energies.shape[0], 1) * max(scan.phis.shape[0], scan.thetas.shape[0], 1)
if result_etpi.shape[0] != expected_shape:
logger.warning(BMsg("possible scan length mismatch: EDAC result: {result}, expected: {expected}",
result=result_etpi.shape[0], expected=expected_shape))
logger.debug("save result to file %s", etpi_filename)
md.save_data(etpi_filename, result_etpi)

View File

@ -13,9 +13,12 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
import calculator
import data as md
import msc.msc as msc
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pmsco.calculators.calculator as calculator
import pmsco.data as md
import pmsco.msc.msc as msc
import logging
logger = logging.getLogger(__name__)
@ -32,7 +35,7 @@ class MscCalculator(calculator.Calculator):
f.write(" %s\n" % (params.polarization) )
f.write(" %4u\n" % (params.scattering_level) )
f.write(" %7.2f%7.2f\n" % (params.fcut, params.cut) )
f.write(" %12.6f\n" % (params.angular_broadening) )
f.write(" %12.6f\n" % (params.angular_resolution) )
f.write(" %12.6f\n" % (params.lattice_constant) )
f.write(" %12.6f\n" % (params.z_surface) )
f.write(" %4u\n" % (params.atom_types) )

View File

@ -14,12 +14,17 @@ pip install --user periodictable
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut
@copyright (c) 2015-18 by Paul Scherrer Institut
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import periodictable as pt
import sys
## default file format identifier
FMT_DEFAULT = 0
@ -30,33 +35,40 @@ FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3
# python version dependent type of chemical symbol
if sys.version_info[0] >= 3:
_SYMBOL_TYPE = 'U2'
else:
_SYMBOL_TYPE = 'S2'
## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i','i4'), ('t','i4'), ('s','a2'), ('x','f4'), ('y','f4'), ('z','f4'), ('e','u1')]
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
('e', 'u1')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i','t','s','x','y','z','e']
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']
## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i','i4'), ('x','f4'), ('y','f4'), ('z','f4'), ('t','i4')]
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
## file format of MSC cluster file
FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
## field (column) names of MSC cluster file
FIELDS_CLUSTER_MSC = ['i','x','y','z','t']
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']
## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i','i4'), ('t','i4'), ('x','f4'), ('y','f4'), ('z','f4')]
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i','t','x','y','z']
FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']
## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s','a2'), ('x','f4'), ('y','f4'), ('z','f4')]
DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s','x','y','z']
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']
class Cluster(object):
@ -137,7 +149,7 @@ class Cluster(object):
"""
Copy the data from another cluster.
@param cluster (Cluster): other Cluster object.
@param cluster: (Cluster) other Cluster object.
"""
self.data = cluster.data.copy()
@ -164,10 +176,10 @@ class Cluster(object):
@param x, y, z: (float) atom coordinates in the cluster
@param emitter: (uint) 1 = emitter, 0 = regular
@param emitter: (int or bool) True = emitter, False = scatterer
"""
symbol = pt.elements[element_number].symbol
element = (index, element_number, symbol, x, y, z, emitter)
element = (index, element_number, symbol, x, y, z, int(emitter))
return element
def add_atom(self, atomtype, v_pos, is_emitter):
@ -178,10 +190,10 @@ class Cluster(object):
@param v_pos: (numpy.ndarray, shape = (3)) position vector
@param is_emitter: (uint) 1 = emitter, 0 = regular
@param is_emitter: (int or bool) True = emitter, False = scatterer
"""
n0 = self.data.shape[0] + 1
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter)
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
self.data = np.append(self.data, np.array(element,
dtype=self.data.dtype))
@ -342,6 +354,29 @@ class Cluster(object):
return idx
def translate(self, vector, element=0):
"""
translate the cluster or all atoms of a specified element.
@param vector: (numpy.ndarray) 3-dimensional displacement vector.
@param element: (int or str) chemical element number or symbol if only atoms of a specific element should be affected.
by default (element = 0), all atoms are moved.
@return: (numpy.ndarray) indices of the atoms that have been shifted.
"""
if element:
try:
sel = self.data['t'] == int(element)
except ValueError:
sel = self.data['s'] == element
else:
sel = np.ones_like(self.data['t'])
idx = np.where(sel)
self.data['x'][idx] += vector[0]
self.data['y'][idx] += vector[1]
self.data['z'][idx] += vector[2]
return idx
def matrix_transform(self, matrix):
"""
apply a transformation matrix to each atom of the cluster.
@ -474,6 +509,31 @@ class Cluster(object):
self.data = self.data[idx]
self.update_index()
def trim_paraboloid(self, rxy, z0):
"""
remove atoms outside a given paraboloid.
the paraboloid is defined by z(r) = z0 * (1 - r**2 / rxy**2),
where r**2 = x**2 + y**2.
its apex is at (0, 0, z0).
z(r) = 0 for x**2 + y**2 = rxy**2.
the coordinates (x,y,z) of atoms to keep must match z >= z(r).
@param rxy: (float) radius of the paraboloid at z = 0.
@param z0: (float) vertical coordinate of the apex (at x = y = 0).
in the usual cluster layout where the surface is near z = 0 and the atoms at lower z,
this value is negative.
@return: None
"""
rsq = self.data['x']**2 + self.data['y']**2
pz = z0 * (1. - rsq / rxy**2)
idx = np.where(self.data['z'] >= pz)
self.data = self.data[idx]
self.update_index()
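# Illustrative sketch (not part of this commit), assuming that Cluster() starts from an
# empty atom list as in this module: shift a sub-set of atoms with the new translate
# method and prune the cluster with trim_paraboloid. The atoms and values are made up.
clu = Cluster()
clu.add_atom(8, np.array([0.0, 0.0, 0.0]), 1)        # oxygen emitter at the origin
clu.add_atom(1, np.array([1.0, 0.0, -1.0]), 0)       # hydrogen scatterer below the surface
clu.translate(np.array([0.0, 0.0, -0.5]), element=1)  # move only the hydrogen atoms
clu.trim_paraboloid(rxy=5.0, z0=-10.0)               # discard atoms outside the paraboloid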
def trim_sphere(self, radius):
"""
remove atoms outside a given sphere.
@ -637,7 +697,7 @@ class Cluster(object):
"""
idx = self.data['e'] != 0
ems = self.data[['x', 'y', 'z', 't']][idx]
return map(tuple, ems)
return [tuple(em) for em in ems]
def get_emitter_count(self):
"""
@ -702,7 +762,7 @@ class Cluster(object):
else:
self.data['e'] = 0
pos = self.positions()
pos = self.get_positions()
# note: np.linalg.norm does not accept axis argument in version 1.7
# (check np.version.version)
norm = np.sqrt(np.sum(pos**2, axis=1))
@ -739,13 +799,14 @@ class Cluster(object):
"""
self.data['i'] = np.arange(1, self.data.shape[0] + 1)
def save_to_file(self, f, fmt=FMT_DEFAULT, comment=""):
def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
"""
save the cluster to a file which can be read by the scattering program.
the method updates the atom index because some file formats require an index column.
@param f: (string/handle) path name or open file handle of the cluster file.
if the filename ends in .gz, the file is saved in compressed gzip format
@param fmt: (int) file format.
must be one of the FMT_ constants.
@ -755,7 +816,10 @@ class Cluster(object):
not used in other file formats.
by default, self.comment is used.
@remark if the filename ends in .gz, the file is saved in compressed gzip format
@param emitters_only: (bool) if True, only atoms marked as emitters are saved.
by default, all atoms are saved.
@return None
"""
if fmt == FMT_DEFAULT:
fmt = self.file_format
@ -763,6 +827,13 @@ class Cluster(object):
if not comment:
comment = self.comment
self.update_index()
if emitters_only:
idx = self.data['e'] != 0
data = self.data[idx]
else:
data = self.data
if fmt == FMT_MSC:
file_format = FMT_CLUSTER_MSC
fields = FIELDS_CLUSTER_MSC
@ -770,16 +841,158 @@ class Cluster(object):
elif fmt == FMT_EDAC:
file_format = FMT_CLUSTER_EDAC
fields = FIELDS_CLUSTER_EDAC
header = "%u l(A)" % (self.data.shape[0])
header = "{nat} l(A)".format(nat=data.shape[0])
elif fmt == FMT_XYZ:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{0}\n{1}".format(self.data.shape[0], comment)
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
else:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{0}\n{1}".format(self.data.shape[0], comment)
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
self.update_index()
data = self.data[fields]
data = data[fields]
np.savetxt(f, data, fmt=file_format, header=header, comments="")
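# Illustrative sketch (not part of this commit): export only the emitter atoms of the
# cluster built in the sketch above using the new emitters_only switch; the file name is made up.
clu.save_to_file("demo.emitters.xyz", fmt=FMT_XYZ, emitters_only=True)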
class ClusterGenerator(object):
"""
cluster generator class.
this class bundles the cluster methods in one place
so that it's easier to exchange them for different kinds of clusters.
the project must override at least the create_cluster method.
if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
"""
def __init__(self, project):
"""
initialize the cluster generator.
@param project: reference to the project object.
cluster generators may need to look up project parameters.
"""
self.project = project
def count_emitters(self, model, index):
"""
return the number of emitter configurations for a particular model, scan and symmetry.
the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
by default, the method returns 1, which means that there is only one emitter configuration.
emitter configurations are mainly a way to distribute the calculations to multiple processes
since the resulting diffraction patterns add up incoherently.
for this to work, the create_cluster() method must pay attention to the emitter index
and generate either a full cluster with all emitters (single process)
or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
whether all emitters are calculated in one or multiple processes is decided at run-time
based on the available resources.
note that this function returns the number of _configurations_, not _atoms_.
an emitter configuration (declared in a Cluster) may include more than one atom.
it is up to the project what is included in a particular configuration.
to enable multiple emitter configurations, the derived project class must override this method
and return a number greater than 1.
@note in some cases it may be most efficient to call create_cluster and
return Cluster.get_emitter_count() of the generated cluster.
this is possible because the method is called with emitter index -1.
model and index can be passed unchanged to create_cluster.
@param model (dictionary) model parameters to be used in the calculation.
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index must be -1.
@return number of emitter configurations.
this implementation returns the default value of 1.
"""
return 1
def create_cluster(self, model, index):
"""
create a Cluster object given the model parameters and calculation index.
the generated cluster will typically depend on the model parameters.
depending on the project, it may also depend on the scan index, symmetry index and emitter index.
the scan index can be used to generate a different cluster for different scan geometry,
e.g., if some atoms can be excluded due to a longer mean free path.
if this is not the case for the specific project, the scan index can be ignored.
the symmetry index may select a particular domain that has a different atomic arrangement.
in this case, depending on the value of index.sym, the function must generate a cluster corresponding
to the particular domain/symmetry.
the method can ignore the symmetry index if the project defines only one symmetry,
or if the symmetry does not correspond to a different atomic structure.
the emitter index selects a particular emitter configuration.
depending on the value of the emitter index, the method must react differently:
1. if the value is -1, return the full cluster and mark all inequivalent emitter atoms.
emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
the full diffraction scan will be calculated in one calculation.
2. if the value is greater or equal to zero, generate the cluster with the emitter configuration
selected by the emitter index.
the index is in the range between 0 and the return value of count_emitters() minus 1.
the results of the individual emitter calculations are summed up in combine_emitters().
the code should ideally be written such that either case yields the same diffraction result.
if count_emitters() always returns 1 (default), the second case does not have to be implemented,
and the method can ignore the emitter index.
the method must ignore the region index.
if the creation of a cluster fails due to invalid model parameters,
rather than raising an exception or constraining values,
the method should send an error message to the logger and return an empty cluster or None.
@param model (dictionary) model parameters to be used in the calculation.
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index.
if -1, generate the full cluster and mark all emitters.
if greater or equal to zero, the value is a zero-based index of the emitter configuration.
@return None.
sub-classes must return a valid Cluster object or None in case of failure.
"""
return None
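# Hypothetical sketch (not part of this commit): a minimal generator that follows the
# contract described above. The two-atom geometry, the 'dz' model parameter and the
# class name are invented for illustration only.
class TwoEmitterGenerator(ClusterGenerator):
    def count_emitters(self, model, index):
        # two emitter configurations, one per emitter atom
        return 2

    def create_cluster(self, model, index):
        clu = Cluster()
        # emit == -1: full cluster with all emitters marked (single calculation)
        # emit >= 0: same atoms, but only the selected emitter configuration is marked
        clu.add_atom(8, np.array([0.0, 0.0, 0.0]), index.emit in (-1, 0))
        clu.add_atom(8, np.array([0.0, 0.0, -model['dz']]), index.emit in (-1, 1))
        return clu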
class LegacyClusterGenerator(ClusterGenerator):
"""
cluster generator class for projects that don't declare a generator.
in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
this class redirects generator calls to the project methods
providing compatibility with older project code.
"""
def __init__(self, project):
super(LegacyClusterGenerator, self).__init__(project)
def count_emitters(self, model, index):
"""
redirect the call to the corresponding project method if implemented.
"""
try:
return self.project.count_emitters(model, index)
except AttributeError:
return 1
def create_cluster(self, model, index):
"""
redirect the call to the corresponding project method.
"""
return self.project.create_cluster(model, index)

40
pmsco/compat.py Normal file
View File

@ -0,0 +1,40 @@
"""
@package pmsco.compat
compatibility code
code bits to provide compatibility for different python versions.
currently supports Python 2.7 and 3.6.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from io import open as io_open
def open(fname, mode='r', encoding='latin1'):
"""
open a data file for read/write/append using the default str type.
this is a drop-in replacement for io.open
where data is exchanged via the built-in str type of python,
whether this is a byte string (python 2) or unicode string (python 3).
the file is assumed to be a latin-1 encoded binary file.
@param fname: file name and path
@param mode: 'r', 'w' or 'a'
@param encoding: 'latin1' (default), 'ascii' or 'utf-8'
@return file handle
"""
if isinstance(b'b', str):
# python 2
mode += 'b'
kwargs = {}
else:
# python 3
mode += 't'
kwargs = {'encoding': encoding}
return io_open(fname, mode, **kwargs)
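# Illustrative sketch (not part of this commit): with this wrapper the same code handles
# plain str data under Python 2 (byte strings) and Python 3 (unicode, latin-1 encoded).
# The file name is made up.
with open("demo.txt", "w") as f:
    f.write("--- demo header\n")
with open("demo.txt", "r") as f:
    header = f.readline().strip()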

View File

@ -1,21 +1,29 @@
"""
@package pmsco.data
import, export, evaluation of msc data
import, export, evaluation of msc data.
this module provides common functions for loading/saving and manipulating PED scan data sets.
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-17 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import os
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import os
import scipy.optimize as so
import loess.loess as loess
from pmsco.compat import open
import pmsco.loess.loess as loess
logger = logging.getLogger(__name__)
@ -149,7 +157,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
data[i]['i'] = selected intensity column
@endverbatim
"""
with open(filename, 'r') as f:
with open(filename, "r") as f:
header1 = f.readline().strip()
header2 = f.readline().strip()
if not header1 == '--- scan PD':
@ -259,19 +267,38 @@ def load_data(filename, dtype=None):
DTYPE_EI, DTYPE_ETPI, DTYPE_ETPIS, DTYPE_ETPAI, or DTYPE_ETPAIS.
by default, the function uses the extension to determine the data type.
the actual type can be read from the dtype attribute of the returned array.
if the extension is missing, DTYPE_EI is assumed.
@return one-dimensional numpy structured ndarray with data
@raise IOError if the file cannot be read.
@raise IndexError if the number of columns is lower than expected based on the dtype or extension.
"""
if not dtype:
(root, ext) = os.path.splitext(filename)
datatype = ext[1:].upper()
dtype = DTYPES[datatype]
ext_type = ext[1:].upper()
try:
dtype = DTYPES[ext_type]
except KeyError:
dtype = DTYPE_EI
data = np.loadtxt(filename, dtype=dtype)
sort_data(data)
return data
def format_extension(data):
"""
format the file extension based on the contents of an array.
@param data ETPI-like structured numpy.ndarray.
@return: file extension string including the leading period.
"""
return "." + "".join(data.dtype.names)
def save_data(filename, data):
"""
save column data (ETPI, and the like) to a text file.
@ -331,20 +358,24 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
undefined fields are initialized to zero.
if the parameter is unspecified, all fields are initialized to zero.
@return: re-structured numpy array
@return: re-structured numpy array or
@c data if the new and original data types are the same.
"""
new_data = np.zeros(data.shape, dtype=dtype)
fields = [dt[0] for dt in dtype if dt[0] in data.dtype.names]
if data.dtype == dtype:
return data
else:
new_data = np.zeros(data.shape, dtype=dtype)
fields = [dt[0] for dt in dtype if dt[0] in data.dtype.names]
if defaults is not None:
for field, value in defaults.iteritems():
if field in new_data.dtype.names:
new_data[field] = value
if defaults is not None:
for field, value in defaults.items():
if field in new_data.dtype.names:
new_data[field] = value
for field in fields:
new_data[field] = data[field]
for field in fields:
new_data[field] = data[field]
return new_data
return new_data
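# Illustrative sketch (not part of this commit): convert an ETPI array to the ETPAIS
# layout and pre-fill one of the new columns; DTYPE_ETPI and DTYPE_ETPAIS are defined
# elsewhere in this module, the defaults value is arbitrary.
etpi = np.zeros(3, dtype=DTYPE_ETPI)
etpais = restructure_data(etpi, dtype=DTYPE_ETPAIS, defaults={'s': 1.0})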
def common_dtype(scans):
@ -584,7 +615,7 @@ def calc_modfunc_mean(data):
return modf
def calc_modfunc_loess(data):
def calc_modfunc_loess(data, smth=0.4):
"""
calculate the modulation function using LOESS (locally weighted regression) smoothing.
@ -609,9 +640,11 @@ def calc_modfunc_loess(data):
the modulation function is calculated for the finite-valued scan points.
NaNs are ignored and do not affect the finite values.
@return copy of the data array with the modulation function in the 'i' column.
@param smth: size of the smoothing window relative to the size of the scan.
reasonable values are between 0.2 and 0.5.
the default value 0.4 has been found to work in many cases.
@todo is a fixed smoothing factor of 0.5 okay?
@return copy of the data array with the modulation function in the 'i' column.
"""
sel = np.isfinite(data['i'])
_data = data[sel]
@ -626,7 +659,7 @@ def calc_modfunc_loess(data):
factors = [_data[axis] for axis in scan_mode]
lo.set_x(np.hstack(tuple(factors)))
lo.set_y(_data['i'])
lo.model.span = 0.5
lo.model.span = smth
loess.loess(lo)
modf['i'][sel] = lo.get_fitted_residuals() / lo.get_fitted_values()
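# Hypothetical usage sketch (not part of this commit): the smoothing window can now be
# tuned per call instead of the previously fixed 0.5; the file names are made up.
import pmsco.data as md
data = md.load_data("scan01.etpi")
modf = md.calc_modfunc_loess(data, smth=0.3)
md.save_data("scan01.modf.etpi", modf)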

1312
pmsco/database.py Normal file

File diff suppressed because it is too large.

View File

@ -11,7 +11,9 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path
import datetime
@ -19,8 +21,9 @@ import signal
import collections
import copy
import logging
from attrdict import AttrDict
from mpi4py import MPI
from helpers import BraceMessage as BMsg
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -48,7 +51,99 @@ TAG_INVALID_RESULT = 3
## the message is empty
TAG_ERROR_ABORTING = 4
CalcID = collections.namedtuple('CalcID', ['model', 'scan', 'sym', 'emit', 'region'])
## levels of calculation tasks
#
CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region')
## intermediate sub-class of CalcID
#
# this class should not be instantiated.
# instead, use CalcID which provides some useful helper methods.
#
_CalcID = collections.namedtuple('_CalcID', CALC_LEVELS)
class CalcID(_CalcID):
"""
named tuple class to uniquely identify a calculation task.
this is a 5-tuple of indices, one index per task level.
a non-negative index refers to a specific instance in the task hierarchy.
the ID is defined as a named tuple so that it can be used as key of a dictionary.
cf. @ref CalculationTask for further detail.
compared to a plain named tuple, the CalcID class provides additional helper methods and properties.
example constructor: CalcID(1, 2, 3, 4, 5).
"""
@property
def levels(self):
"""
level names.
this property returns the defined level names in a tuple.
this is the same as @ref CALC_LEVELS.
@return: tuple of level names
"""
return self._fields
@property
def level(self):
"""
specific level of a task, dictionary key form.
this corresponds to the name of the last non-negative component.
@return: attribute name corresponding to the task level.
empty string if all members are negative (the root task).
"""
for k in reversed(self._fields):
if self.__getattribute__(k) >= 0:
return k
return ''
@property
def numeric_level(self):
"""
specific level of a task, numeric form.
this corresponds to the last non-negative value in the sequence of indices.
@return: index corresponding to the significant task level component of the id.
the value ranges from -1 to len(CalcID) - 1.
it is -1 if all indices are negative (root task).
"""
for k in reversed(range(len(self))):
if self[k] >= 0:
return k
return -1
def collapse_levels(self, level):
"""
return a new CalcID that is collapsed at a specific level.
the method returns a new CalcID object where the indices below the given level are -1 (undefined).
this can be seen as collapsing the tree at the specified node (level).
@note because a CalcID is immutable, this method returns a new instance.
@param level: level at which to collapse.
the index at this level remains unchanged, lower ones are set to -1.
the level can be specified by attribute name (str) or numeric index (-1..4).
@raise ValueError if level is not numeric and not in CALC_LEVELS.
@return: new CalcID instance.
"""
try:
level = int(level)
except ValueError:
level = CALC_LEVELS.index(level)
assert -1 <= level < len(CALC_LEVELS)
mask = {l: -1 for (i, l) in enumerate(CALC_LEVELS) if i > level}
return self._replace(**mask)
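# Illustrative sketch (not part of this commit): behaviour of the CalcID helpers defined
# above; the concrete index values are arbitrary.
cid = CalcID(model=3, scan=1, sym=0, emit=-1, region=-1)
assert cid.level == 'sym'                  # name of the last defined component
assert cid.numeric_level == 2              # its numeric position
assert cid.collapse_levels('scan') == CalcID(3, 1, -1, -1, -1)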
class CalculationTask(object):
@ -71,11 +166,11 @@ class CalculationTask(object):
specified members must be greater or equal to zero.
-1 is the wildcard which is used in parent tasks,
where, e.g., no specific symmetry is chosen.
the root task has the ID (-1, -1, -1, -1).
the root task has the ID (-1, -1, -1, -1, -1).
"""
## @var id (CalcID)
# named tuple CalcID containing the 4-part calculation task identifier.
# named tuple CalcID containing the 5-part calculation task identifier.
## @var parent_id (CalcID)
# named tuple CalcID containing the task identifier of the parent task.
@ -105,6 +200,21 @@ class CalculationTask(object):
## @var modf_filename (string)
# name of the ETPI or ETPAI file that contains the resulting modulation function.
## @var result_valid (bool)
# validity status of the result file @ref result_filename.
#
# if True, the file must exist and contain valid data according to the task specification.
# the value is set True when a calculation task completes successfully.
# it may be reset later to invalidate the data if an error occurs during processing.
#
# validity of a parent task requires validity of all child tasks.
## @var rfac (float)
# r-factor value of the task result.
#
# the rfac field is written by @ref pmsco.project.Project.evaluate_result.
# the initial value is Not A Number.
## @var time (timedelta)
# execution time of the task.
#
@ -125,7 +235,7 @@ class CalculationTask(object):
# scan positions to substitute the ones from the original scan.
#
# this is used to distribute scans over multiple calculator processes,
# cf. e.g. @ref EnergyRegionHandler.
# cf. e.g. @ref pmsco.handlers.EnergyRegionHandler.
#
# dictionary key must be the scan dimension 'e', 't', 'p', 'a'.
# the value is a numpy.ndarray containing the scan positions.
@ -147,6 +257,7 @@ class CalculationTask(object):
self.time = datetime.timedelta()
self.files = {}
self.region = {}
self.rfac = float('nan')
def __eq__(self, other):
"""
@ -192,7 +303,7 @@ class CalculationTask(object):
msg['id'] = CalcID(**msg['id'])
if isinstance(msg['parent_id'], dict):
msg['parent_id'] = CalcID(**msg['parent_id'])
for k, v in msg.iteritems():
for k, v in msg.items():
self.__setattr__(k, v)
def format_filename(self, **overrides):
@ -204,13 +315,8 @@ class CalculationTask(object):
@return a string consisting of the concatenation of the base name, the ID, and the extension.
"""
parts = {}
parts = self.id._asdict()
parts['root'] = self.file_root
parts['model'] = self.id.model
parts['scan'] = self.id.scan
parts['sym'] = self.id.sym
parts['emit'] = self.id.emit
parts['region'] = self.id.region
parts['ext'] = self.file_ext
for key in overrides.keys():
@ -237,6 +343,30 @@ class CalculationTask(object):
"""
self.id = self.id._replace(**kwargs)
@property
def level(self):
"""
specific level of a task, dictionary key form.
this corresponds to the name of the last non-negative component of self.id.
@return: attribute name corresponding to the task level.
empty string for the root task.
"""
return self.id.level
@property
def numeric_level(self):
"""
specific level of a task, numeric form.
this corresponds to the index of the last non-negative component of self.id.
@return: index corresponding to the significant task level component of the id.
-1 for the root task.
"""
return self.id.numeric_level
def add_task_file(self, name, category):
"""
register a file that was generated by the calculation task.
@ -284,6 +414,82 @@ class CalculationTask(object):
logger.warning("CalculationTask.remove_task_file: could not remove file {0}".format(filename))
class CachedCalculationMethod(object):
"""
decorator to cache results of expensive calculation functions.
this decorator can be used to transparently cache any expensive calculation result
that depends in a deterministic way on the calculation index.
for example, each cluster gets a unique calculation index.
if a cluster needs to be calculated repeatedly, it may be more efficient to cache it.
the method to decorate must have the following signature:
result = func(self, model, index).
the index (neglecting emitter and region) identifies the result (completely and uniquely).
the number of cached results is limited by the ttl (time to live) attribute.
the items' ttl values are decreased each time a requested calculation is not found in the cache (miss).
on a cache hit, the corresponding item's ttl is reset.
the target ttl (time to live) can be specified as an optional parameter of the decorator.
time increases with every cache miss.
"""
## @var _cache (dict)
#
# key = calculation index,
# value = function result
## @var _ttl (dict)
#
# key = calculation index,
# value (int) = remaining time to live
# where time is the number of subsequent cache misses.
## @var ttl (int)
#
# target time to live of cache items.
# time is given by the number of cache misses.
def __init__(self, ttl=10):
super(CachedCalculationMethod, self).__init__()
self._cache = {}
self._ttl = {}
self.ttl = ttl
def __call__(self, func):
def wrapped_func(inst, model, index):
# note: _replace returns a new instance of the namedtuple
index = index._replace(emit=-1, region=-1)
cache_index = (id(inst), index.model, index.scan, index.sym)
try:
result = self._cache[cache_index]
except KeyError:
result = func(inst, model, index)
self._expire()
self._cache[cache_index] = result
self._ttl[cache_index] = self.ttl
return result
return wrapped_func
def _expire(self):
"""
decrease the remaining ttl of cache items and delete items whose ttl has fallen below 0.
@return: None
"""
for index in self._ttl:
self._ttl[index] -= 1
old_items = [index for index in self._ttl if self._ttl[index] < 0]
for index in old_items:
del self._ttl[index]
del self._cache[index]
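# Hypothetical usage sketch (not part of this commit): the decorator masks the emitter
# and region indices, so calls that differ only in those levels hit the same cache entry.
# The class, method and model parameter are invented.
class DemoGenerator(object):
    @CachedCalculationMethod(ttl=5)
    def expensive_value(self, model, index):
        # stand-in for an expensive calculation, deterministic in (model, scan, sym)
        return sum(model.values())

gen = DemoGenerator()
idx = CalcID(model=0, scan=0, sym=0, emit=2, region=1)
first = gen.expensive_value({'dz': 1.0}, idx)
second = gen.expensive_value({'dz': 1.0}, idx._replace(emit=0, region=0))
assert first == second   # second call is served from the cache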
class MscoProcess(object):
"""
code shared by MscoMaster and MscoSlave.
@ -357,11 +563,9 @@ class MscoProcess(object):
"""
clean up after all calculations.
this method calls the clean up function of the project.
@return: None
"""
self._project.cleanup()
pass
def calc(self, task):
"""
@ -387,14 +591,36 @@ class MscoProcess(object):
logger.info("model %s", s_model)
start_time = datetime.datetime.now()
# create parameter and cluster structures
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
par = self._project.create_params(task.model, task.id)
# generate file names
clu = self._create_cluster(task)
par = self._create_params(task)
scan = self._define_scan(task)
output_file = task.format_filename(ext="")
# determine scan range
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return task
def _define_scan(self, task):
"""
define the scan range.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.project.Scan object for the calculator.
"""
scan = self._project.scans[task.id.scan]
if task.region:
scan = scan.copy()
@ -419,26 +645,56 @@ class MscoProcess(object):
except KeyError:
pass
# check parameters and call the msc program
if clu.get_atom_count() < 2:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
files = self._calculator.check_cluster(clu, output_file)
return scan
def _create_cluster(self, task):
"""
generate the cluster for the given calculation task.
cluster generation is delegated to the project's cluster_generator object.
if the current task has region == 0,
the method also exports diagnostic clusters via the project's export_cluster() method.
the file name is formatted with the given task index except that region is -1.
if (in addition to region == 0) the current task has emit == 0 and the cluster includes multiple emitters,
the method also exports the master cluster and full emitter list.
the file name is formatted with the given task index except that emitter and region are -1.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.cluster.Cluster object for the calculator.
"""
nem = self._project.cluster_generator.count_emitters(task.model, task.id)
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
if task.id.region == 0:
file_index = task.id._replace(region=-1)
filename = task.format_filename(region=-1)
files = self._project.export_cluster(file_index, filename, clu)
task.files.update(files)
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
# master cluster
if nem > 1 and task.id.emit == 0:
master_index = task.id._replace(emit=-1, region=-1)
filename = task.format_filename(emit=-1, region=-1)
master_cluster = self._project.cluster_generator.create_cluster(task.model, master_index)
files = self._project.export_cluster(master_index, filename, master_cluster)
task.files.update(files)
task.time = datetime.datetime.now() - start_time
return clu
return task
def _create_params(self, task):
"""
generate the parameters list.
parameters generation is delegated to the project's create_params method.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.project.Params object for the calculator.
"""
par = self._project.create_params(task.model, task.id)
return par
class MscoMaster(MscoProcess):
@ -505,20 +761,12 @@ class MscoMaster(MscoProcess):
# it defines the initial model and the output file name.
# it is passed to the model handler during the main loop.
# @var _model_handler
# (ModelHandler) model handler instance
# @var _scan_handler
# (ScanHandler) scan handler instance
# @var _symmetry_handler
# (SymmetryHandler) symmetry handler instance
# @var _emitter_handler
# (EmitterHandler) emitter handler instance
# @var _region_handler
# (RegionHandler) region handler instance
## @var task_handlers
# (AttrDict) dictionary of task handler objects
#
# the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'.
# the values are handlers.TaskHandler objects.
# the objects can be accessed in attribute or dictionary notation.
def __init__(self, comm):
super(MscoMaster, self).__init__(comm)
@ -534,11 +782,8 @@ class MscoMaster(MscoProcess):
self._min_queue_len = self._slaves + 1
self._root_task = None
self._model_handler = None
self._scan_handler = None
self._symmetry_handler = None
self._emitter_handler = None
self._region_handler = None
self.task_levels = list(CalcID._fields)
self.task_handlers = AttrDict()
def setup(self, project):
"""
@ -564,30 +809,29 @@ class MscoMaster(MscoProcess):
logger.debug("master entering setup")
self._running_slaves = self._slaves
self._idle_ranks = range(1, self._running_slaves + 1)
self._idle_ranks = list(range(1, self._running_slaves + 1))
self._root_task = CalculationTask()
self._root_task.file_root = project.output_file
self._root_task.model = project.create_domain().start
self._model_handler = project.handler_classes['model']()
self._scan_handler = project.handler_classes['scan']()
self._symmetry_handler = project.handler_classes['symmetry']()
self._emitter_handler = project.handler_classes['emitter']()
self._region_handler = project.handler_classes['region']()
for level in self.task_levels:
self.task_handlers[level] = project.handler_classes[level]()
self._model_handler.datetime_limit = self.datetime_limit
self.task_handlers.model.datetime_limit = self.datetime_limit
slaves_adj = max(self._slaves, 1)
self._model_handler.setup(project, slaves_adj)
self.task_handlers.model.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = max(slaves_adj / 2, 1)
self._scan_handler.setup(project, slaves_adj)
self._symmetry_handler.setup(project, slaves_adj)
self._emitter_handler.setup(project, slaves_adj)
self.task_handlers.scan.setup(project, slaves_adj)
self.task_handlers.sym.setup(project, slaves_adj)
self.task_handlers.emit.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = min(slaves_adj, 4)
self._region_handler.setup(project, slaves_adj)
self.task_handlers.region.setup(project, slaves_adj)
project.setup(self.task_handlers)
def run(self):
"""
@ -615,14 +859,13 @@ class MscoMaster(MscoProcess):
logger.debug("master exiting main loop")
self._running = False
self._save_report()
def cleanup(self):
logger.debug("master entering cleanup")
self._region_handler.cleanup()
self._emitter_handler.cleanup()
self._symmetry_handler.cleanup()
self._scan_handler.cleanup()
self._model_handler.cleanup()
for level in reversed(self.task_levels):
self.task_handlers[level].cleanup()
self._project.cleanup()
super(MscoMaster, self).cleanup()
def _dispatch_results(self):
@ -632,29 +875,25 @@ class MscoMaster(MscoProcess):
logger.debug("dispatching results of %u tasks", len(self._complete_tasks))
while self._complete_tasks:
__, task = self._complete_tasks.popitem(last=False)
self._dispatch_result(task)
logger.debug("passing task %s to region handler", str(task.id))
task = self._region_handler.add_result(task)
def _dispatch_result(self, task):
"""
pass a result through the post-processing modules.
@param task: a CalculationTask object.
@return None
"""
level = task.level
if level:
logger.debug(BMsg("passing task {task} to {level} handler", task=str(task.id), level=level))
task = self.task_handlers[level].add_result(task)
if task:
logger.debug("passing task %s to emitter handler", str(task.id))
task = self._emitter_handler.add_result(task)
if task:
logger.debug("passing task %s to symmetry handler", str(task.id))
task = self._symmetry_handler.add_result(task)
if task:
logger.debug("passing task %s to scan handler", str(task.id))
task = self._scan_handler.add_result(task)
if task:
logger.debug("passing task %s to model handler", str(task.id))
task = self._model_handler.add_result(task)
if task:
logger.debug("root task %s complete", str(task.id))
self._finishing = True
self._dispatch_result(task)
else:
self._finishing = True
logger.debug(BMsg("root task {task} complete", task=str(task.id)))
def _create_tasks(self):
"""
@ -668,7 +907,7 @@ class MscoMaster(MscoProcess):
"""
logger.debug("creating new tasks from root")
while len(self._pending_tasks) < self._min_queue_len:
tasks = self._model_handler.create_tasks(self._root_task)
tasks = self.task_handlers.model.create_tasks(self._root_task)
logger.debug("model handler returned %u new tasks", len(tasks))
if not tasks:
self._model_done = True
@ -807,6 +1046,17 @@ class MscoMaster(MscoProcess):
return self._finishing
def _save_report(self):
"""
generate a final report.
this method is called at the end of the master loop.
it passes the call to @ref pmsco.handlers.ModelHandler.save_report.
@return: None
"""
self.task_handlers.model.save_report(self._root_task)
def add_model_task(self, task):
"""
add a new model task including all of its children to the task queue.
@ -814,13 +1064,13 @@ class MscoMaster(MscoProcess):
@param task (CalculationTask) task identifier and model parameters.
"""
scan_tasks = self._scan_handler.create_tasks(task)
scan_tasks = self.task_handlers.scan.create_tasks(task)
for scan_task in scan_tasks:
sym_tasks = self._symmetry_handler.create_tasks(scan_task)
sym_tasks = self.task_handlers.sym.create_tasks(scan_task)
for sym_task in sym_tasks:
emitter_tasks = self._emitter_handler.create_tasks(sym_task)
emitter_tasks = self.task_handlers.emit.create_tasks(sym_task)
for emitter_task in emitter_tasks:
region_tasks = self._region_handler.create_tasks(emitter_task)
region_tasks = self.task_handlers.region.create_tasks(emitter_task)
for region_task in region_tasks:
self._pending_tasks[region_task.id] = region_task

View File

@ -1,3 +1,2 @@
edac_all_wrap.*
edac.py
edac_wrap.cxx
revision.py

15224
pmsco/edac/edac_all.cpp Normal file

File diff suppressed because it is too large.

7
pmsco/edac/edac_all.i Normal file
View File

@ -0,0 +1,7 @@
/* EDAC interface for other programs */
%module edac
%{
extern int run_script(char *scriptfile);
%}
extern int run_script(char *scriptfile);

View File

@ -1,8 +1,18 @@
*** /home/muntwiler_m/mnt/pearl_data/software/edac/edac_all.cpp 2011-04-14 23:38:44.000000000 +0200
--- edac_all.cpp 2016-02-11 12:15:45.322049772 +0100
--- edac_all.cpp 2018-02-05 17:30:17.347373088 +0100
***************
*** 3085,3090 ****
--- 3085,3091 ----
numero Vxc_Barth_Hedin(numero den, numero denup)
{
if(den<1e-10) return 0;
+ if(denup<0) denup=0;
numero rs=1/pow(4*pi/3*den, 1/3.0);
numero x=denup/den;
numero alpha0=pow(4/(9*pi), 1/3.0);
***************
*** 10117,10122 ****
--- 10117,10123 ----
--- 10118,10124 ----
void scan_imfp(char *name);
void scan_imfp(FILE *fout);
numero iimfp_TPP(numero kr);
@ -12,7 +22,7 @@
int scattering_so;
***************
*** 10230,10235 ****
--- 10231,10237 ----
--- 10232,10238 ----
int n_th;
int n_fi;
@ -22,7 +32,7 @@
numero *th_out,
***************
*** 10239,10244 ****
--- 10241,10247 ----
--- 10242,10248 ----
void free(void);
void init_th(numero thi, numero thf, int nth);
void init_phi(numero fii, numero fif, int nfi);
@ -31,8 +41,121 @@
numero refraction);
void init_transmission(
***************
*** 10905,10942 ****
}
numero calculation::IIIthfi(int no, numero th, numero fi)
{
! numero ii,ii0, xth,xfi;
! numero th0=final.th[0], th1=final.th[final.n_th-1];
numero fi0=final.fi[0], fi1=final.fi[final.n_fi-1];
int ith,ifi,ij;
while(fi<0) fi+=2*pi;
while(fi>2*pi) fi-=2*pi;
! xth=(final.n_th-1)*(th-th0)/(th1-th0); ith=int(floor(xth-0.001));
! xfi=(final.n_fi-1)*(fi-fi0)/(fi1-fi0); ifi=int(floor(xfi-0.001));
if(ifi==-1) ifi=0;
if(0<=ith && ith<final.n_th-1 && 0<=ifi && ifi<final.n_fi-1) {
ij=no*n_ang+ith*final.n_fi+ifi;
! ii0=III0[ij];
! ii=ii0 + (xth-ith)*(III0[ij+final.n_fi]-ii0) + (xfi-ifi)*(III0[ij+1]-ii0);
} else ii=0;
if(ii<0) ii=0;
return ii;
}
! numero calculation::IIIave(int no, numero th, numero fi)
{
! if(thave<=1e-6) return IIIthfi(no,th,fi);
int i,j, nn=10, mm=50;
! numero tth,ffi,val=0, r,f;
for(i=0; i<nn; i++)
for(j=0; j<mm; j++) {
! r=i*thave/nn;
! f=j*2*pi/mm;
! tth=th+r*cos(f);
! ffi=fi+r*sin(f)/cos(pi*th/180);
! val+=IIIthfi(no,tth,ffi);
}
! return val/(nn*mm);
}
void calculation::write_ang(FILE *fout_, int ik)
{
int no,nno,i,j;
--- 10909,10963 ----
}
numero calculation::IIIthfi(int no, numero th, numero fi)
{
! numero ii,xth,xfi;
! numero th0=final.th_out[0], th1=final.th_out[final.n_th-1];
numero fi0=final.fi[0], fi1=final.fi[final.n_fi-1];
int ith,ifi,ij;
while(fi<0) fi+=2*pi;
while(fi>2*pi) fi-=2*pi;
! xth=(final.n_th-1)*(th-th0)/(th1-th0); ith=int(floor(xth-0.0001));
! xfi=(final.n_fi-1)*(fi-fi0)/(fi1-fi0); ifi=int(floor(xfi-0.0001));
if(ifi==-1) ifi=0;
+ if(ith==-1) ith=0;
if(0<=ith && ith<final.n_th-1 && 0<=ifi && ifi<final.n_fi-1) {
ij=no*n_ang+ith*final.n_fi+ifi;
! xth = xth-ith;
! xfi = xfi-ifi;
! ii=III0[ij]*(1-xth)*(1-xfi) + III0[ij+final.n_fi]*xth*(1-xfi) + III0[ij+1]*(1-xth)*xfi + III0[ij+final.n_fi+1]*xth*xfi;
} else ii=0;
if(ii<0) ii=0;
return ii;
}
! numero calculation::IIIave(int no, numero th, numero ph)
{
! if(thave<=1e-6) return IIIthfi(no,th,ph);
int i,j, nn=10, mm=50;
! numero tth, ffi, val=0, th1, ph1, cf, nw=0, x0, y0, z0, x1, y1, th2, ph2;
for(i=0; i<nn; i++)
for(j=0; j<mm; j++) {
! th1=i*2*thave/(nn-1); //2*sigma range
! ph1=j*2*pi/mm;
! //rotation of (001) around Y by th1 and around Z by ph1
! x0 = cos(ph1)*sin(th1);
! y1 = sin(ph1)*sin(th1);
! z0 = cos(th1);
! //rotation around Y by th
! x1 = x0*cos(th) + z0*sin(th);
! z0 = -x0*sin(th) + z0*cos(th);
! //rotation around Z by ph
! x0 = x1*cos(ph) - y1*sin(ph);
! y0 = x1*sin(ph) + y1*cos(ph);
!
! th2 = acos(z0);
! ph2 = atan2(y0,x0);
!
! cf = exp(-(th1*th1/thave*thave))*(i>0.1?i:0.1); //gauss weight * radial weight
! nw += cf; //sum of weights
! val+=IIIthfi(no,th2,ph2)*cf;
}
! return val/nw;
}
+
void calculation::write_ang(FILE *fout_, int ik)
{
int no,nno,i,j;
***************
*** 10961,10967 ****
for(no=0; no<nno; no++)
for(i=0; i<final.n_th; i++)
for(j=0; j<final.n_fi; j++)
! III[no*n_ang+i*final.n_fi+j]=IIIave(no, final.th[i], final.fi[j]);
delete [] III0;
}
for(i=0; i<final.n_th; i++)
--- 10982,10988 ----
for(no=0; no<nno; no++)
for(i=0; i<final.n_th; i++)
for(j=0; j<final.n_fi; j++)
! III[no*n_ang+i*final.n_fi+j]=IIIave(no, final.th_out[i], final.fi[j]);
delete [] III0;
}
for(i=0; i<final.n_th; i++)
***************
*** 12485,12490 ****
--- 12488,12494 ----
--- 12506,12512 ----
else {
kr=sqrt(sqr(calc.k[ik])+2*V0);
if(iimfp_flag==0) ki=iimfp.val(kr)/2;
@ -42,7 +165,7 @@
} } else if(calc.k_flag==2) set_k(calc.kc[ik]);
***************
*** 12507,12512 ****
--- 12511,12522 ----
--- 12529,12540 ----
numero imfp=E/(TPP_Ep*TPP_Ep*(beta*log(gamma*E)-C/E+D/(E*E)))/a0_au;
return 1/imfp;
}
@ -64,7 +187,7 @@
n_1=n_2=0;
Ylm0_th_flag=Ylm0_fi_flag=0;
mesh_flag=0;
--- 13212,13218 ----
--- 13230,13236 ----
}
final_state::final_state(void)
{
@ -74,7 +197,7 @@
mesh_flag=0;
***************
*** 13233,13238 ****
--- 13243,13271 ----
--- 13261,13289 ----
if(n_fi==1) fi[0]=fii;
else for(j=0; j<n_fi; j++) fi[j]=fii+j*(fif-fii)/(n_fi-1);
} }
@ -106,7 +229,7 @@
int i;
***************
*** 14743,14748 ****
--- 14776,14783 ----
--- 14794,14801 ----
|| scat.TPP_Ep<=0 || scat.TPP_Eg<0)
on_error(foutput,"(input) imfp TPP-2M", "wrong parameters");
scat.iimfp_flag=1;
@ -117,7 +240,7 @@
scat.iimfp_flag=0;
***************
*** 15162,15164 ****
--- 15197,15206 ----
--- 15215,15224 ----
fprintf(foutput,"That's all, folks!\n");
return 0;
}

View File

@ -13,28 +13,23 @@ SHELL=/bin/sh
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac
FC=gfortran
FCCOPTS=
F2PY=f2py
F2PYOPTS=
CC=g++
CCOPTS=-Wno-write-strings
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=
FC?=gfortran
FCCOPTS?=
F2PY?=f2py
F2PYOPTS?=
CXX?=g++
CXXOPTS?=-Wno-write-strings
PYTHON?=python
PYTHONOPTS?=
all: edac
edac: edac.exe _edac.so edac.py
edac.exe: edac_all.cpp
$(CC) $(CCOPTS) -o edac.exe edac_all.cpp
$(CXX) $(CXXOPTS) -o edac.exe edac_all.cpp
edac_wrap.cxx: edac_all.cpp edac.i
$(SWIG) $(SWIGOPTS) -c++ -python edac.i
edac.py _edac.so: edac_wrap.cxx setup.py
edac.py _edac.so: edac_all.cpp edac_all.i setup.py
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
revision.py: _edac.so
@ -46,7 +41,7 @@ revision.txt: _edac.so edac.exe
echo "" >> revision.txt
clean:
rm -f *.so *.o *.exe
rm -f *_wrap.cxx
rm -f revision.py
rm -f revision.txt
rm -f *.so *.o *.exe *.pyc
rm -f edac.py edac_all_wrap.*
rm -f revision.*

@ -8,7 +8,8 @@ from distutils.core import setup, Extension
edac_module = Extension('_edac',
sources=['edac_wrap.cxx', 'edac_all.cpp'],
sources=['edac_all.cpp', 'edac_all.i'],
swig_opts=['-c++']
)
setup (name = 'edac',
@ -16,5 +17,7 @@ setup (name = 'edac',
author = "Matthias Muntwiler",
description = """EDAC module in Python""",
ext_modules = [edac_module],
py_modules = ["edac"], requires=['numpy']
py_modules = ["edac"],
requires=['numpy']
)

@ -1,16 +1,19 @@
"""
@package pmsco.files
manage files produced by pmsco.
manage the lifetime of files produced by pmsco.
@author Matthias Muntwiler
@copyright (c) 2016 by Paul Scherrer Institut @n
@copyright (c) 2016-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import logging
import mpi4py
@ -53,7 +56,7 @@ FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
#
# this constant defines the default set of file categories that are kept after the calculation.
#
FILE_CATEGORIES_TO_KEEP = {'cluster', 'model', 'report', 'population'}
FILE_CATEGORIES_TO_KEEP = {'cluster', 'model', 'scan', 'report', 'population'}
## @var FILE_CATEGORIES_TO_DELETE
# categories of files to be deleted.
@ -67,13 +70,17 @@ FILE_CATEGORIES_TO_DELETE = FILE_CATEGORIES - FILE_CATEGORIES_TO_KEEP
class FileTracker(object):
"""
organize output files of calculations.
manage the lifetime of files produced by the calculations.
the file manager stores references to data files generated during calculations
and cleans up unused files according to a range of filter criteria.
this class identifies files by _file name_.
file names must therefore be unique over the whole calculation process.
it is possible to specify a full path that is used for communication with the operating system.
"""
## @var files_to_delete (set)
## @var categories_to_delete (set)
# categories of generated files that should be deleted after the calculation.
#
# each string of this set marks a category of files to be deleted.
@ -93,96 +100,119 @@ class FileTracker(object):
#
# the default is 10.
## @var _last_id (int)
# last used file identification number (incremental)
## @var _path_by_id (dict)
# key = file id, value = file path
## @var _model_by_id (dict)
# key = file id, value = model number
## @var _category_by_id (dict)
# key = file id, value = category (str)
## @var _file_model (dict)
# key = file name, value = model number
## @var _file_category (dict)
# key = file name, value = category (str)
## @var _file_path (dict)
# key = file name, value = absolute file path (str)
## @var _rfac_by_model (dict)
# key = model number, value = file id
# key = model number, value = R-factor
## @var _complete_by_model (dict)
# key = model number, value (boolean) = all calculations complete, files can be deleted
## @var _complete_models (set)
# this set contains the model numbers of the models that have finished all calculations.
# files of these models can be considered for clean up.
def __init__(self):
self._id_by_path = {}
self._path_by_id = {}
self._model_by_id = {}
self._category_by_id = {}
self._file_model = {}
self._file_category = {}
self._file_path = {}
self._rfac_by_model = {}
self._complete_by_model = {}
self._last_id = 0
self._complete_models = set([])
self.categories_to_delete = FILE_CATEGORIES_TO_DELETE
self.keep_rfac = 10
def add_file(self, path, model, category='default'):
def get_file_count(self):
"""
return the number of tracked files.
@return: (int) number of tracked files.
"""
return len(self._file_path)
def get_complete_models_count(self):
"""
return the number of complete models.
@return: (int) number of complete models.
"""
return len(self._complete_models)
def add_file(self, name, model, category='default', path=''):
"""
add a new data file to the list.
@param path: (str) system path of the file relative to the working directory.
@param name: (str) unique identification of the file.
this can be the file name in the file system if file names are unique without path specification.
the name must be spelled identically
whenever the same file is referenced in a call to another method of this class.
the empty string is ignored.
@param model: (int) model number
@param category: (str) file category, e.g. 'output', etc.
@param path: (str) file system path of the file.
the file system path is used for communication with the operating system when the file is deleted.
by default, the path is the name argument expanded to a full path relative to the current working directory.
the path is expanded during the call of this method and will not change when the working directory changes.
@return: None
"""
self._last_id += 1
_id = self._last_id
self._id_by_path[path] = _id
self._path_by_id[_id] = path
self._model_by_id[_id] = model
self._category_by_id[_id] = category
if name:
self._file_model[name] = model
self._file_category[name] = category
self._file_path[name] = path if path else os.path.abspath(name)
def rename_file(self, old_path, new_path):
def rename_file(self, old_name, new_name, new_path=''):
"""
rename a data file in the list.
the method does not rename the file in the file system.
@param old_path: must match an existing file path identically.
if old_path is not in the list, the method does nothing.
@param old_name: name used in the original add_file() call.
if it is not in the list, the method does nothing.
@param new_path: new path.
@param new_name: new name of the file, see add_file().
if the file is already in the list, its model and category are overwritten by the values of the old file.
@param new_path: new file system path of the file, see add_file().
by default, the path is the name argument expanded to a full path relative to the current working directory.
@return: None
"""
try:
_id = self._id_by_path[old_path]
model = self._file_model[old_name]
cat = self._file_category[old_name]
except KeyError:
pass
else:
del self._id_by_path[old_path]
self._id_by_path[new_path] = _id
self._path_by_id[_id] = new_path
del self._file_model[old_name]
del self._file_category[old_name]
del self._file_path[old_name]
self.add_file(new_name, model, cat, new_path)
def remove_file(self, path):
def remove_file(self, name):
"""
remove a file from the list.
the method does not delete the file from the file system.
@param path: must match an existing file path identically.
if path is not in the list, the method does nothing.
@param name: must match an existing file name identically.
if the name is not found in the list, the method does nothing.
@return: None
"""
try:
_id = self._id_by_path[path]
del self._file_model[name]
del self._file_category[name]
del self._file_path[name]
except KeyError:
pass
else:
del self._id_by_path[path]
del self._path_by_id[_id]
del self._model_by_id[_id]
del self._category_by_id[_id]
def update_model_rfac(self, model, rfac):
"""
@ -207,18 +237,19 @@ class FileTracker(object):
@param complete: (bool) True if all calculations of the model are complete (files can be deleted).
@return: None
"""
self._complete_by_model[model] = complete
if complete:
self._complete_models.add(model)
else:
self._complete_models.discard(model)
def delete_files(self, categories=None, keep_rfac=0):
def delete_files(self, categories=None):
"""
delete the files matching the list of categories.
@param categories: set of file categories to delete.
may include 'rfac' if bad r-factors should be deleted additionally (regardless of static category).
defaults to self.categories_to_delete.
@version this method does not act on the 'rfac' category.
@param keep_rfac: number of best models to keep if bad r-factors are to be deleted.
the effective keep number is the greater of self.keep_rfac and this argument.
@param categories: set of file categories to delete.
defaults to self.categories_to_delete.
@return: None
"""
@ -226,8 +257,6 @@ class FileTracker(object):
categories = self.categories_to_delete
for cat in categories:
self.delete_category(cat)
if 'rfac' in categories:
self.delete_bad_rfac(keep=keep_rfac)
def delete_bad_rfac(self, keep=0, force_delete=False):
"""
@ -252,8 +281,6 @@ class FileTracker(object):
@param force_delete: delete the bad files even if 'rfac' is not selected in categories_to_delete.
@return: None
@todo should clean up rfac and model dictionaries from time to time.
"""
if force_delete or 'rfac' in self.categories_to_delete:
keep = max(keep, self.keep_rfac)
@ -263,12 +290,45 @@ class FileTracker(object):
except IndexError:
return
complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
del_models = {_model for (_model, _rfac) in self._rfac_by_model.iteritems() if _rfac > rfac_split}
del_models &= complete_models
del_ids = {_id for (_id, _model) in self._model_by_id.iteritems() if _model in del_models}
for _id in del_ids:
self.delete_file(_id)
keep_models = {model for (model, rfac) in self._rfac_by_model.items() if 0.0 <= rfac <= rfac_split}
del_models = self._complete_models - keep_models
del_names = {name for (name, model) in self._file_model.items() if model in del_models}
for name in del_names:
self.delete_file(name)
def delete_models(self, keep=None, delete=None):
"""
delete all files by model.
this involves the following steps:
1. determine a list of complete models
(incomplete models are still being processed and must not be deleted).
2. intersect with the _delete_ list if specified.
3. subtract the _keep_ list if specified.
if neither the _keep_ nor the _delete_ list is specified,
or if the steps above resolve to the _complete_ list,
the method considers this an error and does nothing.
@param keep: (sequence) model numbers to keep, i.e., delete all others.
@param delete: (sequence) model numbers to delete.
@return (int) number of models deleted.
"""
del_models = self._complete_models.copy()
if delete:
del_models &= delete
if keep:
del_models -= keep
if not del_models or del_models == self._complete_models:
return 0
del_names = {name for (name, model) in self._file_model.items() if model in del_models}
for name in del_names:
self.delete_file(name)
return len(del_models)
def delete_category(self, category):
"""
@ -280,45 +340,38 @@ class FileTracker(object):
@return: None
"""
complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
del_ids = {_id for (_id, cat) in self._category_by_id.iteritems() if cat == category}
del_ids &= {_id for (_id, _model) in self._model_by_id.iteritems() if _model in complete_models}
for _id in del_ids:
self.delete_file(_id)
del_names = {name for (name, cat) in self._file_category.items() if cat == category}
del_names &= {name for (name, model) in self._file_model.items() if model in self._complete_models}
for name in del_names:
self.delete_file(name)
def delete_file(self, _id):
def delete_file(self, name):
"""
delete a specified file from the list and the file system.
the file is identified by ID number.
this method is unconditional. it does not consider category, completeness, nor R-factor.
@param _id: (int) ID number of the file to delete.
the method catches errors during file deletion and prints warnings to the logger.
@param name: must match an existing file name identically.
if it is not in the list, the method does nothing.
the method uses the associated path declared in add_file() to delete the file.
@return: None
"""
path = self._path_by_id[_id]
cat = self._category_by_id[_id]
model = self._model_by_id[_id]
del self._id_by_path[path]
del self._path_by_id[_id]
del self._model_by_id[_id]
del self._category_by_id[_id]
try:
self._os_delete_file(path)
except OSError:
logger.warning("error deleting file {0}".format(path))
cat = self._file_category[name]
model = self._file_model[name]
path = self._file_path[name]
except KeyError:
logger.warning("tried to delete untracked file {0}".format(name))
else:
logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))
@staticmethod
def _os_delete_file(path):
"""
have the operating system delete a file path.
this function is separate so that we can mock it in unit tests.
@param path: OS path
@return: None
"""
os.remove(path)
del self._file_model[name]
del self._file_category[name]
del self._file_path[name]
try:
os.remove(path)
except OSError:
logger.warning("file system error deleting file {0}".format(path))
else:
logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))

pmsco/graphics/rfactor.py (new file)
@ -0,0 +1,198 @@
"""
@package pmsco.graphics.rfactor
graphics rendering module for r-factor optimization results.
this module is under development.
interface and implementation are subject to change.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
try:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
except ImportError:
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. graphics rendering disabled.")
def render_param_rfac(filename, data, param_name, summary=None, canvas=None):
"""
render an r-factor versus one model parameter graph.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function raises an error.
@param filename: path and name of the results file.
this is used to derive the output file path by adding the parameter name and
the extension of the graphics file format.
@param data: numpy-structured array of results (one-dimensional).
the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.
@param param_name: name of the model parameter to display.
this must correspond to a field name of the data array.
@param summary: (dict) the dictionary returned by @ref evaluate_results.
this is used to mark the optimum value and the error limits.
if None, these values are not marked in the plot.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(111)
ax.scatter(data[param_name], data['_rfac'], c='b', marker='o', s=4.0)
if summary is not None:
xval = summary['val'][param_name]
ymin = summary['vmin']['_rfac']
ymax = summary['vmax']['_rfac']
ax.plot((xval, xval), (ymin, ymax), ':k')
xmin = summary['vmin'][param_name]
xmax = summary['vmax'][param_name]
varr = summary['rmin'] + summary['rvar']
ax.plot((xmin, xmax), (varr, varr), ':k')
ax.grid(True)
ax.set_xlabel(param_name)
ax.set_ylabel('R-factor')
out_filename = "{0}.{1}.{2}".format(filename, param_name, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def evaluate_results(data, features=50.):
"""
@param data: numpy-structured array of results (one-dimensional).
the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.
@param features: number of independent features (pieces of information) in the data.
this quantity can be approximated as the scan range divided by the average width of a feature
which includes an intrinsic component and the instrumental resolution.
see Booth et al., Surf. Sci. 387 (1997), 152 for energy scans, and
Muntwiler et al., Surf. Sci. 472 (2001), 125 for angle scans.
the default value of 50 is a typical value.
@return dictionary of evaluation results.
the dictionary contains scalars and structured arrays as follows.
the structured arrays have the same data type as the input data and contain exactly one element.
@arg rmin: (scalar) minimum r-factor.
@arg rvar: (scalar) one-sigma variation of r-factor.
@arg imin: (scalar) array index where the minimum is located.
@arg val: (structured array) estimates of parameter values (parameter value at rmin).
@arg sig: (structured array) one-sigma error of estimated values.
@arg vmin: (structured array) minimum value of the parameter.
@arg vmax: (structured array) maximum value of the parameter.
"""
imin = data['_rfac'].argmin()
rmin = data['_rfac'][imin]
rvar = rmin * math.sqrt(2. / float(features))
val = np.zeros(1, dtype=data.dtype)
sig = np.zeros(1, dtype=data.dtype)
vmin = np.zeros(1, dtype=data.dtype)
vmax = np.zeros(1, dtype=data.dtype)
sel = data['_rfac'] <= rmin + rvar
for name in data.dtype.names:
val[name] = data[name][imin]
vmin[name] = data[name].min()
vmax[name] = data[name].max()
if name[0] != '_':
sig[name] = (data[name][sel].max() - data[name][sel].min()) / 2.
results = {'rmin': rmin, 'rvar': rvar, 'imin': imin, 'val': val, 'sig': sig, 'vmin': vmin, 'vmax': vmax}
return results
def render_results(results_file, data=None):
"""
produce a graphics file from optimization results.
the results can be passed in a file name or numpy array (see parameter section).
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function will log a warning message and return gracefully.
@param results_file: path and name of the result file.
result files are the ones written by swarm.SwarmPopulation.save_array, for instance.
the file contains columns of model parameters and optimization control values.
the first row must contain column names that identify the quantity.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.
if the optional data parameter is present,
this is used only to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of results (one-dimensional).
the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.
if this argument is omitted, the data is loaded from the file referenced by the filename argument.
@return (list of str) path names of the generated graphics files.
empty if an error occurred.
the most common exceptions are caught and add a warning in the log file.
"""
if data is None:
data = np.genfromtxt(results_file, names=True)
summary = evaluate_results(data)
out_files = []
try:
for name in data.dtype.names:
if name[0] != '_' and summary['sig'][name] > 0.:
graph_file = render_param_rfac(results_file, data, name, summary)
out_files.append(graph_file)
except (TypeError, AttributeError, IOError) as e:
logger.warning(BMsg("error rendering scan file {file}: {msg}", file=results_file, msg=str(e)))
return out_files
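# minimal usage sketch, assuming matplotlib is available; the results file name "myproject.dat"
# is made up and must contain the columns described above:
if __name__ == '__main__':
    data = np.genfromtxt("myproject.dat", names=True)
    summary = evaluate_results(data)
    print("best r-factor {0} at row {1}".format(summary['rmin'], summary['imin']))
    render_results("myproject.dat", data)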

pmsco/graphics/scan.py (new file)
@ -0,0 +1,273 @@
"""
@package pmsco.graphics.scan
graphics rendering module for energy and angle scans.
this module is experimental.
interface and implementation are subject to change.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
import pmsco.data as md
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
try:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# from matplotlib.backends.backend_pdf import FigureCanvasPdf
# from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. graphics rendering disabled.")
def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
"""
produce a graphics file from a one-dimensional scan file.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function raises an error.
@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of EI, ETPI or ETPAI data.
@param scan_mode: list containing the field name of the scanning axis of the data array.
it must contain one element exactly.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to set axis labels.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(111)
ax.plot(data[scan_mode[0]], data['i'])
ax.set_xlabel(scan_mode[0])
if is_modf:
ax.set_ylabel('chi')
else:
ax.set_ylabel('int')
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def render_ea_scan(filename, data, scan_mode, canvas=None, is_modf=False):
"""
produce a graphics file from an energy-angle scan file.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function raises an error.
@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of ETPI or ETPAI data.
@param scan_mode: list containing the field names of the scanning axes of the data array,
i.e. 'e' and one of the angle axes.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to select a suitable color scale.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@raise TypeError if matplotlib is not available.
"""
(data2d, axis0, axis1) = md.reshape_2d(data, scan_mode, 'i')
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(111)
im = ax.imshow(data2d, origin='lower', aspect='auto', interpolation='none')
im.set_extent((axis1[0], axis1[-1], axis0[0], axis0[-1]))
ax.set_xlabel(scan_mode[1])
ax.set_ylabel(scan_mode[0])
cb = fig.colorbar(im, shrink=0.4, pad=0.1)
dlo = np.nanpercentile(data['i'], 1)
dhi = np.nanpercentile(data['i'], 99)
if is_modf:
im.set_cmap("RdBu_r")
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
im.set_clim((dlo, dhi))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
except AttributeError:
pass
else:
im.set_cmap("magma")
im.set_clim((dlo, dhi))
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def render_tp_scan(filename, data, canvas=None, is_modf=False):
"""
produce a graphics file from a theta-phi (hemisphere) scan file.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function raises an error.
@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of TPI data.
the T and P columns describe a full or partial hemispherical scan.
the I column contains the intensity or modulation values.
other columns are ignored.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to select a suitable color scale.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(111, projection='polar')
data = data[data['t'] <= 89.0]
# stereographic projection
rd = 2 * np.tan(np.radians(data['t']) / 2)
drdt = 1 + np.tan(np.radians(data['t']) / 2)**2
# http://matplotlib.org/api/collections_api.html#matplotlib.collections.PathCollection
pc = ax.scatter(data['p'] * math.pi / 180., rd, c=data['i'], lw=0, alpha=1.)
# interpolate marker size between 4 and 9 (for theta step = 1)
unique_theta = np.unique(data['t'])
theta_step = (np.max(unique_theta) - np.min(unique_theta)) / unique_theta.shape[0]
sz = np.ones_like(pc.get_sizes()) * drdt * 4.5 * theta_step**2
pc.set_sizes(sz)
# xticks = angles where grid lines are displayed (in radians)
ax.set_xticks([])
# rticks = radii where grid lines (circles) are displayed
ax.set_rticks([])
ax.set_rmax(2.0)
cb = fig.colorbar(pc, shrink=0.4, pad=0.1)
dlo = np.nanpercentile(data['i'], 2)
dhi = np.nanpercentile(data['i'], 98)
if is_modf:
pc.set_cmap("RdBu_r")
# im.set_cmap("coolwarm")
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
pc.set_clim((dlo, dhi))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
except AttributeError:
pass
else:
pc.set_cmap("magma")
# im.set_cmap("inferno")
# im.set_cmap("viridis")
pc.set_clim((dlo, dhi))
ti = cb.get_ticks()
ti = [min(ti), max(ti)]
cb.set_ticks(ti)
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def render_scan(filename, data=None):
"""
produce a graphics file from a scan file.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function will log a warning message and return gracefully.
@param filename: path and name of the scan file.
the file must have one of the formats supported by pmsco.data.load_data().
it must contain a single scan (not the combined scan from the model level of PMSCO).
supported are all one-dimensional linear scans,
and two-dimensional energy-angle scans (each axis must be linear).
hemispherical scans are currently not supported.
the filename should include ".modf" if the data contains a modulation function rather than intensity.
if the optional data parameter is present,
this is used only to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of ETPI or ETPAI data.
if this argument is omitted, the data is loaded from the file referenced by the filename argument.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
"""
if data is None:
data = md.load_data(filename)
scan_mode, scan_positions = md.detect_scan_mode(data)
is_modf = filename.find(".modf") >= 0
try:
if len(scan_mode) == 1:
out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf)
elif len(scan_mode) == 2 and 'e' in scan_mode:
out_filename = render_ea_scan(filename, data, scan_mode, is_modf=is_modf)
elif len(scan_mode) == 2 and 't' in scan_mode and 'p' in scan_mode:
out_filename = render_tp_scan(filename, data, is_modf=is_modf)
else:
out_filename = ""
logger.warning(BMsg("no render function for scan file {file}", file=filename))
except (TypeError, AttributeError, IOError) as e:
out_filename = ""
logger.warning(BMsg("error rendering scan file {file}: {msg}", file=filename, msg=str(e)))
return out_filename
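# minimal usage sketch, assuming matplotlib is available; the file name is made up and must
# refer to a single scan in a format supported by pmsco.data.load_data:
if __name__ == '__main__':
    graph_file = render_scan("twoatom_energy_alpha_0.modf.etpi")
    print(graph_file)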

@ -40,21 +40,28 @@ the scan and symmetry handlers call methods of the project class to invoke proje
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-17 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
from functools import reduce
import logging
import math
import numpy as np
import data as md
from helpers import BraceMessage as BMsg
import os
from pmsco.compat import open
import pmsco.data as md
import pmsco.graphics.scan as mgs
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -66,10 +73,10 @@ class TaskHandler(object):
this class defines the common interface of task handlers.
"""
## @var project
## @var _project
# (Project) project instance.
## @var slots
## @var _slots
# (int) number of calculation slots (processes).
#
# for best efficiency the number of tasks generated should be greater or equal the number of slots.
@ -93,7 +100,7 @@ class TaskHandler(object):
# the dictionary keys are the task identifiers CalculationTask.id,
# the values are the corresponding CalculationTask objects.
## @var invalid_count (int)
## @var _invalid_count (int)
# accumulated total number of invalid results received.
#
# the number is incremented by add_result if an invalid task is reported.
@ -188,21 +195,22 @@ class TaskHandler(object):
the id, model, and files attributes are required.
if model contains a '_rfac' value, the r-factor is
@return: None
@return None
"""
model_id = task.id.model
for path, cat in task.files.iteritems():
for path, cat in task.files.items():
self._project.files.add_file(path, model_id, category=cat)
def cleanup_files(self, keep=10):
def cleanup_files(self, keep=0):
"""
delete uninteresting files.
@param: number of best ranking models to keep.
@param keep: minimum number of models to keep.
0 (default): leave the decision to the project.
@return: None
@return None
"""
self._project.files.delete_files(keep_rfac=keep)
self._project.cleanup_files(keep=keep)
class ModelHandler(TaskHandler):
@ -255,6 +263,22 @@ class ModelHandler(TaskHandler):
return None
def save_report(self, root_task):
"""
generate a final report of the optimization procedure.
detailed calculation results are usually saved as soon as they become available.
this method may be implemented in sub-classes to aggregate and summarize the results, generate plots, etc.
in this class, the method does nothing.
@note: implementations must add the path names of generated files to self._project.files.
@param root_task: (CalculationTask) task with initial model parameters.
@return: None
"""
pass
class SingleModelHandler(ModelHandler):
"""
@ -263,6 +287,10 @@ class SingleModelHandler(ModelHandler):
this class runs a single calculation on the start parameters defined in the domain of the project.
"""
def __init__(self):
super(SingleModelHandler, self).__init__()
self.result = {}
def create_tasks(self, parent_task):
"""
start one task with the start parameters.
@ -316,25 +344,18 @@ class SingleModelHandler(ModelHandler):
modf_ext = ".modf" + parent_task.file_ext
parent_task.modf_filename = parent_task.file_root + modf_ext
rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
assert not math.isnan(task.rfac)
self.result = task.model.copy()
self.result['_rfac'] = task.rfac
task.model['_rfac'] = rfac
self.save_report_file(task.model)
self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.update_model_rfac(task.id.model, task.rfac)
self._project.files.set_model_complete(task.id.model, True)
parent_task.time = task.time
return parent_task
def save_report_file(self, result):
def save_report(self, root_task):
"""
save model parameters and r-factor to a file.
@ -343,20 +364,25 @@ class SingleModelHandler(ModelHandler):
the first line contains the parameter names.
this is the same format as used by the swarm and grid handlers.
@param result: dictionary of results and parameters. the values should be scalars and strings.
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
@return: None
"""
keys = [key for key in result]
super(SingleModelHandler, self).save_report(root_task)
keys = [key for key in self.result]
keys.sort(key=lambda t: t[0].lower())
vals = (str(result[key]) for key in keys)
with open(self._project.output_file + ".dat", "w") as outfile:
vals = (str(self.result[key]) for key in keys)
filename = self._project.output_file + ".dat"
with open(filename, "w") as outfile:
outfile.write("# ")
outfile.write(" ".join(keys))
outfile.write("\n")
outfile.write(" ".join(vals))
outfile.write("\n")
self._project.files.add_file(filename, root_task.id.model, "report")
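# example of the report file written above, for hypothetical model parameters dAB and V0
# (values are made up); the header line lists the sorted keys, the second line the values:
#
#   # _rfac dAB V0
#   0.345 2.1 11.0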
class ScanHandler(TaskHandler):
"""
@ -388,6 +414,30 @@ class ScanHandler(TaskHandler):
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
"""
initialize the scan task handler and save processed experimental scans.
"""
super(ScanHandler, self).setup(project, slots)
for (i_scan, scan) in enumerate(self._project.scans):
if scan.modulation is not None:
__, filename = os.path.split(scan.filename)
pre, ext = os.path.splitext(filename)
filename = "{pre}_{scan}.modf{ext}".format(pre=pre, ext=ext, scan=i_scan)
filepath = os.path.join(self._project.output_dir, filename)
md.save_data(filepath, scan.modulation)
mgs.render_scan(filepath, data=scan.modulation)
if project.combined_scan is not None:
ext = md.format_extension(project.combined_scan)
filename = project.output_file + ext
md.save_data(filename, project.combined_scan)
if project.combined_modf is not None:
ext = md.format_extension(project.combined_modf)
filename = project.output_file + ".modf" + ext
md.save_data(filename, project.combined_modf)
def create_tasks(self, parent_task):
"""
generate a calculation task for each scan of the given parent task.
@ -464,6 +514,7 @@ class ScanHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_scans(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')
@ -575,8 +626,11 @@ class SymmetryHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_symmetries(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
graph_file = mgs.render_scan(parent_task.modf_filename)
self._project.files.add_file(graph_file, parent_task.id.model, 'scan')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -621,7 +675,7 @@ class EmitterHandler(TaskHandler):
all emitters share the same model parameters.
@return list of @ref CalculationTask objects with one element per emitter configuration
@return list of @ref pmsco.dispatch.CalculationTask objects with one element per emitter configuration
if parallel processing is enabled.
otherwise the list contains a single CalculationTask object with emitter index 0.
the emitter index is used by the project's create_cluster method.
@ -634,10 +688,7 @@ class EmitterHandler(TaskHandler):
self._complete_ids_per_parent[parent_id] = set()
n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
if n_emitters > 1 and self._slots > 1:
emitters = range(1, n_emitters + 1)
else:
emitters = [0]
emitters = range(n_emitters)
out_tasks = []
for em in emitters:
@ -698,8 +749,11 @@ class EmitterHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_emitters(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
graph_file = mgs.render_scan(parent_task.modf_filename)
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -776,15 +830,10 @@ class RegionHandler(TaskHandler):
parent_task.time = reduce(lambda a, b: a + b, child_times)
if parent_task.result_valid:
stack1 = [md.load_data(t.result_filename) for t in child_tasks]
dtype = md.common_dtype(stack1)
stack2 = [md.restructure_data(d, dtype) for d in stack1]
result_data = np.hstack(tuple(stack2))
md.sort_data(result_data)
md.save_data(parent_task.result_filename, result_data)
self._project.combine_regions(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
for t in child_tasks:
self._project.files.remove_file(t.result_filename)
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, "emitter")
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@ -840,7 +889,7 @@ class EnergyRegionHandler(RegionHandler):
so that all child tasks of the same parent finish approximately in the same time.
pure angle scans are not split.
to use this feature, the project assigns this class to its @ref handler_classes['region'].
to use this feature, the project assigns this class to its @ref pmsco.project.Project.handler_classes['region'].
it is safe to use this handler for calculations that do not involve energy scans.
the handler is best used for single calculations.
in optimizations that calculate many models there is no advantage in using it

@ -1,4 +1,20 @@
class BraceMessage:
"""
@package pmsco.helpers
helper classes
a collection of small and generic code bits mostly collected from the www.
"""
class BraceMessage(object):
"""
a string formatting proxy class useful for logging and exceptions.
use BraceMessage("{0} {1}", "formatted", "message")
in place of "{0} {1}".format("formatted", "message").
the advantage is that the format method is called only if the string is actually used.
"""
def __init__(self, fmt, *args, **kwargs):
self.fmt = fmt
self.args = args
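# minimal usage sketch with the logging module (message text and values are made up);
# the format() call is deferred until the log record is actually emitted:
#
#   import logging
#   logger = logging.getLogger(__name__)
#   logger.debug(BraceMessage("model {0} finished with r-factor {1:.3f}", 42, 0.317))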

@ -5,45 +5,42 @@ SHELL=/bin/sh
# required libraries: libblas, liblapack, libf2c
# (you may have to set soft links so that linker finds them)
#
# the makefile calls python-config to get the compilation flags and include path.
# you may override the corresponding variables on the command line or by environment variables:
#
# PYTHON_INC: specify additional include directories. each dir must start with -I prefix.
# PYTHON_CFLAGS: specify the C compiler flags.
#
# see the top-level makefile for additional information.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .x
.PHONY: all loess test gas madeup ethanol air galaxy
HOST=$(shell hostname)
CFLAGS=-O
FFLAGS=-O
OBJ=loessc.o loess.o predict.o misc.o loessf.o dqrsl.o dsvdc.o fix_main.o
FFLAGS?=-O
LIB=-lblas -lm -lf2c
LIBPATH=
CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=
ifneq (,$(filter merlin%,$(HOST)))
PYTHONINC=-I/usr/include/python2.7 -I/opt/python/python-2.7.5/include/python2.7/
else ifneq (,$(filter ra%,$(HOST)))
PYTHONINC=-I${PSI_PYTHON27_INCLUDE_DIR}/python2.7 -I${PSI_PYTHON27_LIBRARY_DIR}/python2.7/site-packages/numpy/core/include
else
PYTHONINC=-I/usr/include/python2.7
endif
LIBPATH?=
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHON_CONFIG = ${PYTHON}-config
#PYTHON_LIB ?= $(shell ${PYTHON_CONFIG} --libs)
#PYTHON_INC ?= $(shell ${PYTHON_CONFIG} --includes)
PYTHON_INC ?=
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
#PYTHON_LDFLAGS ?= $(shell ${PYTHON_CONFIG} --ldflags)
all: loess
loess: _loess.so
loess_wrap.c: loess.c loess.i
$(SWIG) $(SWIGOPTS) -python loess.i
loess.py _loess.so: loess_wrap.c
# setuptools doesn't handle the fortran files correctly
# $(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
$(CC) $(CFLAGS) -fpic -c loessc.c loess.c predict.c misc.c loessf.f dqrsl.f dsvdc.f fix_main.c
$(CC) $(CFLAGS) -fpic -c loess_wrap.c $(PYTHONINC)
$(CC) -shared $(OBJ) $(LIB) $(LIBPATH) loess_wrap.o -o _loess.so
loess.py _loess.so: loess.c loess.i
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
examples: gas madeup ethanol air galaxy

@ -1,7 +1,5 @@
#!/usr/bin/env python
__author__ = 'Matthias Muntwiler'
"""
@package loess.setup
setup.py file for LOESS
@ -17,39 +15,49 @@ the Python wrapper was set up by M. Muntwiler
with the help of the SWIG toolkit
and other incredible goodies available in the Linux world.
@bug this file is currently not used because
distutils does not compile the included Fortran files.
@bug numpy.distutils.build_src in python 2.7 treats all Fortran files with f2py
so that they are compiled via both f2py and swig.
this produces extra object files which cause the linker to fail.
to fix this issue, this module hacks the build_src class.
this hack does not work with python 3. perhaps it's even unnecessary.
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from distutils.core import setup, Extension
from distutils import sysconfig
import numpy
try:
numpy_include = numpy.get_include()
except AttributeError:
numpy_include = numpy.get_numpy_include()
loess_module = Extension('_loess',
sources=['loess.i', 'loess_wrap.c', 'loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f',
'dqrsl.f', 'dsvdc.f'],
include_dirs = [numpy_include],
libraries=['blas', 'm', 'f2c'],
)
setup(name='loess',
version='0.1',
author=__author__,
author_email='matthias.muntwiler@psi.ch',
description="""LOESS module in Python""",
ext_modules=[loess_module],
py_modules=["loess"], requires=['numpy']
)
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
config = Configuration('loess', parent_package, top_path)
lib = ['blas', 'm', 'f2c']
src = ['loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f', 'dqrsl.f', 'dsvdc.f', 'fix_main.c', 'loess.i']
inc_dir = [numpy_include]
config.add_extension('_loess',
sources=src,
libraries=lib,
include_dirs=inc_dir
)
return config
def ignore_sources(self, sources, extension):
return sources
if __name__ == '__main__':
try:
from numpy.distutils.core import numpy_cmdclass
numpy_cmdclass['build_src'].f2py_sources = ignore_sources
except ImportError:
pass
from numpy.distutils.core import setup
setup(**configuration(top_path='').todict())

@ -12,16 +12,17 @@ SHELL=/bin/sh
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac msc mufpot
FC=gfortran
FCCOPTS=
F2PY=f2py
F2PYOPTS=
CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=
FC?=gfortran
FCCOPTS?=
F2PY?=f2py
F2PYOPTS?=
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHONINC?=
all: msc

@ -20,7 +20,7 @@ CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHON=python2
PYTHONOPTS=
all: mufpot

pmsco/optimizers/genetic.py (new file)
@ -0,0 +1,308 @@
"""
@package pmsco.optimizers.genetic
genetic optimization algorithm.
this module implements a genetic algorithm for structural optimization.
the genetic algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
the genetic algorithm evolves a population of individuals
by a combination of inheritance, crossover and mutation
and R-factor based selection.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import random
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class GeneticPopulation(population.Population):
"""
population implementing a genetic optimization algorithm.
the genetic algorithm implements the following principles:
1. inheritance: two children of a new generation are generated from the genes (i.e. model parameters)
of two parents of the old generation.
2. elitism: individuals with similar r-factors are more likely to mate.
3. crossover: the genes of the parents are randomly distributed to their children.
4. mutation: a gene may mutate at random.
5. selection: the globally best individual is added to a parent population (and replaces the worst).
the main tuning parameter of the algorithm is the mutation_step which is copied from the domain.step.
it defines the width of a gaussian distribution of change under a weak mutation.
it should be large enough so that the whole parameter space can be probed,
but small enough that a frequent mutation does not throw the individual out of the convergence region.
typically, the step should be of the order of the parameter range divided by the population size.
other tunable parameters are the mating_factor, the weak_mutation_probability and the strong_mutation_probability.
the defaults should normally be fine.
"""
## @var weak_mutation_probability
#
# probability (between 0 and 1) that a parameter changes in the mutate_weak() method.
#
# the default is 1.0, i.e., each parameter mutates in each generation.
#
# 1.0 has shown better coverage of the continuous parameter space and faster finding of the optimum.
## @var strong_mutation_probability
#
# probability (between 0 and 1) that a parameter changes in the mutate_strong() method.
#
# the default is 0.01, i.e., on average, every hundredth probed parameter is affected by a strong mutation.
# if the model contains 10 parameters, for example,
# every tenth particle would see a mutation of at least one of its parameters.
#
# too high value may disturb convergence,
# too low value may trap the algorithm in a local optimum.
## @var mating_factor
#
# inverse width of the mating preference distribution.
#
# the greater this value, the more similar partners are mated by the mate_parents() method.
#
# the default value 4.0 results in a probability of about 0.0025
# that the best particle mates the worst.
## @var position_constrain_mode
#
# the position constrain mode selects what to do if a particle violates the parameter limits.
#
# the default is "random" which resets the parameter to a random value.
## @var mutation_step
#
# standard deviations of the gaussian distribution used in the mutate_weak() method.
# the variable is a dictionary with the same keys as model_step (the parameter domain).
#
# it is initialized from the domain.step
# or set to a default value based on the parameter range and population size.
def __init__(self):
"""
initialize the population object.
"""
super(GeneticPopulation, self).__init__()
self.weak_mutation_probability = 1.0
self.strong_mutation_probability = 0.01
self.mating_factor = 4.
self.position_constrain_mode = 'random'
self.mutation_step = {}
def setup(self, size, domain, **kwargs):
"""
@copydoc Population.setup()
in addition to the inherited behaviour, this method initializes self.mutation_step.
mutation_step of a parameter is set to its domain.step if non-zero.
otherwise it is set to the parameter range divided by the population size.
"""
super(GeneticPopulation, self).setup(size, domain, **kwargs)
for key in self.model_step:
val = self.model_step[key]
self.mutation_step[key] = val if val != 0 else (self.model_max[key] - self.model_min[key]) / size
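# numeric illustration (made-up values): for a parameter with min -1.0, max 1.0 and step 0
# in a population of size 20, mutation_step defaults to (1.0 - (-1.0)) / 20 = 0.1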
def randomize(self, pos=True, vel=True):
"""
initializes a "random" population.
this implementation is a new proposal.
the distribution is not completely random.
rather, a position vector (by parameter) is initialized with a linear function
that covers the parameter domain.
the linear function is then permuted randomly.
the method does not update the particle info fields.
@param pos: randomize positions. if False, the positions are not changed.
@param vel: randomize velocities. if False, the velocities are not changed.
"""
if pos:
for key in self.model_start:
self.pos[key] = np.random.permutation(np.linspace(self.model_min[key], self.model_max[key],
self.pos.shape[0]))
if vel:
for key in self.model_start:
d = (self.model_max[key] - self.model_min[key]) / 8
self.vel[key] = np.random.permutation(np.linspace(-d, d, self.vel.shape[0]))
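# illustration of the permuted linear initialization for one made-up parameter ranging
# from -1.0 to 1.0 in a population of 5:
#
#   np.random.permutation(np.linspace(-1.0, 1.0, 5))
#   # -> e.g. array([ 0.5, -1. ,  1. ,  0. , -0.5])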
def advance_population(self):
"""
advance the population by one generation.
the population is advanced in several steps:
1. replace the worst individual by the best found so far.
2. mate the parents in pairs of two.
3. produce children by crossover from the parents.
4. apply weak mutations.
5. apply strong mutations.
if generation is lower than zero, the method increases the generation number but does not advance the particles.
@return: None
"""
if not self._hold_once:
self.generation += 1
pop = self.pos.copy()
pop.sort(order='_rfac')
elite = self.best.copy()
elite.sort(order='_rfac')
if elite[0]['_model'] not in pop['_model']:
elite[0]['_particle'] = pop[-1]['_particle']
pop[-1] = elite[0]
pop.sort(order='_rfac')
parents = self.mate_parents(pop)
children = []
for x, y in parents:
a, b = self.crossover(x, y)
children.append(a)
children.append(b)
for child in children:
index = child['_particle']
self.mutate_weak(child, self.weak_mutation_probability)
self.mutate_strong(child, self.strong_mutation_probability)
self.mutate_duplicate(child)
for key in self.model_start:
vel = child[key] - self.pos[index][key]
child[key], vel, self.model_min[key], self.model_max[key] = \
self.constrain_position(child[key], vel, self.model_min[key], self.model_max[key],
self.position_constrain_mode)
self.pos[index] = child
self.update_particle_info(index)
super(GeneticPopulation, self).advance_population()
def mate_parents(self, positions):
"""
group the population in pairs of two.
to mate two individuals, the first individual of the (remaining) population selects a partner from the rest,
with an exponential preference for earlier (better ranked) ones.
the process is repeated until all individuals are mated.
@param positions: original population (numpy structured array)
the population should be ordered with best model first.
@return: sequence of pairs (tuples) of structured arrays holding one model each.
"""
seq = [model for model in positions]
parents = []
while len(seq) >= 2:
p1 = seq.pop(0)
ln = len(seq)
i = min(int(random.expovariate(self.mating_factor / ln) * ln), ln - 1)
p2 = seq.pop(i)
parents.append((p1, p2))
return parents
def crossover(self, parent1, parent2):
"""
crossover two parents to create two children.
for each model parameter, the parent's value is randomly assigned to either one of the children.
@param parent1: numpy structured array holding the model of the first parent.
@param parent2: numpy structured array holding the model of the second parent.
@return: tuple of the two crossed children.
these are two new ndarray instances that are independent of their parents.
"""
child1 = parent1.copy()
child2 = parent2.copy()
for key in self.model_start:
if random.random() >= 0.5:
child1[key], child2[key] = parent2[key], parent1[key]
return child1, child2
def mutate_weak(self, model, probability):
"""
apply a weak mutation to a model.
each parameter is changed to a different value in the domain of the parameter at the given probability.
the amount of change has a gaussian distribution with a standard deviation of mutation_step.
@param[in,out] model: structured numpy.ndarray holding the model parameters.
model is modified in place.
@param probability: probability between 0 and 1 at which to change a parameter.
0 = no change, 1 = force change.
@return: model (same instance as the @c model input argument).
"""
for key in self.model_start:
if random.random() < probability:
model[key] += random.gauss(0, self.mutation_step[key])
return model
def mutate_strong(self, model, probability):
"""
apply a strong mutation to a model.
each parameter is changed to a random value in the domain of the parameter at the given probability.
@param[in,out] model: structured numpy.ndarray holding the model parameters.
model is modified in place.
@param probability: probability between 0 and 1 at which to change a parameter.
0 = no change, 1 = force change.
@return: model (same instance as the @c model input argument).
"""
for key in self.model_start:
if random.random() < probability:
model[key] = (self.model_max[key] - self.model_min[key]) * random.random() + self.model_min[key]
return model
def mutate_duplicate(self, model):
"""
mutate a model if it is identical to a previously calculated one.
if the model was calculated before, the mutate_weak mutation is applied with probability 1.
@param[in,out] model: structured numpy.ndarray holding the model parameters.
model is modified in place.
@return: model (same instance as the @c model input argument).
"""
try:
self.find_model(model)
self.mutate_weak(model, 1.0)
except ValueError:
pass
return model
class GeneticOptimizationHandler(population.PopulationHandler):
"""
model handler which implements a genetic algorithm.
"""
def __init__(self):
super(GeneticOptimizationHandler, self).__init__()
self._pop = GeneticPopulation()
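# hedged usage sketch: a project would typically select this optimizer in the same way as
# described for the region handler in pmsco.handlers, i.e. by assigning the handler class to
# the project's handler_classes mapping (the 'model' key is an assumption here):
#
#   self.handler_classes['model'] = GeneticOptimizationHandler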

@ -13,14 +13,19 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
import copy
import os
from __future__ import print_function
import datetime
import math
import numpy as np
import logging
import handlers
from helpers import BraceMessage as BMsg
from pmsco.compat import open
import pmsco.handlers as handlers
import pmsco.graphics as graphics
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -114,16 +119,14 @@ class GridPopulation(object):
@param domain: definition of initial and limiting model parameters
expected by the cluster and parameters functions.
the attributes have the following meanings:
@arg start: values of the fixed parameters.
@arg min: minimum values allowed.
@arg max: maximum values allowed.
if abs(max - min) < step/2 , the parameter is kept constant.
@arg step: step size (distance between two grid points).
if step <= 0, the parameter is kept constant.
@param domain.start: values of the fixed parameters.
@param domain.min: minimum values allowed.
@param domain.max: maximum values allowed.
if abs(max - min) < step/2 , the parameter is kept constant.
@param domain.step: step size (distance between two grid points).
if step <= 0, the parameter is kept constant.
"""
self.model_start = domain.start
self.model_min = domain.min
@ -347,7 +350,7 @@ class GridSearchHandler(handlers.ModelHandler):
new_task = parent_task.copy()
new_task.parent_id = parent_id
pos = self._pop.positions[model]
new_task.model = {k:pos[k] for k in pos.dtype.names}
new_task.model = {k: pos[k] for k in pos.dtype.names}
new_task.change_id(model=model)
child_id = new_task.id
@ -374,17 +377,10 @@ class GridSearchHandler(handlers.ModelHandler):
del self._pending_tasks[task.id]
parent_task = self._parent_tasks[task.parent_id]
rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
self._invalid_count += 1
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
task.model['_rfac'] = rfac
self._pop.add_result(task.model, rfac)
assert not math.isnan(task.rfac)
task.model['_rfac'] = task.rfac
self._pop.add_result(task.model, task.rfac)
if self._outfile:
s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
@ -392,12 +388,14 @@ class GridSearchHandler(handlers.ModelHandler):
self._outfile.write("\n")
self._outfile.flush()
self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.update_model_rfac(task.id.model, task.rfac)
self._project.files.set_model_complete(task.id.model, True)
if task.result_valid:
if task.time > self._model_time:
self._model_time = task.time
else:
self._invalid_count += 1
# grid search complete?
if len(self._pending_tasks) == 0:
@ -407,3 +405,17 @@ class GridSearchHandler(handlers.ModelHandler):
self.cleanup_files()
return parent_task
def save_report(self, root_task):
"""
generate a graphical summary of the optimization.
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
@return: None
"""
super(GridSearchHandler, self).save_report(root_task)
files = graphics.rfactor.render_results(self._project.output_file + ".dat", self._pop.positions)
for f in files:
self._project.files.add_file(f, root_task.id.model, "report")

File diff suppressed because it is too large

139
pmsco/optimizers/swarm.py Normal file
View File

@ -0,0 +1,139 @@
"""
@package pmsco.optimizers.swarm
particle swarm optimization handler.
the module starts multiple MSC calculations and optimizes the model parameters
according to the particle swarm optimization algorithm.
Particle swarm optimization adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class SwarmPopulation(population.Population):
"""
particle swarm population.
"""
## @var friends
# number of other particles that each particle consults for the global best fit.
# default = 3.
## @var momentum
# momentum of the particle.
# default = 0.689343.
## @var attract_local
# preference for returning to the local best fit.
# default = 1.92694.
## @var attract_global
# preference for heading towards the global best fit.
# default = 1.92694.
def __init__(self):
"""
initialize the population object.
"""
super(SwarmPopulation, self).__init__()
self.friends = 3
self.momentum = 0.689343
self.attract_local = 1.92694
self.attract_global = 1.92694
self.position_constrain_mode = 'default'
self.velocity_constrain_mode = 'default'
def advance_population(self):
"""
advance the population by one step.
this method just calls advance_particle() for each particle of the population.
if the population is on hold for one generation, neither the generation number nor the particles are advanced.
@return: None
"""
if not self._hold_once:
self.generation += 1
for index, __ in enumerate(self.pos):
self.advance_particle(index)
super(SwarmPopulation, self).advance_population()
def advance_particle(self, index):
"""
advance a particle by one step.
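for each model parameter, velocity and position are updated approximately as follows
(a sketch of the rule implemented below; pv, pl and pg denote uniform random numbers in [0, 1),
and best_own / best_friend are illustrative names for the local and group best positions):
@code{.py}
vel = momentum * pv * vel \
      + attract_local * pl * (best_own - pos) \
      + attract_global * pg * (best_friend - pos)
pos = pos + vel
@endcode
the new velocity and position are subsequently constrained to the parameter domain
by constrain_velocity() and constrain_position().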
@param index: index of the particle in the population.
"""
# note: the following two identifiers are views,
# assignment will modify the original array
pos = self.pos[index]
vel = self.vel[index]
# best fit that this individual has seen
xl = self.best[index]
# best fit that a group of others have seen
xg = self.best_friend(index)
for key in self.model_start:
# update velocity
dxl = xl[key] - pos[key]
dxg = xg[key] - pos[key]
pv = np.random.random()
pl = np.random.random()
pg = np.random.random()
vel[key] = (self.momentum * pv * vel[key] +
self.attract_local * pl * dxl +
self.attract_global * pg * dxg)
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.velocity_constrain_mode)
# update position
pos[key] += vel[key]
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.position_constrain_mode)
self.update_particle_info(index)
# noinspection PyUnusedLocal
def best_friend(self, index):
"""
select the best fit out of a random set of particles.

@param index: index of the calling particle (currently not used in the selection).
@return: the entry of self.best ("best friend") with the lowest R-factor among self.friends randomly chosen entries.
"""
friends = np.random.choice(self.best, self.friends, replace=False)
index = np.argmin(friends['_rfac'])
return friends[index]
class ParticleSwarmHandler(population.PopulationHandler):
"""
model handler which implements the particle swarm optimization algorithm.
"""
def __init__(self):
super(ParticleSwarmHandler, self).__init__()
self._pop = SwarmPopulation()

155
pmsco/optimizers/table.py Normal file
View File

@ -0,0 +1,155 @@
"""
@package pmsco.optimizers.table
table scan optimization handler.
the table scan scans through an explicit table of model parameters.
it can be used to recalculate models from a previous optimization run on different scans,
or as an interface to external optimizers.
new elements can be added to the table while the calculation loop is in progress.
though the concepts _population_ and _optimization_ are not intrinsic to a table scan,
the classes defined here inherit from the generic population class and optimization handler.
this is done to share as much code as possible between the different optimizers.
the only difference is that the table optimizer does not generate models internally.
instead, it loads them (possibly repeatedly) from a file or asks the project code to provide the data.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class TablePopulation(population.Population):
"""
population generated from explicit values.
this class maintains a population that is updated from a table of explicit values.
the table can be static (defined at the start of the optimization process)
or dynamic (new models appended during the optimization process).
for each generation, the table is read and the next models are imported into the population.
the class de-duplicates the table, i.e. models with the same parameters as a previously calculated one are not calculated again.
it is, thus, perfectly fine to append new models to the table rather than overwrite previous entries.
the table can be built from the following data sources:
@arg (numpy.ndarray): structured array that can be added to self.positions,
having at least the columns defining the model parameters.
@arg (sequence of dict, numpy.ndarray, numpy.void, named tuple):
each element must be syntactically compatible with a dict
that holds the model parameters.
@arg (str): file name that contains a table in the same format as
@ref pmsco.optimizers.population.Population.save_array produces.
@arg (callable): a function that returns one of the above objects
(or None to mark the end of the table).
the data source is passed as an argument to the self.setup() method.
structured arrays and sequences cannot be modified after they are passed to `setup`.
this means that the complete table must be known at the start of the process.
the most flexible way is to pass a function that generates a structured array in each call.
this would even allow including a non-standard optimization algorithm.
the function is best defined in the custom project class.
the population calls it every time before a new generation starts.
to end the optimization process, it simply returns None.
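a minimal sketch of such a generator function (hypothetical project code; parameter names, values and the termination check are examples only):
@code{.py}
import numpy as np

def my_table_source():
    # return a block of new models, or None to end the optimization
    if no_more_models():  # hypothetical termination check
        return None
    block = np.zeros(2, dtype=[('dAB', 'f8'), ('th', 'f8')])
    block['dAB'] = [2.10, 2.15]
    block['th'] = [0.0, 5.0]
    return block
@endcode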
the table can also be defined in an external file, e.g. as calculated by other programs or edited manually.
the table file can either remain unchanged during the optimization process,
or new models can be added while the optimization is in progress.
in the latter case, note that there is no reliable synchronization of file access.
first, writes to the file should be kept as short as possible; the population class uses a read timeout of ten seconds.
second, because it is impossible to know whether the file has already been read,
new models should be _appended_ rather than _overwriting_ previous ones.
the population class automatically skips models that have already been read.
this class does not support seeding.
although a seed file is accepted, it is not used.
patching is allowed, but there is normally no advantage over modifying the table.
the domain is used to define the model parameters and the parameter range.
models violating the parameter domain are ignored.
"""
## @var table_source
# data source of the model table
#
# this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
# e.g. a file name, a numpy structured array, or a function returning a structured array.
# see the class description for details.
def __init__(self):
"""
initialize the population object.
"""
super(TablePopulation, self).__init__()
self.table_source = None
self.position_constrain_mode = 'error'
def setup(self, size, domain, **kwargs):
"""
set up the population arrays, parameter domain and data source.
@param size: requested number of particles.
this does not need to correspond to the number of table entries.
on each generation the population loads up to this number of new entries from the table source.
@param domain: definition of initial and limiting model parameters
expected by the cluster and parameters functions.
@arg domain.start: not used.
@arg domain.min: minimum values allowed.
@arg domain.max: maximum values allowed.
@arg domain.step: not used.
the following arguments are keyword arguments.
the method also accepts the inherited seeding arguments, but they have no effect.
@param table_source: data source of the model table.
this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
e.g. a file name, a numpy structured array, or a function returning a structured array.
see the class description for details.
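for illustration (hypothetical domain object and file name):
@code{.py}
pop = TablePopulation()
pop.setup(size=10, domain=dom, table_source="models_to_calculate.dat")
@endcode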
@return: None
"""
super(TablePopulation, self).setup(size, domain, **kwargs)
self.table_source = kwargs['table_source']
def advance_population(self):
"""
advance the population by one step.
this method re-imports the table source
and copies the table to the current population.
@return: None
"""
self.import_positions(self.table_source)
self.advance_from_import()
super(TablePopulation, self).advance_population()
class TableModelHandler(population.PopulationHandler):
"""
model handler which implements the table algorithm.
"""
def __init__(self):
super(TableModelHandler, self).__init__()
self._pop = TablePopulation()

View File

@ -4,13 +4,9 @@
@package pmsco.pmsco
PEARL Multiple-Scattering Calculation and Structural Optimization
this is the main entry point and top-level interface of the PMSCO package.
all calculations (any mode, any project) start by calling the main_pmsco() function of this module.
the module also provides a command line parser.
command line usage: call with -h option to see the list of arguments.
python usage: call main_pmsco() with suitable arguments.
this is the top-level interface of the PMSCO package.
all calculations (any mode, any project) start by calling the run_project() function of this module.
the module also provides a command line parser for common options.
for parallel execution, prefix the command line with mpiexec -np NN, where NN is the number of processes to use.
note that in parallel mode, one process takes the role of the coordinator (master).
@ -24,48 +20,39 @@ PMSCO serializes the calculations automatically.
the code of the main module is independent of a particular calculation project.
all project-specific code must be in a separate python module.
the project module must implement a class derived from pmsco.project.Project,
and a global function create_project which returns a new instance of the derived project class.
and call run_project() with an instance of the project class.
refer to the projects folder for examples.
@pre
* python 2.7, including python-pip
* numpy
* nose from Debian python-nose
* statsmodels from Debian python-statsmodels, or PyPI (https://pypi.python.org/pypi/statsmodels)
* periodictable from PyPI (https://pypi.python.org/pypi/periodictable)
* mpi4py from PyPI (the Debian package may have a bug causing the program to crash)
* OpenMPI, including libopenmpi-dev
* SWIG from Debian swig
to install a PyPI package, e.g. periodictable, do
@code{.sh}
pip install --user periodictable
@endcode
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
from builtins import range
import datetime
import logging
import importlib
import os.path
import sys
import datetime
import argparse
import logging
import cluster
import dispatch
import handlers
import files
import calculator
import swarm
import grid
# import gradient
from mpi4py import MPI
import pmsco.calculators.calculator as calculator
import pmsco.cluster as cluster
import pmsco.dispatch as dispatch
import pmsco.files as files
import pmsco.handlers as handlers
from pmsco.optimizers import genetic, swarm, grid, table
# the module-level logger
logger = logging.getLogger(__name__)
@ -152,13 +139,20 @@ def set_common_args(project, args):
logger.debug("creating project")
mode = args.mode.lower()
if mode in {'single', 'grid', 'swarm'}:
if mode in {'single', 'grid', 'swarm', 'genetic', 'table'}:
project.mode = mode
else:
logger.error("invalid optimization mode '%s'.", mode)
if args.pop_size:
project.pop_size = args.pop_size
project.optimizer_params['pop_size'] = args.pop_size
if args.seed_file:
project.optimizer_params['seed_file'] = args.seed_file
if args.seed_limit:
project.optimizer_params['seed_limit'] = args.seed_limit
if args.table_file:
project.optimizer_params['table_file'] = args.table_file
code = args.code.lower()
if code in {'edac', 'msc', 'test'}:
@ -178,6 +172,10 @@ def set_common_args(project, args):
if mode == 'single':
cats -= {'model'}
project.files.categories_to_delete = cats
if args.keep_levels > project.keep_levels:
project.keep_levels = args.keep_levels
if args.keep_best > project.keep_best:
project.keep_best = args.keep_best
def log_project_args(project):
@ -190,8 +188,19 @@ def log_project_args(project):
try:
logger.info("scattering code: {0}".format(project.code))
logger.info("optimization mode: {0}".format(project.mode))
logger.info("minimum swarm size: {0}".format(project.pop_size))
try:
logger.info("minimum population size: {0}".format(project.optimizer_params['pop_size']))
except KeyError:
pass
try:
logger.info("seed file: {0}".format(project.optimizer_params['seed_file']))
logger.info("seed limit: {0}".format(project.optimizer_params['seed_limit']))
except KeyError:
pass
try:
logger.info("table file: {0}".format(project.optimizer_params['table_file']))
except KeyError:
pass
logger.info("data directory: {0}".format(project.data_dir))
logger.info("output file: {0}".format(project.output_file))
@ -217,10 +226,14 @@ def run_project(project):
optimizer_class = grid.GridSearchHandler
elif project.mode == 'swarm':
optimizer_class = swarm.ParticleSwarmHandler
elif project.mode == 'genetic':
optimizer_class = genetic.GeneticOptimizationHandler
elif project.mode == 'gradient':
logger.error("gradient search not implemented")
# TODO: implement gradient search
# optimizer_class = gradient.GradientSearchHandler
elif project.mode == 'table':
optimizer_class = table.TableModelHandler
else:
logger.error("invalid optimization mode '%s'.", project.mode)
project.handler_classes['model'] = optimizer_class
@ -230,14 +243,14 @@ def run_project(project):
calculator_class = None
if project.code == 'edac':
logger.debug("importing EDAC interface")
import edac_calculator
from pmsco.calculators import edac
project.cluster_format = cluster.FMT_EDAC
calculator_class = edac_calculator.EdacCalculator
calculator_class = edac.EdacCalculator
elif project.code == 'msc':
logger.debug("importing MSC interface")
import msc_calculator
from pmsco.calculators import msc
project.cluster_format = cluster.FMT_MSC
calculator_class = msc_calculator.MscCalculator
calculator_class = msc.MscCalculator
elif project.code == 'test':
logger.debug("importing TEST interface")
project.cluster_format = cluster.FMT_EDAC
@ -273,7 +286,7 @@ class Args(object):
values as the command line parser.
"""
def __init__(self, mode="single", code="edac", output_file=""):
def __init__(self, mode="single", code="edac", output_file="pmsco_data"):
"""
constructor.
@ -284,14 +297,19 @@ class Args(object):
"""
self.mode = mode
self.pop_size = 0
self.seed_file = ""
self.seed_limit = 0
self.code = code
self.data_dir = os.getcwd()
self.output_file = output_file
self.time_limit = 24.0
self.keep_files = []
self.keep_files = files.FILE_CATEGORIES_TO_KEEP
self.keep_best = 10
self.keep_levels = 1
self.log_level = "WARNING"
self.log_file = ""
self.log_enable = True
self.table_file = ""
def get_cli_parser(default_args=None):
@ -324,26 +342,47 @@ def get_cli_parser(default_args=None):
# for simplicity, the parser does not check these requirements.
# all parameters are optional and accepted regardless of mode.
# errors may occur if implicit requirements are not met.
parser.add_argument('-m', '--mode', default='single',
choices=['single', 'grid', 'swarm', 'gradient'],
parser.add_argument('project_module',
help="path to custom module that defines the calculation project")
parser.add_argument('-m', '--mode', default=default_args.mode,
choices=['single', 'grid', 'swarm', 'genetic', 'table'],
help='calculation mode')
parser.add_argument('--pop-size', type=int, default=0,
help='population size (number of particles) in swarm optimization mode. ' +
parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
help='population size (number of particles) in swarm or genetic optimization mode. ' +
'default is the greater of 4 or two times the number of calculation processes.')
parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default="edac",
parser.add_argument('--seed-file',
help='path and name of population seed file. ' +
'population data of previous optimizations can be used to seed a new optimization. ' +
'the file must have the same structure as the .pop or .dat files.')
parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
help='maximum number of models to use from the seed file. ' +
'the models with the best R-factors are selected.')
parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default=default_args.code,
help='scattering code (default: edac)')
parser.add_argument('-d', '--data-dir', default=os.getcwd(),
parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
help='directory path for experimental data files (if required by project). ' +
'default: working directory')
parser.add_argument('-o', '--output-file',
parser.add_argument('-o', '--output-file', default=default_args.output_file,
help='base path for intermediate and output files.' +
'default: pmsco_data')
parser.add_argument('-k', '--keep-files', nargs='*', default=files.FILE_CATEGORIES_TO_KEEP,
'default: pmsco_data')
parser.add_argument('--table-file',
help='path and name of population table file for table optimization mode. ' +
'the file must have the same structure as the .pop or .dat files.')
parser.add_argument('-k', '--keep-files', nargs='*', default=default_args.keep_files,
choices=KEEP_FILES_CHOICES,
help='output file categories to keep after the calculation. '
'by default, cluster and model (simulated data) '
'of a limited number of best models are kept.')
parser.add_argument('-t', '--time-limit', type=float, default=24.0,
parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
help='number of best models for which to keep result files '
'(at each node from root down to keep-levels). '
'default 10 (project can define higher default).')
parser.add_argument('--keep-levels', type=int, choices=range(5),
default=default_args.keep_levels,
help='task level down to which result files of best models are kept. '
'0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region. '
'default 1 (project can define higher default).')
parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
parser.add_argument('--log-file', default=default_args.log_file,
help='name of the main log file. ' +
@ -375,7 +414,47 @@ def parse_cli():
return args, unknown_args
def import_project_module(path):
"""
import the custom project module.
imports the project module given its file path.
the path is expanded to its absolute form and appended to the python path.
@param path: path and name of the module to be loaded.
path is optional and defaults to the python path.
if the name includes an extension, it is stripped off.
@return: the loaded module as a python object
"""
path, name = os.path.split(path)
name, __ = os.path.splitext(name)
path = os.path.abspath(path)
sys.path.append(path)
project_module = importlib.import_module(name)
return project_module
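# a custom project module imported here is expected to provide a create_project()
# function and, optionally, parse_project_args() and set_project_args().
# hypothetical sketch of such a module (my_project.py, names are examples only):
#
#   import pmsco.project as project
#
#   class MyProject(project.Project):
#       def create_domain(self):
#           ...
#
#   def create_project():
#       return MyProject()
#
#   def parse_project_args(unknown_args):
#       ...   # optional: parse project-specific command line arguments
#
#   def set_project_args(project, project_args):
#       ...   # optional: apply the parsed arguments to the project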
def main():
args, unknown_args = parse_cli()
if args:
module = import_project_module(args.project_module)
try:
project_args = module.parse_project_args(unknown_args)
except AttributeError:
project_args = None
project = module.create_project()
set_common_args(project, args)
try:
module.set_project_args(project, project_args)
except AttributeError:
pass
run_project(project)
if __name__ == '__main__':
main_parser = get_cli_parser()
main_parser.print_help()
main()
sys.exit(0)

View File

@ -26,16 +26,27 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import copy
import datetime
import logging
import numpy as np
import collections
import data as md
import cluster as mc
import files
import handlers
import os.path
import socket
import sys
import pmsco.cluster as mc
from pmsco.compat import open
import pmsco.data as md
import pmsco.database as database
import pmsco.dispatch as dispatch
import pmsco.files as files
import pmsco.handlers as handlers
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -95,7 +106,7 @@ class Domain(object):
self.max = {}
self.step = {}
def add_param(self, name, start, min, max, step):
def add_param(self, name, start, min=None, max=None, step=None, width=None):
"""
set the domain of one parameter with all necessary values at once.
@ -107,15 +118,29 @@ class Domain(object):
@param start (float) start value.
@param min (float) lower bound of the parameter interval.
must be lower or equal to start.
if None, the field is set to start.
@param max (float) upper bound of the parameter interval.
must be greater or equal to start.
if None, the field is set to start.
@param width (float) width of the parameter interval.
instead of min and max, the interval can be set centered around the start value.
this is equivalent to min = start - width/2, max = start + width/2.
this argument overrides min and max. don't use both arguments.
@param step (float) step size.
must be greater or equal to zero.
if None, the field is set to zero.
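for example (hypothetical parameter names):
@code{.py}
dom = Domain()
dom.add_param('dAB', start=2.10, width=0.40, step=0.05)  # min=1.90, max=2.30
dom.add_param('V0', start=11.0)                          # fixed parameter
@endcode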
"""
self.start[name] = start
self.min[name] = min
self.max[name] = max
self.step[name] = step
self.min[name] = min if min is not None else start
self.max[name] = max if max is not None else start
if width is not None:
self.min[name] = start - width / 2.
self.max[name] = start + width / 2.
self.step[name] = step if step is not None else 0.0
def get_param(self, name):
"""
@ -144,9 +169,27 @@ class Params(object):
objects of this class are created by the implementation of the create_params() method
of the actual project class.
"""
## @var angular_resolution (float)
# FWHM angular resolution of the detector.
#
# maps to:
# @arg emission angle window (EDAC)
# @arg angular_broadening (MSC)
## @var phase_files (dict)
# dictionary of phase files.
#
# the keys are atomic numbers, the values file names.
# if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
#
# maps to:
# @arg scatterer (EDAC)
# @arg atomic_number, phase_file (MSC)
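#
# example (hypothetical file names): {8: 'o.pha', 26: 'fe.pha'}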
def __init__(self):
self.title = "MSC default parameters"
self.comment = "from msc_project.Params()"
self.title = "default parameters"
self.comment = "set by project.Params()"
self.cluster_file = ""
self.output_file = ""
self.scan_file = ""
@ -154,7 +197,7 @@ class Params(object):
self.initial_state = "1s"
# MSC convention: H, V, L, R, U
self.polarization = "H"
self.angular_broadening = 0.0
self.angular_resolution = 1.0
self.z_surface = 0.0
self.inner_potential = 10.0
# the energy scale of EDAC is referenced to the vacuum level
@ -167,16 +210,14 @@ class Params(object):
self.experiment_temperature = 300.0
self.debye_temperature = 400.0
self.debye_wavevector = 1.0
self.phase_files = {}
# used by MSC only
self.spherical_order = 2
self.scattering_level = 5
self.fcut = 15.0
self.cut = 15.0
self.lattice_constant = 1.0
self.atom_types = 0
self.atomic_number = [1, 2, 3, 4]
self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
self.msq_displacement = {}
self.planewave_attenuation = 1.0
self.vibration_model = "N"
self.substrate_atomic_mass = 1.0
@ -216,9 +257,12 @@ class Scan(object):
# example: ['t','p']
## @var emitter (string)
# chemical symbol of emitter atom
# chemical symbol and, optionally following, further specification (chemical state, environment, ...)
# of photo-emitting atoms.
# the interpretation of this string is up to the project and its cluster generator.
# it should, however, always start with a chemical element symbol.
#
# example: 'Cu'
# examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.
## @var initial_state (string)
# nl term of initial state
@ -342,142 +386,6 @@ class Scan(object):
self.alphas = np.zeros((1))
class ClusterGenerator(object):
"""
cluster generator class.
this class bundles the cluster methods in one place
so that it's easier to exchange them for different kinds of clusters.
the project must override at least the create_cluster method.
if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
"""
def __init__(self, project):
"""
initialize the cluster generator.
@param project: reference to the project object.
cluster generators may need to look up project parameters.
"""
self.project = project
def count_emitters(self, model, index):
"""
return the number of emitter configurations for a particular model.
the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
by default, the method returns 1, which means that there is only one emitter configuration.
emitter configurations are mainly a way to distribute the calculations to multiple processes
based on emitters since the resulting diffraction patterns add up incoherently.
for this to work, the create_cluster() method must pay attention to the emitter index
and generate either a full cluster with all emitters (single process)
or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
whether all emitters are calculated in one or multiple processes is decided at run-time
based on the available resources.
note that this function returns the number of _configurations_ not _atoms_.
an emitter configuration (declared in a Cluster) may include more than one atom.
it is up to the project, what is included in a particular configuration.
to enable multiple emitter configurations, the derived project class must override this method
and return a number greater than 1.
@note in some cases it may be most efficient to call create_cluster and
return Cluster.get_emitter_count() of the generated cluster.
this is possible because the method is called with emitter index -1.
model and index can be passed unchanged to create_cluster.
@param model (dictionary) model parameters to be used in the calculation.
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index is -1 if called by the emitter handler.
@return number of emitter configurations.
this implementation returns the default value of 1.
"""
return 1
def create_cluster(self, model, index):
"""
create a Cluster object given the model parameters and calculation index.
the generated cluster will typically depend on the model parameters.
depending on the project, it may also depend on the scan index, symmetry index and emitter index.
the scan index can be used to generate a different cluster for different scan geometry,
e.g., if some atoms can be excluded due to a longer mean free path.
if this is not the case for the specific project, the scan index can be ignored.
the symmetry index may select a particular domain that has a different atomic arrangement.
in this case, depending on the value of index.sym, the function must generate a cluster corresponding
to the particular domain/symmetry.
the method can ignore the symmetry index if the project defines only one symmetry,
or if the symmetry does not correspond to a different atomic structure.
the emitter index selects a particular emitter configuration.
depending on the value of the emitter index, the method must react differently:
1. if the value lower or equal to zero, return the full cluster and mark all inequivalent emitter atoms.
emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
the full diffraction scan will be calculated in one calculation.
2. if the value is greater than zero, generate the cluster with the emitter configuration
selected by the emitter index.
the index is in the range between 1 and the return value of count_emitters().
the results of the individual emitter calculations are summed up in combine_emitters().
the code should ideally be written such that either case yields the same diffraction result.
if count_emitters() always returns 1 (default), the second case does not have to be implemented,
and the method can ignore the emitter index.
the method must ignore the model and energy index.
@param model (dictionary) model parameters to be used in the calculation.
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index.
if lower or equal to zero, generate the full cluster and mark all emitters.
if greater than zero, the value is a 1-based index of the emitter configuration.
"""
return None
class LegacyClusterGenerator(ClusterGenerator):
"""
cluster generator class for projects that don't declare a generator.
in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
this class redirects generator calls to the project methods
providing compatibility to older project code.
"""
def __init__(self, project):
super(LegacyClusterGenerator, self).__init__(project)
def count_emitters(self, model, index):
"""
redirect the call to the corresponding project method if implemented.
"""
try:
return self.project.count_emitters(model, index)
except AttributeError:
return 1
def create_cluster(self, model, index):
"""
redirect the call to the corresponding project method.
"""
return self.project.create_cluster(model, index)
# noinspection PyMethodMayBeStatic
class Project(object):
"""
@ -549,39 +457,27 @@ class Project(object):
# the initial value is a LegacyClusterGenerator object
# which routes cluster calls back to the project for compatibility with older project code.
## @var pop_size (int)
# population size (number of particles) in the particle swarm optimization.
## @var optimizer_params (dict)
# optional parameters of the model optimizer.
#
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
# you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
# - the calculation of a model takes a long time compared to the available computing time.
# - the calculation of a model spawns many sub-tasks due to complex symmetry.
# - you want to increase the number of generations compared to the number of particles.
# this is a dictionary that can have (among others) the following values.
# for a detailed list, see the documentation of the respective model handler.
#
# the default value is 0.
#
# the value can be set by the command line.
## @var history_file (string)
# name of a file containing the results from previous optimization runs.
# this can be used to resume a swarm optimization where it was interrupted before.
#
# the history file is a space-delimited, multi-column, text file.
# output files of a previous optimization run can be used as is.
# there must be one column for each model parameter, and one column of R factors.
# the first row must contain the names of the model parameters.
# the name of the R factor column must be '_rfac'.
# additional columns may be included and are ignored.
#
# by default, no history is loaded.
## @var recalc_history (bool)
# select whether the R-factors of the historic models are calculated again.
#
# this is useful if the historic data was calculated for a different cluster, different set of parameters,
# or different experimental data, and if the R-factors of the new optimization may be systematically greater.
# set this argument to False only if the calculation is a continuation of a previous one
# without any changes to the code.
# @arg @c 'pop_size' (int)
# population size (number of particles) in the swarm or genetic optimization mode.
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
# you may want to override the default value in cases where the automatic choice is not appropriate.
# the value can be set by the command line.
# @arg @c 'seed_file' (string)
# name of a file containing the results from previous optimization runs.
# this can be used to resume a swarm or genetic optimization where it was interrupted before.
# the seed file is a space-delimited, multi-column, text file,
# e.g., the output file of a previous optimization.
# by default, no seed is loaded.
# @arg @c 'recalc_seed' (bool)
# select whether the R-factors of the seed models are calculated again.
# set this argument to False only if the calculation is a continuation of a previous one
# without any changes to the code.
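#
# for example, a derived project might preset some of these values in its constructor
# (hypothetical values):
# @code{.py}
# self.optimizer_params['pop_size'] = 20
# self.optimizer_params['seed_file'] = "previous_run.dat"
# @endcode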
## @var data_dir
# directory path to experimental data.
@ -594,9 +490,17 @@ class Project(object):
# if the location of the files may depend on the machine or user account,
# the user may want to specify the data path on the command line.
## @var output_dir (string)
# directory path for data files produced during the calculation, including intermediate files.
#
# output_dir and output_file are set at once by @ref set_output.
## @var output_file (string)
# file name root for data files produced during the calculation, including intermediate files.
#
# the file name should include the path. the path must also be set in @ref output_dir.
#
# output_dir and output_file are set at once by @ref set_output.
## @var timedelta_limit (datetime.timedelta)
# wall time after which no new calculations should be started.
@ -604,11 +508,11 @@ class Project(object):
# the actual wall time may be longer by the remaining time of running calculations.
# running calculations will not be aborted.
## @var _combined_scan
## @var combined_scan
# combined raw data from scans.
# updated by add_scan().
## @var _combined_modf
## @var combined_modf
# combined modulation function from scans.
# updated by add_scan().
@ -618,30 +522,55 @@ class Project(object):
#
# files.categories_to_delete determines which files can be deleted.
## @var keep_best
# number of best models for which result files should be kept.
#
# this attribute determines how many models are kept based on R-factor ranking at each node of the task tree
# (up to keep_levels).
## @var keep_levels
# numeric task level down to which R-factors are considered when model files are cleaned up.
#
# @arg 0 = model level: combined results only.
# @arg 1 = scan level: scan nodes in addition to combined results (level 0).
# @arg 2 = symmetry level: symmetry nodes in addition to level 1.
# @arg 3 = emitter level: emitter nodes in addition to level 2.
# @arg 4 = region level: region nodes in addition to level 3.
def __init__(self):
self.mode = "single"
self.code = "edac"
self.features = {}
self.cluster_format = mc.FMT_EDAC
self.cluster_generator = LegacyClusterGenerator(self)
self.cluster_generator = mc.LegacyClusterGenerator(self)
self.scans = []
self.symmetries = []
self.pop_size = 0
self.history_file = ""
self.recalc_history = True
self.optimizer_params = {
'pop_size': 0,
'seed_file': "",
'seed_limit': 0,
'recalc_seed': True,
'table_file': ""
}
self.data_dir = ""
self.output_dir = ""
self.output_file = "pmsco_data"
self.timedelta_limit = datetime.timedelta(days=1)
self._combined_scan = None
self._combined_modf = None
self.combined_scan = None
self.combined_modf = None
self.files = files.FileTracker()
self.handler_classes = {}
self.handler_classes['model'] = handlers.SingleModelHandler
self.handler_classes['scan'] = handlers.ScanHandler
self.handler_classes['symmetry'] = handlers.SymmetryHandler
self.handler_classes['emitter'] = handlers.EmitterHandler
self.handler_classes['region'] = handlers.SingleRegionHandler
self.keep_levels = 1
self.keep_best = 10
self.handler_classes = {
'model': handlers.SingleModelHandler,
'scan': handlers.ScanHandler,
'sym': handlers.SymmetryHandler,
'emit': handlers.EmitterHandler,
'region': handlers.SingleRegionHandler
}
self.calculator_class = None
self._tasks_fields = []
self._db = database.ResultsDatabase()
def create_domain(self):
"""
@ -676,8 +605,8 @@ class Project(object):
@return: None
"""
self.scans = []
self._combined_scan = None
self._combined_modf = None
self.combined_scan = None
self.combined_modf = None
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
"""
@ -695,7 +624,7 @@ class Project(object):
* intensity vs theta and phi (hemisphere or hologram scan)
the method calculates the modulation function if @c is_modf is @c False.
it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.
it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.
@param filename: (string) file name of the experimental data, possibly including a path.
@ -732,22 +661,26 @@ class Project(object):
scan.modulation = None
if scan.raw_data is not None:
if self._combined_scan is not None:
dtype = md.common_dtype((self._combined_scan, scan.raw_data))
self._combined_scan = np.hstack((self._combined_scan, md.restructure_data(scan.raw_data, dtype)))
if self.combined_scan is not None:
dt = md.common_dtype((self.combined_scan, scan.raw_data))
d1 = md.restructure_data(self.combined_scan, dt)
d2 = md.restructure_data(scan.raw_data, dt)
self.combined_scan = np.hstack((d1, d2))
else:
self._combined_scan = scan.raw_data.copy()
self.combined_scan = scan.raw_data.copy()
else:
self._combined_scan = None
self.combined_scan = None
if scan.modulation is not None:
if self._combined_modf is not None:
dtype = md.common_dtype((self._combined_modf, scan.modulation))
self._combined_modf = np.hstack((self._combined_modf, md.restructure_data(scan.modulation, dtype)))
if self.combined_modf is not None:
dt = md.common_dtype((self.combined_modf, scan.modulation))
d1 = md.restructure_data(self.combined_modf, dt)
d2 = md.restructure_data(scan.modulation, dt)
self.combined_modf = np.hstack((d1, d2))
else:
self._combined_modf = scan.modulation.copy()
self.combined_modf = scan.modulation.copy()
else:
self._combined_modf = None
self.combined_modf = None
return scan
@ -783,9 +716,16 @@ class Project(object):
def set_output(self, filename):
"""
set base name of output file
set path and base name of output file.
path and name are copied to the output_file attribute.
path is copied to the output_dir attribute.
if the path is missing, the destination is the current working directory.
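for example (hypothetical path):
@code{.py}
project.set_output("work/run1/myproject")
# => output_file = "work/run1/myproject", output_dir = "work/run1"
@endcode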
"""
self.output_file = filename
path, name = os.path.split(filename)
self.output_dir = path
def set_timedelta_limit(self, timedelta):
"""
@ -797,12 +737,15 @@ class Project(object):
def combine_symmetries(self, parent_task, child_tasks):
"""
combine results of different symmetry into one result. calculate the modulation function.
combine results of different symmetry into one result and calculate the modulation function.
the symmetry results are read from the file system using the indices defined by the child_tasks,
and the combined result is written to the file system with the index defined by parent_task.
by default, this method adds all symmetries with equal weight.
weights can be defined in the model dictionary with keys 'wsym0', 'wsym1', etc.
missing weights default to 1.
note: to avoid correlated parameters, one symmetry must always have a fixed weight.
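for example, with two symmetries a project might declare only the weight of the second one
as a model parameter (hypothetical values), leaving symmetry 0 at the default weight of 1:
@code{.py}
dom.add_param('wsym1', start=0.5, min=0.0, max=1.0, step=0.1)
@endcode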
@param parent_task: (CalculationTask) parent task of the symmetry tasks.
the method must write the results to the files indicated
@ -817,19 +760,26 @@ class Project(object):
@raise IndexError if child_tasks is empty
@raise KeyError if a filename is missing
@raise IOError if a filename is missing
@note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
the model parameters are available as the @c model attribute of the calculation tasks.
"""
result_data = None
sum_weights = 0.
for task in child_tasks:
data = md.load_data(task.result_filename)
if result_data is not None:
result_data['i'] += data['i']
else:
result_data = data
if result_data is None:
result_data = data.copy()
result_data['i'] = 0.
try:
weight = task.model['wsym{}'.format(task.id.sym)]
except KeyError:
weight = 1.
result_data['i'] += weight * data['i']
sum_weights += weight
result_data['i'] /= sum_weights
md.save_data(parent_task.result_filename, result_data)
@ -865,7 +815,7 @@ class Project(object):
@raise IndexError if child_tasks is empty
@raise KeyError if a filename is missing
@raise IOError if a filename is missing
@note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
the model parameters are available as the @c model attribute of the calculation tasks.
@ -898,7 +848,7 @@ class Project(object):
the datasets of the scans are appended.
this is done for intensity and modulation data independently.
@param parent_task: (CalculationTask) parent task of the symmetry tasks.
@param parent_task: (CalculationTask) parent task of the scan tasks.
the method must write the results to the files indicated
by the @c result_filename and @c modf_filename attributes.
@ -910,14 +860,12 @@ class Project(object):
@return: None
@raise IndexError if child_tasks is empty.
@raise KeyError if a filename is missing.
"""
# intensity
try:
stack1 = [md.load_data(task.result_filename) for task in child_tasks]
except (KeyError, IOError):
except IOError:
parent_task.result_filename = ""
else:
dtype = md.common_dtype(stack1)
@ -928,7 +876,7 @@ class Project(object):
# modulation
try:
stack1 = [md.load_data(task.modf_filename) for task in child_tasks]
except (KeyError, IOError):
except IOError:
parent_task.modf_filename = ""
else:
dtype = md.common_dtype(stack1)
@ -936,6 +884,142 @@ class Project(object):
result_modf = np.hstack(tuple(stack2))
md.save_data(parent_task.modf_filename, result_modf)
def combine_regions(self, parent_task, child_tasks):
"""
combine results from different regions into one result, for intensity and modulation.
the scan results are read from the file system using the indices defined by the child_tasks,
and the combined result is written to the file system with the index defined by parent_task.
the datasets of the regions are appended and sorted in the standard order of the data module.
if the resulting length differs from the corresponding experimental scan,
an error is printed to the logger, but the calculation continues.
the modulation function is calculated by calling @ref calc_modulation.
@param parent_task: (CalculationTask) parent task of the region tasks.
the method writes the results to the file names
given by the @c result_filename and @c modf_filename attributes.
@param child_tasks: (sequence of CalculationTask) tasks which identify each region.
the method reads the source data from the files
indicated by the @c result_filename attributes.
the sequence is sorted by task ID, i.e., essentially, by region index.
@return: None
@raise IndexError if child_tasks is empty.
"""
# intensity
try:
stack1 = [md.load_data(task.result_filename) for task in child_tasks]
except IOError:
parent_task.result_valid = False
parent_task.result_filename = ""
else:
dtype = md.common_dtype(stack1)
stack2 = [md.restructure_data(data, dtype) for data in stack1]
result_data = np.hstack(tuple(stack2))
md.sort_data(result_data)
md.save_data(parent_task.result_filename, result_data)
scan = self.scans[parent_task.id.scan]
if result_data.shape[0] != scan.raw_data.shape[0]:
logger.error(BMsg("scan length mismatch: combined result: {result}, experimental data: {expected}",
result=result_data.shape[0], expected=scan.raw_data.shape[0]))
# modulation
try:
data = md.load_data(parent_task.result_filename)
modf = self.calc_modulation(data, parent_task.model)
except IOError:
parent_task.modf_filename = ""
else:
md.save_data(parent_task.modf_filename, modf)
def setup(self, handlers):
"""
prepare for calculations.
this method is called in the master process before starting the task loop.
at this point the task handlers have been created and set up.
if the project needs to change settings of task handlers it can do so in this method.
this implementation writes the header of the tasks.dat file
that will receive sub-task evaluation results from the evaluate_result() method.
@param handlers: dictionary listing the initialized task handler instances.
the dictionary keys are the attribute names of pmsco.dispatch.CalcID:
'model', 'scan', 'sym', 'emit' and 'region'.
@return: None
"""
fields = ["rfac"]
fields.extend(dispatch.CalcID._fields)
fields = ["_" + f for f in fields]
dom = self.create_domain()
model_fields = sorted(dom.start.keys(), key=lambda name: name.lower())
fields.extend(model_fields)
self._tasks_fields = fields
with open(self.output_file + ".tasks.dat", "w") as outfile:
outfile.write("# ")
outfile.write(" ".join(fields))
outfile.write("\n")
# todo : fill in the descriptive fields, change to file-database
self._db.connect(":memory:")
project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
job_id = self._db.register_job(project_id,
"job-name",
self.mode,
socket.gethostname(),
"git-hash",
datetime.datetime.now(),
"description")
self._db.register_params(model_fields)
self._db.create_models_view()
def evaluate_result(self, parent_task, child_tasks):
"""
evaluate the result of a calculation task.
this method is called from the add_result of the task handlers at each level.
it gives the project a hook to check the progress of a model at any level of the task tree.
the method calculates the r-factor by calling the Project.calc_rfactor method.
the result is written to the task.rfac field and to the .tasks.dat file.
invalid and region-level results are skipped.
this method is called in the master process only.
@param parent_task: (CalculationTask) a calculation task.
@param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
the sequence must be sorted by task ID.
@return: None
"""
if parent_task.result_valid and parent_task.id.region == -1:
try:
parent_task.rfac = self.calc_rfactor(parent_task, child_tasks)
except ValueError:
parent_task.result_valid = False
logger.warning(BMsg("calculation {0} resulted in an undefined R-factor.", parent_task.id))
else:
values_dict = parent_task.id._asdict()
values_dict = {"_" + k: v for k, v in values_dict.items()}
values_dict.update(parent_task.model)
values_dict['_rfac'] = parent_task.rfac
values_list = [values_dict[field] for field in self._tasks_fields]
with open(self.output_file + ".tasks.dat", "a") as outfile:
outfile.write(" ".join(format(value) for value in values_list) + "\n")
self._db.insert_result(parent_task.id, values_dict)
return None
# noinspection PyUnusedLocal
def calc_modulation(self, data, model):
"""
@ -965,31 +1049,246 @@ class Project(object):
return md.calc_modfunc_loess(data)
def calc_rfactor(self, task):
def calc_rfactor(self, parent_task, child_tasks):
"""
calculate the R-factor of a task.
calculate the r-factor of a task.
the method calculates the R-factor over the combined scans.
the corresponding experimental data is taken from self._combined_modf.
the r-factor is calculated on the experimental and simulated modulation functions.
the algorithm differs for the model level and the lower task levels.
at the model level, the calculation is delegated to Project.combine_rfactors.
at all other levels, the r-factor is calculated by Project.rfactor,
where the simulated modulation function is loaded from the file specified by parent_task.modf_filename
and the experimental data is taken from the corresponding scan in Project.scans.
this method is called by the model handler.
this method is called by the task handlers.
all child tasks belonging to the parent task must be complete.
by default, the R-factor is calculated by data.rfactor() over the combined scans.
override this method in your project to use a different R-factor algorithm.
to select or implement a specific R-factor algorithm,
the project sub-class should override Project.rfactor.
to combine scan r-factors, it should override or patch Project.combine_rfactors.
@param task: (CalculationTask) a model task.
@version in earlier versions,
projects had to override this method to implement their algorithm.
this has led to duplication of common code.
the r-factor algorithm is now distributed over several methods,
and the method signature has changed.
new projects should override Project.rfactor and/or Project.combine_rfactors.
@return (int) calculated R-factor.
@param parent_task: (CalculationTask) a calculation task.
@param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
the sequence must be sorted by task ID.
@return (float) calculated R-factor.
@raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
"""
task_data = md.load_data(task.modf_filename)
result_r = md.rfactor(self._combined_modf, task_data)
if parent_task.id.scan >= 0:
task_data = md.load_data(parent_task.modf_filename)
exp_data = self.scans[parent_task.id.scan].modulation
result_r = self.rfactor(exp_data, task_data)
else:
result_r = self.combine_rfactors(parent_task, child_tasks)
return result_r
def rfactor(self, exp_data, theo_data):
"""
calculate the r-factor of simulated diffraction data.
in this class, the method calls the data.rfactor function to calculate the r-factor.
override this method in your project to use a different R-factor algorithm.
the input arrays must have the same shape,
and the coordinate columns must be identical (they are ignored, however).
the array elements are compared element-by-element.
terms having NaN intensity are ignored.
if the sigma column is present in experiment and non-zero,
the R-factor terms are weighted.
@param exp_data: (numpy structured array)
ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
if an @c s field is present and non-zero,
the R-factor terms are weighted by 1/sigma**2.
@param theo_data: (numpy structured array)
ETPI or ETPAI array containing the calculated modulation functions.
@return: (float) scalar R-factor
@raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
"""
return md.rfactor(exp_data, theo_data)
def opt_rfactor(self, exp_data, theo_data):
"""
calculate the r-factor of simulated diffraction data, adjusting their amplitude.
this is an alternative r-factor calculation algorithm
using the pmsco.data.optimize_rfactor() function.
to activate this method (replacing the default one), assign it to Project.rfactor
in the overriding __init__ or setup method:
@code{.py}
self.rfactor = self.opt_rfactor
@endcode
@param exp_data: (numpy structured array)
ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
if an @c s field is present and non-zero,
the R-factor terms are weighted by 1/sigma**2.
@param theo_data: (numpy structured array)
ETPI or ETPAI array containing the calculated modulation functions.
@return: (float) scalar R-factor
@raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
"""
return md.optimize_rfactor(exp_data, theo_data)
def combine_rfactors(self, parent_task, child_tasks):
"""
combine r-factors of child tasks.
the r-factors are taken from the rfac attribute of the child_tasks.
the result is an average of the child r-factors.
to produce a balanced result, every child dataset must contain a similar amount of information.
if this is not the case, the child r-factors must be weighted.
weighting is currently not implemented but may be introduced in a future version.
the method is intended to be used at the model level (children are scans),
though it can technically be used at any level where child r-factors are available.
@param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
i.e. a model task.
@param child_tasks: (sequence of CalculationTask) child tasks of parent_tasks
that may be consulted for calculating the r-factor.
@return: (float) r-factor, NaN if parent task is invalid
@raise ValueError or IndexError if child_tasks is empty.
"""
if parent_task.result_valid:
rsum = 0.
for task in child_tasks:
rsum += task.rfac
return rsum / len(child_tasks)
else:
return float('nan')
def alt_combine_rfactors(self, parent_task, child_tasks):
"""
combine r-factors of child tasks by explicit calculation on the combined result.
this is an alternative implementation of combine_rfactors.
instead of using the r-factors from child tasks,
it re-calculates the r-factor for the combined dataset.
this method avoids the issue of weighting
but can introduce bias if the amplitudes of the child datasets differ substantially.
the calculated modulation function is loaded from the file specified by the parent task,
and the corresponding experimental data is taken from self.combined_modf.
to activate this method, assign it to combine_rfactors
in the overriding __init__ or setup method:
@code{.py}
self.combine_rfactors = self.alt_combine_rfactors
@endcode
@param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
i.e. a model task.
@param child_tasks: (sequence of CalculationTask) child tasks of parent_tasks
that may be consulted for calculating the r-factor.
@return: (float) r-factor, NaN if parent task is invalid
"""
if parent_task.result_valid:
task_data = md.load_data(parent_task.modf_filename)
exp_data = self.combined_modf
return self.rfactor(exp_data, task_data)
else:
return float('nan')
def export_cluster(self, index, filename, cluster):
"""
export the cluster of a calculation task in XYZ format for diagnostics and reporting.
this method is called with the final cluster just before it is handed over to the calculator.
it saves the atom coordinates in XYZ format for future reference (e.g. graphics).
the method creates two files:
@arg a file with extension '.xyz' contains the whole cluster in XYZ format.
@arg a file with extension '.emit.xyz' contains only emitter atoms in XYZ format.
the first part of the file name is formatted with the output name and the complete task identification.
the file is registered with the file tracker in the 'cluster' category
so that it will be deleted unless the cluster category is selected for keeping.
derived project class may override or extend this method
to carry out further diagnostics or reporting on the cluster.
@param index: (CalcID) calculation index to which the cluster belongs.
region may be -1 if only one cluster is exported for all regions
(clusters do not depend on the scan region).
emit may be -1 if the cluster is a master from which emitter-related child clusters are derived.
@param filename: (str) base file name for the output files.
the filename should be formatted using pmsco.dispatch.CalculationTask.format_filename().
extensions are appended by this method.
@param cluster: a pmsco.cluster.Cluster() object with all atom positions and emitters.
@return: dictionary listing the names of the created files with their category.
the dictionary key is the file name,
the value is the file category (cluster).
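a minimal sketch of how a derived project might extend this method
(MyProject is an illustrative name, and a module-level logger is assumed
as used elsewhere in pmsco):
@code{.py}
def export_cluster(self, index, filename, cluster):
    _files = super(MyProject, self).export_cluster(index, filename, cluster)
    # additional diagnostics, e.g. report the exported files
    logger.info("cluster export for %s: %s", filename, ", ".join(_files))
    return _files
@endcode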
"""
_files = {}
xyz_filename = filename + ".xyz"
cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
_files[xyz_filename] = 'cluster'
xyz_filename = filename + ".emit.xyz"
cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ, emitters_only=True)
_files[xyz_filename] = 'cluster'
return _files
def cleanup(self):
"""
delete unwanted files at the end of a project.
@return: None
"""
self.cleanup_files()
self._db.disconnect()
def cleanup_files(self, keep=0):
"""
delete uninteresting files.
these are all files that
belong to one of the self.files.categories_to_delete categories or
do not belong to one of the "best" models.
"best" models are a number (self.keep_best) of models that gave the lowest R-factors
at each task level from root to self.keep_levels.
for example, if `keep_best = 10` and `keep_levels = 1`,
the 10 best models at the top level and the 10 best at the scan level are kept.
this means that in total up to `n = 10 + 10 * n_scans` models may be kept,
where n_scans is the number of scan files in the job.
@param keep: minimum number of best models to keep.
0 (default): use the project parameter self.keep_best.
@return None
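for example, a derived project could set the retention policy
in its overriding __init__ or setup method (a sketch, the values are arbitrary):
@code{.py}
self.keep_best = 10    # keep the 10 best models per level
self.keep_levels = 1   # track best models down to the scan level
@endcode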
"""
self.files.delete_files()
if 'rfac' in self.files.categories_to_delete:
keep = max(keep, self.keep_best)
keepers = self._db.query_best_task_models(self.keep_levels, keep)
self.files.delete_models(keep=keepers)

View File

@ -1,909 +0,0 @@
"""
@package pmsco.swarm
particle swarm optimization handler.
the module starts multiple MSC calculations and optimizes the model parameters
according to the particle swarm optimization algorithm.
Particle swarm optimization adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import division
import copy
import os
import datetime
import logging
import numpy as np
import handlers
from helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
CONSTRAIN_MODES = {'re-enter', 'bounce', 'scatter', 'stick', 'expand'}
class Population(object):
"""
particle swarm population.
"""
## @var size_req
# requested number of particles.
# read-only. call setup() to change this attribute.
## @var model_start
# (dict) initial model parameters.
# read-only. call setup() to change this attribute.
## @var model_min
# (dict) low limits of the model parameters.
# read-only. call setup() to change this attribute.
## @var model_max
# (dict) high limits of the model parameters.
# if min == max, the parameter is kept constant.
# read-only. call setup() to change this attribute.
## @var model_step
# (dict) initial velocity (difference between two steps) of the particle.
# read-only. call setup() to change this attribute.
## @var friends
# number of other particles that each particle consults for the global best fit.
# default = 3.
## @var momentum
# momentum of the particle.
# default = 0.689343.
## @var attract_local
# preference for returning to the local best fit
# default = 1.92694.
## @var attract_global
# preference for heading towards the global best fit.
# default = 1.92694
## @var generation
# generation number. the counter is incremented by advance_population().
# initial value = 0.
## @var model_count
# model number.
# the counter is incremented by advance_particle() each time a particle position is changed.
# initial value = 0.
## @var pos
# (numpy.ndarray) current positions of each particle.
#
# the column names include the names of the model parameters, taken from domain.start,
# and the special names @c '_particle', @c '_model', @c '_rfac'.
# the special fields have the following meanings:
#
# * @c '_particle': index of the particle in the array.
# the particle index is used to match a calculation result and its original particle.
# it must be preserved during the calculation process.
#
# * @c '_gen': generation number.
# the generation number counts the number of calls to advance_population().
# this field is not used internally.
# the first population is generation 0.
#
# * @c '_model': model number.
# the model number counts the number of calls to advance_particle().
# the field is filled with the current value of model_count whenever the position is changed.
# this field is not used internally.
# the model handlers use it to derive their model ID.
#
# * @c '_rfac': calculated R-factor for this position.
# this field is meaningful in the best and results arrays only
# where it is set by the add_result() method.
# in the pos and vel arrays, the field value is arbitrary.
#
# @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
# this object is a <em>view</em> of the original array item: assigning to it modifies the original array.
## @var vel
# (numpy.ndarray) the current velocities of each particle.
# the structure is the same as for the pos array.
## @var best
# (numpy.ndarray) best positions found by each particle so far.
# the structure is the same as for the pos array.
## @var results
# (numpy.ndarray) all positions and resulting R-factors calculated.
# the structure is the same as for the pos array.
## @var _hold_once
# (bool) hold the population once during the next update.
# if _hold_once is True, advance_population() will skip the update process once.
# this flag is set by setup() because it sets up a valid initial population.
# the caller then doesn't have to care whether to skip advance_population() after setup.
def __init__(self):
"""
initialize the population object.
"""
self.size_req = 0
self.model_start = {}
self.model_min = {}
self.model_max = {}
self.model_step = {}
self.friends = 3
self.momentum = 0.689343
self.attract_local = 1.92694
self.attract_global = 1.92694
self.position_constrain_mode = 'default'
self.velocity_constrain_mode = 'default'
self.generation = 0
self.model_count = 0
self._hold_once = False
self.pos = None
self.vel = None
self.best = None
self.results = None
def pos_gen(self):
"""
generator for dictionaries of the pos array.
the generator can be used to loop over the array.
on each iteration, it yields a dictionary of the position at the current index.
for example,
@code{.py}
for pos in pop.pos_gen():
print(pos['_particle'], pos['_rfac'])
@endcode
"""
return ({name: pos[name] for name in pos.dtype.names} for pos in self.pos)
def vel_gen(self):
"""
generator for dictionaries of the vel array.
@see pos_gen() for details.
"""
return ({name: vel[name] for name in vel.dtype.names} for vel in self.vel)
def best_gen(self):
"""
generator for dictionaries of the best array.
@see pos_gen() for details.
"""
return ({name: best[name] for name in best.dtype.names} for best in self.best)
def results_gen(self):
"""
generator for dictionaries of the results array.
@see pos_gen() for details.
"""
return ({name: results[name] for name in results.dtype.names} for results in self.results)
@staticmethod
def get_model_dtype(model_params):
"""
get numpy array data type for model parameters and swarm control variables.
@param model_params: dictionary of model parameters or list of parameter names.
@return: dtype for use with numpy array constructors.
this is a sorted list of (name, type) tuples.
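example (a sketch; the parameter names are arbitrary):
@code{.py}
dt = Population.get_model_dtype({'dAB': 2.3, 'th': 15.0})
pos = np.zeros(10, dtype=dt)
# columns, sorted case-insensitively: _gen, _model, _particle, _rfac, dAB, th
@endcode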
"""
dt = []
for key in model_params:
dt.append((key, 'f4'))
dt.append(('_particle', 'i4'))
dt.append(('_gen', 'i4'))
dt.append(('_model', 'i4'))
dt.append(('_rfac', 'f4'))
dt.sort(key=lambda t: t[0].lower())
return dt
def setup(self, size, domain, history_file="", recalc_history=True):
"""
set up the population arrays seeded with previous results and the start model.
* set the population parameters and allocate the data arrays.
* set one particle to the initial guess, and the others to positions from a previous results file.
if the file contains fewer particles than allocated, the remaining particles are initialized randomly.
seeding from a history file can be used to continue an interrupted optimization process.
the method loads the results into the best and position arrays,
and updates the other arrays and variables
so that the population can be advanced and calculated.
by default, the calculations of the previous parameters are repeated.
this is recommended whenever the code, the experimental input, or the project arguments change
because all of them may have an influence on the R-factor.
re-calculation can be turned off by setting recalc_history to false.
this is recommended only if the calculation is a direct continuation of a previous one
without any changes to the code or input.
in that case, the previous results are marked as generation -1 with a negative model number.
upon the first iteration before running the scattering calculations,
new parameters will be derived by the swarm algorithm.
@param size: requested number of particles.
@param domain: definition of initial and limiting model parameters
expected by the cluster and parameters functions.
@arg domain.start: initial guess.
@arg domain.min: minimum values allowed.
@arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
@arg domain.step: initial velocity (difference between two steps) for particle swarm.
@param history_file: name of the results history file.
this can be a file created by the @ref save_array or @ref save_results methods.
the columns of the plain-text file contain model parameters and
the _rfac values of a previous calculation.
additional columns are ignored.
the first row must contain the column names.
if a parameter column is missing,
the corresponding parameter is seeded with a random value within the domain.
in this case, a warning is added to the log file.
the number of rows does not need to be equal to the population size.
if it is lower, the remaining particles are initialized randomly.
if it is higher, only the ones with the lowest R factors are used.
results with R >= 1.0 are ignored in any case.
@param recalc_history: select whether the R-factors of the historic models are calculated again.
this is useful if the historic data was calculated for a different cluster, different set of parameters,
or different experimental data, and if the R-factors of the new optimization may be systematically greater.
set this argument to False only if the calculation is a continuation of a previous one
without any changes to the code.
@return: None
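a minimal history file might look as follows
(a sketch; dAB and th stand for arbitrary model parameters,
extra columns such as _particle, _gen or _model are ignored):
@verbatim
_rfac dAB th
0.356 2.31 14.2
0.401 2.28 15.0
@endverbatim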
"""
self.size_req = size
self.model_start = domain.start
self.model_min = domain.min
self.model_max = domain.max
self.model_step = domain.step
# allocate arrays
dt = self.get_model_dtype(self.model_start)
self.pos = np.zeros(self.size_req, dtype=dt)
self.vel = np.zeros(self.size_req, dtype=dt)
self.results = np.empty((0), dtype=dt)
# randomize population
self.generation = 0
self.randomize()
self.pos['_particle'] = np.arange(self.size_req)
self.pos['_gen'] = self.generation
self.pos['_model'] = np.arange(self.size_req)
self.pos['_rfac'] = 2.1
self.model_count = self.size_req
# add previous results
if history_file:
hist = np.genfromtxt(history_file, names=True)
hist = hist[hist['_rfac'] < 1.0]
hist.sort(order='_rfac')
hist_size = min(hist.shape[0], self.size_req - 1)
discarded_fields = {'_particle', '_gen', '_model'}
source_fields = set(hist.dtype.names) - discarded_fields
dest_fields = set(self.pos.dtype.names) - discarded_fields
common_fields = source_fields & dest_fields
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("missing columns in history file {hf} default to random seed value.",
hf=history_file))
for name in common_fields:
self.pos[name][0:hist_size] = hist[name][0:hist_size]
self.pos['_particle'] = np.arange(self.size_req)
logger.info(BMsg("seeding swarm population with {hs} models from history file {hf}.",
hs=hist_size, hf=history_file))
if recalc_history:
self.pos['_gen'] = self.generation
self.pos['_model'] = np.arange(self.size_req)
self.pos['_rfac'] = 2.1
logger.info("historic models will be re-calculated.")
else:
self.pos['_gen'][0:hist_size] = -1
self.pos['_model'][0:hist_size] = -np.arange(hist_size) - 1
self.model_count = self.size_req - hist_size
self.pos['_model'][hist_size:] = np.arange(self.model_count)
logger.info("historic models will not be re-calculated.")
# seed last particle with start parameters
self.seed(self.model_start, index=-1)
# initialize best array
self.best = self.pos.copy()
self._hold_once = True
def randomize(self, pos=True, vel=True):
"""
initializes a random population.
the position array is filled with random values (uniform distribution) from the parameter domain.
velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain.
the method does not update the particle info fields.
@param pos: randomize positions. if False, the positions are not changed.
@param vel: randomize velocities. if False, the velocities are not changed.
"""
if pos:
for key in self.model_start:
self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
np.random.random_sample(self.pos.shape) + self.model_min[key])
if vel:
for key in self.model_start:
self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
(np.random.random_sample(self.pos.shape) - 0.5) / 4.0)
def seed(self, params, index=0):
"""
set one of the particles to the specified seed values.
the method does not update the particle info fields.
@param params: dictionary of model parameters.
the keys must match the ones of domain.start.
@param index: index of the particle that is seeded.
the index must be in the allowed range of the self.pos array.
0 is the first, -1 the last particle.
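example (a sketch; the parameter names are arbitrary and must match domain.start):
@code{.py}
pop.seed({'dAB': 2.3, 'th': 15.0}, index=-1)
@endcode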
"""
for key in params:
self.pos[key][index] = params[key]
def update_particle_info(self, index, inc_model=True):
"""
set the internal particle info fields.
the fields @c _particle, @c _gen, and @c _model are updated with the current values.
@c _rfac is set to the default value 2.1.
this method must be called after each change of particle position.
@param index: (int) particle index.
@param inc_model: (bool) if True, increment the model count afterwards.
@return: None
"""
self.pos['_particle'][index] = index
self.pos['_gen'][index] = self.generation
self.pos['_model'][index] = self.model_count
self.pos['_rfac'][index] = 2.1
if inc_model:
self.model_count += 1
def advance_population(self):
"""
advance the population by one step.
this method just calls advance_particle() for each particle of the population.
if the hold flag is set (immediately after setup()), the method clears the flag and does not advance the particles.
@return: None
"""
if not self._hold_once:
self.generation += 1
for index, __ in enumerate(self.pos):
self.advance_particle(index)
self._hold_once = False
def advance_particle(self, index):
"""
advance a particle by one step.
@param index: index of the particle in the population.
"""
# note: the following two identifiers are views,
# assignment will modify the original array
pos = self.pos[index]
vel = self.vel[index]
# best fit that this individual has seen
xl = self.best[index]
# best fit that a group of others have seen
xg = self.best_friend(index)
for key in self.model_start:
# update velocity
dxl = xl[key] - pos[key]
dxg = xg[key] - pos[key]
pv = np.random.random()
pl = np.random.random()
pg = np.random.random()
vel[key] = (self.momentum * pv * vel[key] +
self.attract_local * pl * dxl +
self.attract_global * pg * dxg)
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.velocity_constrain_mode)
# update position
pos[key] += vel[key]
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.position_constrain_mode)
self.update_particle_info(index)
@staticmethod
def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
"""
constrain a velocity to the given bounds.
@param _pos: current position of the particle.
@param _vel: new velocity of the particle, i.e. distance to move.
@param _min: lower position boundary.
@param _max: upper position boundary.
_max must be greater or equal to _min.
@param _mode: what to do if a boundary constraint is violated.
reserved for future use. should be set to 'default'.
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
in the current implementation only the velocity may change.
however, in future versions any of these values may change.
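example (a sketch): a velocity larger than half of the parameter range
is halved until it fits.
@code{.py}
pos, vel, lo, hi = Population.constrain_velocity(0.5, 3.0, 0.0, 1.0)
# vel == 0.375, all other values unchanged
@endcode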
"""
d = abs(_max - _min) / 2.0
if d > 0.0:
while abs(_vel) >= d:
_vel /= 2.0
else:
_vel = 0.0
return _pos, _vel, _min, _max
@staticmethod
def constrain_position(_pos, _vel, _min, _max, _mode='default'):
"""
constrain a position to the given bounds.
@param _pos: new position of the particle, possible out of bounds.
@param _vel: velocity of the particle, i.e. distance from the previous position.
_vel must be lower than _max - _min.
@param _min: lower boundary.
@param _max: upper boundary.
_max must be greater or equal to _min.
@param _mode: what to do if a boundary constraint is violated:
@arg 're-enter': re-enter from the opposite side of the parameter interval.
@arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
@arg 'scatter': place the particle at a random place between its old position and the violated boundary.
@arg 'stick': place the particle at the violated boundary.
@arg 'expand': move the boundary so that the particle fits.
@arg 'random': place the particle at a random position between the lower and upper boundaries.
@arg 'default': the default mode is 'bounce'. this may change in future versions.
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
depending on the mode, any of these values may change.
the velocity is adjusted to be consistent with the change of position.
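example (a sketch): a particle overshooting the upper bound in 'bounce' mode
is reflected at the boundary and its velocity reversed.
@code{.py}
pos, vel, lo, hi = Population.constrain_position(1.2, 0.5, 0.0, 1.0, 'bounce')
# pos == 0.8, vel == -0.5, boundaries unchanged
@endcode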
"""
_rng = max(_max - _min, 0.0)
_old = _pos - _vel
# prevent undershoot
if _vel > 0.0 and _pos < _min:
_pos = _min
_vel = _pos - _old
if _vel < 0.0 and _pos > _max:
_pos = _max
_vel = _pos - _old
assert abs(_vel) <= _rng, \
"velocity: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
assert (_vel >= 0 and _pos >= _min) or (_vel <= 0 and _pos <= _max), \
"undershoot: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
if _rng > 0.0:
while _pos > _max:
if _mode == 're-enter':
_pos -= _rng
elif _mode == 'bounce' or _mode == 'default':
_pos = _max - (_pos - _max)
_vel = -_vel
elif _mode == 'scatter':
_pos = _old + (_max - _old) * np.random.random()
_vel = _pos - _old
elif _mode == 'stick':
_pos = _max
_vel = _pos - _old
elif _mode == 'expand':
_max = _pos
elif _mode == 'random':
_pos = _min + _rng * np.random.random()
_vel = _pos - _old
else:
raise ValueError('invalid constrain mode')
while _pos < _min:
if _mode == 're-enter':
_pos += _rng
elif _mode == 'bounce' or _mode == 'default':
_pos = _min - (_pos - _min)
_vel = -_vel
elif _mode == 'scatter':
_pos = _old + (_min - _old) * np.random.random()
_vel = _pos - _old
elif _mode == 'stick':
_pos = _min
_vel = _pos - _old
elif _mode == 'expand':
_min = _pos
elif _mode == 'random':
_pos = _min + _rng * np.random.random()
_vel = _pos - _old
else:
raise ValueError('invalid constrain mode')
else:
_pos = _max
_vel = 0.0
return _pos, _vel, _min, _max
# noinspection PyUnusedLocal
def best_friend(self, index):
"""
select the best fit among a random set of particles.
@return: the best position record among self.friends randomly chosen particles (the "best friend").
"""
friends = np.random.choice(self.best, self.friends, replace=False)
index = np.argmin(friends['_rfac'])
return friends[index]
def add_result(self, particle, rfac):
"""
add a calculation particle to the results array, and update the best fit array.
@param particle: dictionary of model parameters and particle values.
the keys must correspond to the columns of the pos array,
i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.
@param rfac: calculated R-factor.
the R-factor is written to the '_rfac' field.
@return: (bool) True if the new R-factor is better than the particle's previous best mark.
"""
particle['_rfac'] = rfac
l = [particle[n] for n in self.results.dtype.names]
t = tuple(l)
a = np.asarray(t, dtype=self.results.dtype)
self.results = np.append(self.results, a)
index = particle['_particle']
better = particle['_rfac'] < self.best['_rfac'][index]
if better:
self.best[index] = a
return better
def is_converged(self, tol=0.01):
"""
check whether the population has converged.
convergence is reached when the R-factors of the N latest results
do not vary by more than tol, where N is the size of the population.
@param tol: max. difference allowed between greatest and lowest value of the R factor in the population.
"""
nres = self.results.shape[0]
npop = self.pos.shape[0]
if nres >= npop:
rfac1 = np.min(self.results['_rfac'][-npop:])
rfac2 = np.max(self.results['_rfac'][-npop:])
converg = rfac2 - rfac1 < tol
return converg
else:
return False
def save_array(self, filename, array):
"""
save a population array to a text file.
the columns are space-delimited.
the first line contains the column names.
@param filename: name of destination file, optionally including a path.
@param array: population array to save.
must be one of self.pos, self.vel, self.best, self.results
"""
header = " ".join(self.results.dtype.names)
np.savetxt(filename, array, fmt='%g', header=header)
def load_array(self, filename, array):
"""
load a population array from a text file.
the array to load must be compatible with the current population
(same number of rows, same columns).
the first row must contain column names.
the ordering of columns may be different.
the returned array is ordered according to the array argument.
@param filename: name of source file, optionally including a path.
@param array: population array to load.
must be one of self.pos, self.vel, self.results.
@return array with loaded data.
this may be the same instance as on input.
@raise AssertionError if the number of rows of the two files differ.
"""
data = np.genfromtxt(filename, names=True)
assert data.shape == array.shape
for name in data.dtype.names:
array[name] = data[name]
return array
def save_population(self, base_filename):
"""
save the population array to a set of text files.
the file name extensions are .pos, .vel, and .best
"""
self.save_array(base_filename + ".pos", self.pos)
self.save_array(base_filename + ".vel", self.vel)
self.save_array(base_filename + ".best", self.best)
def load_population(self, base_filename):
"""
load the population array from a set of previously saved text files.
this can be used to continue an optimization job.
the file name extensions are .pos, .vel, and .best.
the files must have the same format as produced by save_population.
the files must have the same number of rows.
"""
self.pos = self.load_array(base_filename + ".pos", self.pos)
self.vel = self.load_array(base_filename + ".vel", self.vel)
self.best = self.load_array(base_filename + ".best", self.best)
def save_results(self, filename):
"""
save the complete list of calculation results.
"""
self.save_array(filename, self.results)
class ParticleSwarmHandler(handlers.ModelHandler):
"""
model handler which implements the particle swarm optimization algorithm.
"""
## @var _pop (Population)
# holds the population object.
## @var _pop_size (int)
# number of particles in the swarm.
## @var _outfile (file)
# output file for model parameters and R factor.
# the file is open during calculations.
# each calculation result adds one line.
## @var _model_time (timedelta)
# estimated CPU time to calculate one model.
# this value is the maximum time measured of the completed calculations.
# it is used to determine when the optimization should be finished so that the time limit is not exceeded.
## @var _converged (bool)
# indicates that the population has converged.
# convergence is detected by calling Population.is_converged().
# once convergence has been reached, this flag is set, and further convergence tests are skipped.
## @var _timeout (bool)
# indicates when the handler has run out of time,
# i.e. time is up before convergence has been reached.
# if _timeout is True, create_tasks() will not create further tasks,
# and add_result() will signal completion when the _pending_tasks queue becomes empty.
## @var _invalid_limit (int)
# maximum tolerated number of invalid calculations.
#
# if the number of invalid calculations (self._invalid_count) exceeds this limit,
# the optimization is aborted.
# the variable is initialized by self.setup() to 10 times the population size.
def __init__(self):
super(ParticleSwarmHandler, self).__init__()
self._pop = None
self._pop_size = 0
self._outfile = None
self._model_time = datetime.timedelta()
self._converged = False
self._timeout = False
self._invalid_limit = 10
def setup(self, project, slots):
"""
initialize the particle swarm and open an output file.
the population size is set to project.pop_size (with a minimum of 4) if it is defined.
otherwise, it defaults to <code>max(2 * slots, 4)</code>.
for good efficiency the population size (number of particles) should be
greater or equal to the number of available processing slots,
otherwise the next generation is created before all particles have been calculated
which may slow down convergence.
if calculations take a long time compared to the available computation time
or spawn a lot of sub-tasks due to complex symmetry,
and you prefer to allow for a good number of generations,
you should override the population size.
@param project: project instance.
@param slots: number of calculation processes available through MPI.
@return: None
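a sketch of overriding the default population size
in a derived project's __init__ or setup method (the value is arbitrary):
@code{.py}
self.pop_size = 50
@endcode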
"""
super(ParticleSwarmHandler, self).setup(project, slots)
_min_size = 4
if project.pop_size:
self._pop_size = max(project.pop_size, _min_size)
else:
self._pop_size = max(self._slots * 2, _min_size)
self._pop = Population()
self._pop.setup(self._pop_size, self._project.create_domain(), self._project.history_file,
self._project.recalc_history)
self._invalid_limit = self._pop_size * 10
self._outfile = open(self._project.output_file + ".dat", "w")
self._outfile.write("# ")
self._outfile.write(" ".join(self._pop.results.dtype.names))
self._outfile.write("\n")
return None
def cleanup(self):
self._outfile.close()
super(ParticleSwarmHandler, self).cleanup()
def create_tasks(self, parent_task):
"""
develop the particle population and create a calculation task per particle.
this method advances the population by one step.
it generates one task for each particle whose model number is non-negative.
negative model numbers indicate that the particle is used for seeding
and does not need to be calculated in the first generation.
if the time limit is approaching, no new tasks are created.
the process loop calls this method every time the length of the task queue drops
below the number of calculation processes (slots).
this means in particular that a population will not be completely calculated
before the next generation starts.
for efficiency reasons, we do not wait until a population is complete.
this will cause a certain mixing of generations and slow down convergence
because the best peer position in the generation may not be known yet.
the effect can be reduced by making the population larger than the number of processes.
@return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
"""
super(ParticleSwarmHandler, self).create_tasks(parent_task)
# this is the top-level handler, we expect just one parent: root.
parent_id = parent_task.id
assert parent_id == (-1, -1, -1, -1, -1)
self._parent_tasks[parent_id] = parent_task
time_pending = self._model_time * len(self._pending_tasks)
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
out_tasks = []
if not self._timeout and not self._converged:
self._pop.advance_population()
for pos in self._pop.pos_gen():
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.info("time limit reached")
break
if pos['_model'] >= 0:
new_task = parent_task.copy()
new_task.parent_id = parent_id
new_task.model = pos
new_task.change_id(model=pos['_model'])
child_id = new_task.id
self._pending_tasks[child_id] = new_task
out_tasks.append(new_task)
return out_tasks
def add_result(self, task):
"""
calculate the R factor of the result and add it to the results list of the population.
* save the current population.
* append the result to the result output file.
* update the execution time statistics.
* remove temporary files if requested.
* check whether the population has converged.
@return parent task (CalculationTask) if the optimization has converged, @c None otherwise.
"""
super(ParticleSwarmHandler, self).add_result(task)
self._complete_tasks[task.id] = task
del self._pending_tasks[task.id]
parent_task = self._parent_tasks[task.parent_id]
rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
self._invalid_count += 1
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
task.model['_rfac'] = rfac
self._pop.add_result(task.model, rfac)
self._pop.save_population(self._project.output_file + ".pop")
if self._outfile:
s = (str(task.model[name]) for name in self._pop.results.dtype.names)
self._outfile.write(" ".join(s))
self._outfile.write("\n")
self._outfile.flush()
self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.set_model_complete(task.id.model, True)
if task.result_valid:
if self._pop.is_converged() and not self._converged:
logger.info("population converged")
self._converged = True
if task.time > self._model_time:
self._model_time = task.time
else:
if self._invalid_count >= self._invalid_limit:
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
self._converged = True
# optimization complete?
if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
del self._parent_tasks[parent_task.id]
else:
parent_task = None
self.cleanup_files(keep=self._pop_size)
return parent_task