update public distribution
based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100 tagged rev-master-2.0.0
@@ -8,10 +8,17 @@ python pmsco [pmsco-arguments]
@endverbatim
"""

import pmsco
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import os.path

file_dir = os.path.dirname(__file__) or '.'
root_dir = os.path.join(file_dir, '..')
root_dir = os.path.abspath(root_dir)
sys.path[0] = root_dir

if __name__ == '__main__':
    args, unknown_args = pmsco.parse_cli()
    pmsco.main_pmsco(args, unknown_args)
    sys.exit(0)
    import pmsco.pmsco
    pmsco.pmsco.main()
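A minimal sketch of driving the new entry point programmatically; parse_cli() and main_pmsco() come from the hunk above, while the argument vector and the assumption that parse_cli() reads sys.argv are hypothetical:

# sketch: programmatic equivalent of "python pmsco [pmsco-arguments]".
import sys
import pmsco

sys.argv = ["pmsco", "--help"]          # hypothetical argument list
args, unknown_args = pmsco.parse_cli()  # as used in __main__ above
pmsco.main_pmsco(args, unknown_args)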
0  pmsco/calculators/__init__.py  Normal file
@@ -1,5 +1,5 @@
"""
@package pmsco.calculator
@package pmsco.calculators.calculator
abstract scattering program interface.

this module declares the basic interface to scattering programs.
@@ -11,17 +11,21 @@ TestCalcInterface is provided for testing the PMSCO code quickly without calling

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import numpy as np
import data as md
import cluster as mc
import pmsco.data as md

__author__ = 'matthias muntwiler'

@@ -38,57 +42,24 @@ class Calculator(object):
        or <code>output_file + '.etpai'</code> depending on scan mode.
        all other intermediate files are deleted unless keep_temp_files is True.

        @param params: a msco_project.Params() object with all necessary values except cluster and output files set.
        @param params: a pmsco.project.Params object with all necessary values except cluster and output files set.

        @param cluster: a msco_cluster.Cluster() object with all atom positions set.
        @param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.

        @param scan: a msco_project.Scan() object describing the experimental scanning scheme.
        @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.

        @param output_file: base name for all intermediate and output files

        @return: result_file, files_cats
        @arg result_file is the name of the main ETPI or ETPAI result file to be further processed.
        @arg files_cats is a dictionary that lists the names of all created data files with their category.
        @return: (str, dict) result_file, and dictionary of created files {filename: category}
        @return: (str, dict) result_file, and dictionary of created files.
        @arg the first element is the name of the main ETPI or ETPAI result file to be further processed.
        @arg the second element is a dictionary that lists the names of all created data files with their category.
            the dictionary key is the file name,
            the value is the file category (cluster, phase, etc.).
        """
        return None, None

    def check_cluster(self, cluster, output_file):
        """
        export the cluster in XYZ format for reference.

        along with the complete cluster, the method also saves cuts in the xz (extension .y.xyz) and yz (.x.xyz) plane.

        @param cluster: a pmsco.cluster.Cluster() object with all atom positions set.

        @param output_file: base name for all intermediate and output files

        @return: dictionary listing the names of the created files with their category.
            the dictionary key is the file name,
            the value is the file category (cluster).

        @warning experimental: this method may be moved elsewhere in a future version.
        """
        xyz_filename = output_file + ".xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files = {xyz_filename: 'cluster'}

        clucut = mc.Cluster()
        clucut.copy_from(cluster)
        clucut.trim_slab("x", 0.0, 0.1)
        xyz_filename = output_file + ".x.xyz"
        clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files[xyz_filename] = 'cluster'

        clucut.copy_from(cluster)
        clucut.trim_slab("y", 0.0, 0.1)
        xyz_filename = output_file + ".y.xyz"
        clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files[xyz_filename] = 'cluster'

        return files


class TestCalculator(Calculator):
    """
@@ -127,5 +98,5 @@ class TestCalculator(Calculator):

        md.save_data(etpi_filename, result_etpi)

        files = {clu_filename: 'cluster', etpi_filename: 'energy'}
        files = {clu_filename: 'cluster', etpi_filename: 'region'}
        return etpi_filename, files
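For orientation, a minimal sketch of the check_cluster() interface documented above; the element numbers and coordinates are arbitrary placeholders:

# sketch: exporting reference XYZ files via check_cluster().
# Cluster and add_atom are defined in pmsco/cluster.py (see below in this diff).
import numpy as np
import pmsco.cluster as mc
from pmsco.calculators.calculator import TestCalculator

clu = mc.Cluster()
clu.add_atom(14, np.asarray([0.0, 0.0, 0.0]), 1)    # emitter (placeholder values)
clu.add_atom(14, np.asarray([1.9, 0.0, -1.4]), 0)   # scatterer
files = TestCalculator().check_cluster(clu, "demo")
# expected: {'demo.xyz': 'cluster', 'demo.x.xyz': 'cluster', 'demo.y.xyz': 'cluster'}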
@@ -1,25 +1,30 @@
"""
@package pmsco.edac_calculator
@package pmsco.calculators.edac
Garcia de Abajo EDAC program interface.

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
import os
from __future__ import print_function

import logging
import math
import numpy as np
import calculator
import data as md
import cluster as mc
import edac.edac as edac
import os

import pmsco.calculators.calculator as calculator
from pmsco.compat import open
import pmsco.data as md
import pmsco.cluster as mc
import pmsco.edac.edac as edac
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

@@ -44,18 +49,19 @@ class EdacCalculator(calculator.Calculator):

        if alpha is defined, theta is implicitly set to normal emission! (to be generalized)

        TODO: some parameters are still hard-coded.
        @param params: a pmsco.project.Params object with all necessary values except cluster and output files set.

        @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.

        @param filepath: (str) name and path of the file to be created.
        """
        with open(filepath, "w") as f:
            f.write("verbose off\n")
            f.write("cluster input %s\n" % (params.cluster_file))
            f.write("emitters %u l(A)\n" % (len(params.emitters)))
            f.write("cluster input {0}\n".format(params.cluster_file))
            f.write("emitters {0:d} l(A)\n".format(len(params.emitters)))
            for em in params.emitters:
                f.write("%g %g %g %u\n" % em)
                #for iat in range(params.atom_types):
                    #pf = params.phase_file[iat]
                    #pf = pf.replace(".pha", ".edac.pha")
                    #f.write("scatterer %u %s\n" % (params.atomic_number[iat], pf))
                f.write("{0:f} {1:f} {2:f} {3:d}\n".format(*em))

            en = scan.energies + params.work_function
            en_min = en.min()
@@ -106,7 +112,7 @@ class EdacCalculator(calculator.Calculator):
            assert th_num < th.shape[0] * 10, \
                "linearization of theta scan causes excessive oversampling {0}/{1}".format(th_num, th.shape[0])

            f.write("beta {0}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
            f.write("beta {0}\n".format(params.polar_incidence_angle))
            f.write("incidence {0} {1}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
            f.write("emission angle theta {th0:f} {th1:f} {nth:d}\n".format(th0=th_min, th1=th_max, nth=th_num))

@@ -136,35 +142,49 @@ class EdacCalculator(calculator.Calculator):
            f.write("initial state {0}\n".format(params.initial_state))
            polarizations = {'H': 'LPx', 'V': 'LPy', 'L': 'LCP', 'R': 'RCP'}
            f.write("polarization {0}\n".format(polarizations[params.polarization]))
            f.write("muffin-tin\n")
            f.write("V0 E(eV) {0}\n".format(params.inner_potential))
            f.write("cluster surface l(A) {0}\n".format(params.z_surface))

            scatterers = ["scatterer {at} {fi}\n".format(at=at, fi=fi)
                          for (at, fi) in params.phase_files.items()
                          if os.path.isfile(fi)]
            if scatterers:
                for scat in scatterers:
                    f.write(scat)
            else:
                f.write("muffin-tin\n")

            f.write("V0 E(eV) {0:f}\n".format(params.inner_potential))
            f.write("cluster surface l(A) {0:f}\n".format(params.z_surface))
            f.write("imfp SD-UC\n")
            f.write("temperature %g %g\n" % (params.experiment_temperature, params.debye_temperature))
            f.write("temperature {0:f} {1:f}\n".format(params.experiment_temperature, params.debye_temperature))
            f.write("iteration recursion\n")
            f.write("dmax l(A) %g\n" % (params.dmax))
            f.write("lmax %u\n" % (params.lmax))
            f.write("orders %u " % (len(params.orders)))
            for order in params.orders:
                f.write("%u " % (order))
            f.write("\n")
            f.write("emission angle window 1\n")
            f.write("scan pd %s\n" % (params.output_file))
            f.write("dmax l(A) {0:f}\n".format(params.dmax))
            f.write("lmax {0:d}\n".format(params.lmax))
            f.write("orders {0:d} ".format(len(params.orders)))
            f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
            f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
            # f.write("cluster output l(A) out.clu")
            # problems:
            # - muffin-tin relabels atoms
            # - there can be multiple atom types for the same chemical element
            # - we have to compare coordinates to find the mapping between input and output cluster
            # f.write("scan scatterer i phase-shifts i.pha")
            # f.write("scan scatterer i potential i.pot")
            f.write("scan pd {0}\n".format(params.output_file))
            f.write("end\n")

    def run(self, params, cluster, scan, output_file):
        """
        run EDAC with the given parameters and cluster.

        @param params: a msc_param.Params() object with all necessary values except cluster and output files set.
        @param params: a pmsco.project.Params object with all necessary values except cluster and output files set.

        @param cluster: a msc_cluster.Cluster(format=FMT_EDAC) object with all atom positions set.
        @param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.

        @param scan: a msco_project.Scan() object describing the experimental scanning scheme.
        @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.

        @param output_file: base name for all intermediate and output files

        @return: result_file, files_cats
        @return: (str, dict) result_file, and dictionary of created files {filename: category}
        """

        # set up scan
@@ -213,8 +233,14 @@ class EdacCalculator(calculator.Calculator):
            pass
        result_etpi = md.interpolate_hemi_scan(result_etpi, hemi_tpi)

        if result_etpi.shape[0] != scan.raw_data.shape[0]:
            logger.error("scan length mismatch: EDAC result: %u, scan data: %u", result_etpi.shape[0], scan.raw_data.shape[0])
        if params.fixed_cluster:
            expected_shape = max(scan.energies.shape[0], 1) * max(scan.alphas.shape[0], 1)
        else:
            expected_shape = max(scan.energies.shape[0], 1) * max(scan.phis.shape[0], scan.thetas.shape[0], 1)
        if result_etpi.shape[0] != expected_shape:
            logger.warning(BMsg("possible scan length mismatch: EDAC result: {result}, expected: {expected}",
                                result=result_etpi.shape[0], expected=expected_shape))

        logger.debug("save result to file %s", etpi_filename)
        md.save_data(etpi_filename, result_etpi)
@@ -13,9 +13,12 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
    http://www.apache.org/licenses/LICENSE-2.0
"""

import calculator
import data as md
import msc.msc as msc
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pmsco.calculators.calculator as calculator
import pmsco.data as md
import pmsco.msc.msc as msc
import logging

logger = logging.getLogger(__name__)
@@ -32,7 +35,7 @@ class MscCalculator(calculator.Calculator):
        f.write(" %s\n" % (params.polarization) )
        f.write(" %4u\n" % (params.scattering_level) )
        f.write(" %7.2f%7.2f\n" % (params.fcut, params.cut) )
        f.write(" %12.6f\n" % (params.angular_broadening) )
        f.write(" %12.6f\n" % (params.angular_resolution) )
        f.write(" %12.6f\n" % (params.lattice_constant) )
        f.write(" %12.6f\n" % (params.z_surface) )
        f.write(" %4u\n" % (params.atom_types) )
259  pmsco/cluster.py
@@ -14,12 +14,17 @@ pip install --user periodictable

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut
@copyright (c) 2015-18 by Paul Scherrer Institut
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numpy as np
import periodictable as pt
import sys

## default file format identifier
FMT_DEFAULT = 0
@@ -30,33 +35,40 @@ FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3

# python version dependent type of chemical symbol
if sys.version_info[0] >= 3:
    _SYMBOL_TYPE = 'U2'
else:
    _SYMBOL_TYPE = 'S2'

## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i','i4'), ('t','i4'), ('s','a2'), ('x','f4'), ('y','f4'), ('z','f4'), ('e','u1')]
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
                          ('e', 'u1')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i','t','s','x','y','z','e']
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']

## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i','i4'), ('x','f4'), ('y','f4'), ('z','f4'), ('t','i4')]
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
## file format of MSC cluster file
FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
## field (column) names of MSC cluster file
FIELDS_CLUSTER_MSC = ['i','x','y','z','t']
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i','i4'), ('t','i4'), ('x','f4'), ('y','f4'), ('z','f4')]
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i','t','x','y','z']
FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']

## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s','a2'), ('x','f4'), ('y','f4'), ('z','f4')]
DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s','x','y','z']
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']


class Cluster(object):
@@ -137,7 +149,7 @@ class Cluster(object):
        """
        Copy the data from another cluster.

        @param cluster (Cluster): other Cluster object.
        @param cluster: (Cluster) other Cluster object.
        """
        self.data = cluster.data.copy()

@@ -164,10 +176,10 @@ class Cluster(object):

        @param x, y, z: (float) atom coordinates in the cluster

        @param emitter: (uint) 1 = emitter, 0 = regular
        @param emitter: (int or bool) True = emitter, False = scatterer
        """
        symbol = pt.elements[element_number].symbol
        element = (index, element_number, symbol, x, y, z, emitter)
        element = (index, element_number, symbol, x, y, z, int(emitter))
        return element

    def add_atom(self, atomtype, v_pos, is_emitter):
@@ -178,10 +190,10 @@ class Cluster(object):

        @param v_pos: (numpy.ndarray, shape = (3)) position vector

        @param is_emitter: (uint) 1 = emitter, 0 = regular
        @param is_emitter: (int or bool) True = emitter, False = scatterer
        """
        n0 = self.data.shape[0] + 1
        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter)
        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
        self.data = np.append(self.data, np.array(element,
                                                  dtype=self.data.dtype))

@@ -342,6 +354,29 @@ class Cluster(object):

        return idx

    def translate(self, vector, element=0):
        """
        translate the cluster or all atoms of a specified element.

        @param vector: (numpy.ndarray) 3-dimensional displacement vector.
        @param element: (int) chemical element number if atoms of a specific element should be affected.
            by default (element = 0), all atoms are moved.
        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
        if element:
            try:
                sel = self.data['t'] == int(element)
            except ValueError:
                sel = self.data['s'] == element
        else:
            sel = np.ones_like(self.data['t'])
        idx = np.where(sel)
        self.data['x'][idx] += vector[0]
        self.data['y'][idx] += vector[1]
        self.data['z'][idx] += vector[2]

        return idx
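A minimal sketch of the new translate() method above; element numbers and coordinates are arbitrary placeholders:

# sketch: shift all atoms of one element by a displacement vector.
import numpy as np
import pmsco.cluster as mc

clu = mc.Cluster()
clu.add_atom(8, np.asarray([0.0, 0.0, 0.0]), 1)     # O emitter (placeholder)
clu.add_atom(28, np.asarray([1.8, 0.0, -1.8]), 0)   # Ni scatterer (placeholder)
moved = clu.translate(np.asarray([0.0, 0.0, 0.1]), element=8)
# moved holds the indices of the shifted atoms; element=0 would move the whole cluster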
    def matrix_transform(self, matrix):
        """
        apply a transformation matrix to each atom of the cluster.
@@ -474,6 +509,31 @@ class Cluster(object):
        self.data = self.data[idx]
        self.update_index()

    def trim_paraboloid(self, rxy, z0):
        """
        remove atoms outside a given paraboloid.

        the paraboloid is defined by z(r) = z0 * (1 - r**2 / rxy**2),
        where r**2 = x**2 + y**2.
        its apex is at (0, 0, z0).
        z(r) = 0 for x**2 + y**2 = rxy**2.

        the coordinates (x,y,z) of atoms to keep must match z >= z(r).

        @param rxy: (float) radius of the paraboloid at z = 0.

        @param z0: (float) vertical coordinate of the apex (at x = y = 0).
            in the usual cluster layout where the surface is near z = 0 and the atoms at lower z,
            this value is negative.

        @return: None
        """
        rsq = self.data['x']**2 + self.data['y']**2
        pz = z0 * (1. - rsq / rxy**2)
        idx = np.where(self.data['z'] >= pz)
        self.data = self.data[idx]
        self.update_index()
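A quick numeric check of the keep condition z >= z0 * (1 - r**2 / rxy**2) documented above, with arbitrary values:

# sketch: one atom tested against the paraboloid criterion of trim_paraboloid().
rxy, z0 = 5.0, -10.0       # paraboloid: radius 5 A at z = 0, apex at z = -10
x, y, z = 3.0, 0.0, -5.0   # atom position (placeholder)
keep = z >= z0 * (1.0 - (x**2 + y**2) / rxy**2)   # -5.0 >= -6.4 -> True, atom is kept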
    def trim_sphere(self, radius):
        """
        remove atoms outside a given sphere.
@@ -637,7 +697,7 @@ class Cluster(object):
        """
        idx = self.data['e'] != 0
        ems = self.data[['x', 'y', 'z', 't']][idx]
        return map(tuple, ems)
        return [tuple(em) for em in ems]

    def get_emitter_count(self):
        """
@@ -702,7 +762,7 @@ class Cluster(object):
        else:
            self.data['e'] = 0

        pos = self.positions()
        pos = self.get_positions()
        # note: np.linalg.norm does not accept axis argument in version 1.7
        # (check np.version.version)
        norm = np.sqrt(np.sum(pos**2, axis=1))
@@ -739,13 +799,14 @@ class Cluster(object):
        """
        self.data['i'] = np.arange(1, self.data.shape[0] + 1)

    def save_to_file(self, f, fmt=FMT_DEFAULT, comment=""):
    def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
        """
        save the cluster to a file which can be read by the scattering program.

        the method updates the atom index because some file formats require an index column.

        @param f: (string/handle) path name or open file handle of the cluster file.
            if the filename ends in .gz, the file is saved in compressed gzip format

        @param fmt: (int) file format.
            must be one of the FMT_ constants.
@@ -755,7 +816,10 @@ class Cluster(object):
            not used in other file formats.
            by default, self.comment is used.

        @remark if the filename ends in .gz, the file is saved in compressed gzip format
        @param emitters_only: (bool) if True, only atoms marked as emitters are saved.
            by default, all atoms are saved.

        @return None
        """
        if fmt == FMT_DEFAULT:
            fmt = self.file_format
@@ -763,6 +827,13 @@ class Cluster(object):
        if not comment:
            comment = self.comment

        self.update_index()
        if emitters_only:
            idx = self.data['e'] != 0
            data = self.data[idx]
        else:
            data = self.data

        if fmt == FMT_MSC:
            file_format = FMT_CLUSTER_MSC
            fields = FIELDS_CLUSTER_MSC
@@ -770,16 +841,158 @@ class Cluster(object):
        elif fmt == FMT_EDAC:
            file_format = FMT_CLUSTER_EDAC
            fields = FIELDS_CLUSTER_EDAC
            header = "%u l(A)" % (self.data.shape[0])
            header = "{nat} l(A)".format(nat=data.shape[0])
        elif fmt == FMT_XYZ:
            file_format = FMT_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            header = "{0}\n{1}".format(self.data.shape[0], comment)
            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
        else:
            file_format = FMT_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            header = "{0}\n{1}".format(self.data.shape[0], comment)
            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)

        self.update_index()
        data = self.data[fields]
        data = data[fields]
        np.savetxt(f, data, fmt=file_format, header=header, comments="")
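A minimal sketch of the new emitters_only switch in save_to_file(); the file name and atoms are placeholders:

# sketch: write only the emitter atoms of a cluster to an XYZ file.
import numpy as np
import pmsco.cluster as mc

clu = mc.Cluster()
clu.add_atom(6, np.asarray([0.0, 0.0, 0.0]), 1)    # marked as emitter
clu.add_atom(6, np.asarray([1.4, 0.0, 0.0]), 0)    # scatterer, skipped below
clu.save_to_file("emitters.xyz", fmt=mc.FMT_XYZ, emitters_only=True)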
class ClusterGenerator(object):
    """
    cluster generator class.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
            cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model, scan and symmetry.

        the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
        by default, the method returns 1, which means that there is only one emitter configuration.

        emitter configurations are mainly a way to distribute the calculations to multiple processes
        since the resulting diffraction patterns add up incoherently.
        for this to work, the create_cluster() method must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at run-time
        based on the available resources.

        note that this function returns the number of _configurations_ not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project, what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must override this method
        and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
            the method should consider only the following attributes:
            @arg @c scan scan index (index into Project.scans)
            @arg @c sym symmetry index (index into Project.symmetries)
            @arg @c emit emitter index must be -1.

        @return number of emitter configurations.
            this implementation returns the default value of 1.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index, symmetry index and emitter index.

        the scan index can be used to generate a different cluster for different scan geometry,
        e.g., if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic arrangement.
        in this case, depending on the value of index.sym, the function must generate a cluster corresponding
        to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on the value of the emitter index, the method must react differently:

        1. if the value is -1, return the full cluster and mark all inequivalent emitter atoms.
           emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater or equal to zero, generate the cluster with the emitter configuration
           selected by the emitter index.
           the index is in the range between 0 and the return value of count_emitters() minus 1.
           the results of the individual emitter calculations are summed up in combine_emitters().

        the code should ideally be written such that either case yields the same diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have to be implemented,
        and the method can ignore the emitter index.

        the method must ignore the region index.

        if the creation of a cluster fails due to invalid model parameters,
        rather than raising an exception or constraining values,
        the method should send an error message to the logger and return an empty cluster or None.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
            the method should consider only the following attributes:
            @arg @c scan scan index (index into Project.scans)
            @arg @c sym symmetry index (index into Project.symmetries)
            @arg @c emit emitter index.
                if -1, generate the full cluster and mark all emitters.
                if greater or equal to zero, the value is a zero-based index of the emitter configuration.

        @return None.
            sub-classes must return a valid Cluster object or None in case of failure.
        """
        return None


class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
    this class redirects generator calls to the project methods
    providing compatibility to older project code.
    """

    def __init__(self, project):
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.
        """
        try:
            return self.project.count_emitters(model, index)
        except AttributeError:
            return 1

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)
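A minimal sketch of a project-specific generator derived from the class above; the 'distance' model parameter and the two-atom geometry are hypothetical:

# sketch: a custom generator that builds a two-atom cluster from a model parameter.
import numpy as np
import pmsco.cluster as mc

class DimerGenerator(mc.ClusterGenerator):
    def create_cluster(self, model, index):
        # 'distance' is a hypothetical model parameter of this example
        clu = mc.Cluster()
        clu.add_atom(6, np.asarray([0.0, 0.0, 0.0]), 1)
        clu.add_atom(6, np.asarray([0.0, 0.0, -model['distance']]), 0)
        return clu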
40  pmsco/compat.py  Normal file
@@ -0,0 +1,40 @@
"""
@package pmsco.compat
compatibility code

code bits to provide compatibility for different python versions.
currently supported 2.7 and 3.6.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from io import open as io_open


def open(fname, mode='r', encoding='latin1'):
    """
    open a data file for read/write/append using the default str type

    this is a drop-in for io.open
    where data is exchanged via the built-in str type of python,
    whether this is a byte string (python 2) or unicode string (python 3).

    the file is assumed to be a latin-1 encoded binary file.

    @param fname: file name and path
    @param mode: 'r', 'w' or 'a'
    @param encoding: 'latin1' (default), 'ascii' or 'utf-8'
    @return file handle
    """
    if isinstance(b'b', str):
        # python 2
        mode += 'b'
        kwargs = {}
    else:
        # python 3
        mode += 't'
        kwargs = {'encoding': encoding}

    return io_open(fname, mode, **kwargs)
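Usage sketch for the compatibility wrapper above; the file name is a placeholder:

# sketch: pmsco.compat.open behaves like io.open but always exchanges
# the native str type (bytes on python 2, unicode on python 3).
from pmsco.compat import open

with open("scan.etpi", "w") as f:   # hypothetical file name
    f.write("# energy theta phi intensity\n")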
@@ -1,21 +1,29 @@
"""
@package pmsco.data
import, export, evaluation of msc data
import, export, evaluation of msc data.

this module provides common functions for loading/saving and manipulating PED scan data sets.

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-17 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import os
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import numpy as np
import os
import scipy.optimize as so
import loess.loess as loess

from pmsco.compat import open
import pmsco.loess.loess as loess

logger = logging.getLogger(__name__)

@@ -149,7 +157,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
    data[i]['i'] = selected intensity column
    @endverbatim
    """
    with open(filename, 'r') as f:
    with open(filename, "r") as f:
        header1 = f.readline().strip()
        header2 = f.readline().strip()
        if not header1 == '--- scan PD':
@@ -259,19 +267,38 @@ def load_data(filename, dtype=None):
        DTYPE_EI, DTYPE_ETPI, DTYPE_ETPIS, DTYPE_ETPAI, or DTYPE_ETPAIS.
        by default, the function uses the extension to determine the data type.
        the actual type can be read from the dtype attribute of the returned array.
        if the extension is missing, DTYPE_EI is assumed.

    @return one-dimensional numpy structured ndarray with data

    @raise IOError if the file cannot be read.

    @raise IndexError if the number of columns is lower than expected based on the dtype or extension.
    """
    if not dtype:
        (root, ext) = os.path.splitext(filename)
        datatype = ext[1:].upper()
        dtype = DTYPES[datatype]
        ext_type = ext[1:].upper()
        try:
            dtype = DTYPES[ext_type]
        except KeyError:
            dtype = DTYPE_EI

    data = np.loadtxt(filename, dtype=dtype)
    sort_data(data)
    return data


def format_extension(data):
    """
    format the file extension based on the contents of an array.

    @param data ETPI-like structured numpy.ndarray.

    @return: file extension string including the leading period.
    """
    return "." + "".join(data.dtype.names)


def save_data(filename, data):
    """
    save column data (ETPI, and the like) to a text file.
@@ -331,20 +358,24 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
        undefined fields are initialized to zero.
        if the parameter is unspecified, all fields are initialized to zero.

    @return: re-structured numpy array
    @return: re-structured numpy array or
        @c data if the new and original data types are the same.
    """
    new_data = np.zeros(data.shape, dtype=dtype)
    fields = [dt[0] for dt in dtype if dt[0] in data.dtype.names]
    if data.dtype == dtype:
        return data
    else:
        new_data = np.zeros(data.shape, dtype=dtype)
        fields = [dt[0] for dt in dtype if dt[0] in data.dtype.names]

    if defaults is not None:
        for field, value in defaults.iteritems():
            if field in new_data.dtype.names:
                new_data[field] = value
        if defaults is not None:
            for field, value in defaults.items():
                if field in new_data.dtype.names:
                    new_data[field] = value

    for field in fields:
        new_data[field] = data[field]
        for field in fields:
            new_data[field] = data[field]

    return new_data
        return new_data


def common_dtype(scans):
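A minimal sketch of restructure_data() as changed above; the column names follow the DTYPE_ definitions of this module:

# sketch: widen an ETPI array to the ETPAIS layout; missing columns are zeroed.
import numpy as np
import pmsco.data as md

etpi = np.zeros(3, dtype=md.DTYPE_ETPI)   # e, t, p, i columns
etpais = md.restructure_data(etpi)        # default target dtype DTYPE_ETPAIS
print(etpais.dtype.names)                 # now includes the extra columns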
@@ -584,7 +615,7 @@ def calc_modfunc_mean(data):
    return modf


def calc_modfunc_loess(data):
def calc_modfunc_loess(data, smth=0.4):
    """
    calculate the modulation function using LOESS (locally weighted regression) smoothing.

@@ -609,9 +640,11 @@ def calc_modfunc_loess(data):
    the modulation function is calculated for the finite-valued scan points.
    NaNs are ignored and do not affect the finite values.

    @return copy of the data array with the modulation function in the 'i' column.
    @param smth: size of the smoothing window relative to the size of the scan.
        reasonable values are between 0.2 and 0.5.
        the default value 0.4 has been found to work in many cases.

    @todo is a fixed smoothing factor of 0.5 okay?
    @return copy of the data array with the modulation function in the 'i' column.
    """
    sel = np.isfinite(data['i'])
    _data = data[sel]
@@ -626,7 +659,7 @@ def calc_modfunc_loess(data):
    factors = [_data[axis] for axis in scan_mode]
    lo.set_x(np.hstack(tuple(factors)))
    lo.set_y(_data['i'])
    lo.model.span = 0.5
    lo.model.span = smth
    loess.loess(lo)

    modf['i'][sel] = lo.get_fitted_residuals() / lo.get_fitted_values()
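For context, the modulation function computed above is the relative residual of the LOESS fit; a schematic stand-alone illustration with made-up numbers:

# sketch: chi = (I - I_smooth) / I_smooth, with a stand-in for the LOESS fit.
import numpy as np

intensity = np.array([10.0, 12.0, 9.0])
smooth = np.array([10.0, 10.5, 10.0])   # placeholder for lo.get_fitted_values()
chi = (intensity - smooth) / smooth     # -> [ 0.   ,  0.143, -0.1  ]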
1312  pmsco/database.py  Normal file
File diff suppressed because it is too large
@@ -11,7 +11,9 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
    http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path
import datetime
@@ -19,8 +21,9 @@ import signal
import collections
import copy
import logging
from attrdict import AttrDict
from mpi4py import MPI
from helpers import BraceMessage as BMsg
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

@@ -48,7 +51,99 @@ TAG_INVALID_RESULT = 3
## the message is empty
TAG_ERROR_ABORTING = 4

CalcID = collections.namedtuple('CalcID', ['model', 'scan', 'sym', 'emit', 'region'])
## levels of calculation tasks
#
CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region')

## intermediate base class of CalcID
#
# this class should not be instantiated.
# instead, use CalcID which provides some useful helper methods.
#
_CalcID = collections.namedtuple('_CalcID', CALC_LEVELS)


class CalcID(_CalcID):
    """
    named tuple class to uniquely identify a calculation task.

    this is a 5-tuple of indices, one index per task level.
    a positive index refers to a specific instance in the task hierarchy.

    the ID is defined as a named tuple so that it can be used as key of a dictionary.
    cf. @ref CalculationTask for further detail.
    compared to a plain named tuple, the CalcID class provides additional helper methods and properties.

    example constructor: CalcID(1, 2, 3, 4, 5).
    """

    @property
    def levels(self):
        """
        level names.

        this property returns the defined level names in a tuple.
        this is the same as @ref CALC_LEVELS.

        @return: tuple of level names
        """
        return self._fields

    @property
    def level(self):
        """
        specific level of a task, dictionary key form.

        this corresponds to the name of the last positive component.

        @return: attribute name corresponding to the task level.
            empty string if all members are negative (the root task).
        """
        for k in reversed(self._fields):
            if self.__getattribute__(k) >= 0:
                return k
        return ''

    @property
    def numeric_level(self):
        """
        specific level of a task, numeric form.

        this corresponds to the last positive value in the sequence of indices.

        @return: index corresponding to the significant task level component of the id.
            the value ranges from -1 to len(CalcID) - 1.
            it is -1 if all indices are negative (root task).
        """
        for k in reversed(range(len(self))):
            if self[k] >= 0:
                return k
        return -1

    def collapse_levels(self, level):
        """
        return a new CalcID that is collapsed at a specific level.

        the method returns a new CalcID object where the indices below the given level are -1 (undefined).
        this can be seen as collapsing the tree at the specified node (level).

        @note because a CalcID is immutable, this method returns a new instance.

        @param level: level at which to collapse.
            the index at this level remains unchanged, lower ones are set to -1.
            the level can be specified by attribute name (str) or numeric index (-1..4).

        @raise ValueError if level is not numeric and not in CALC_LEVELS.

        @return: new CalcID instance.
        """
        try:
            level = int(level)
        except ValueError:
            level = CALC_LEVELS.index(level)
        assert -1 <= level < len(CALC_LEVELS)
        mask = {l: -1 for (i, l) in enumerate(CALC_LEVELS) if i > level}
        return self._replace(**mask)


class CalculationTask(object):
|
||||
specified members must be greater or equal to zero.
|
||||
-1 is the wildcard which is used in parent tasks,
|
||||
where, e.g., no specific symmetry is chosen.
|
||||
the root task has the ID (-1, -1, -1, -1).
|
||||
the root task has the ID (-1, -1, -1, -1, -1).
|
||||
"""
|
||||
|
||||
## @var id (CalcID)
|
||||
# named tuple CalcID containing the 4-part calculation task identifier.
|
||||
# named tuple CalcID containing the 5-part calculation task identifier.
|
||||
|
||||
## @var parent_id (CalcID)
|
||||
# named tuple CalcID containing the task identifier of the parent task.
|
||||
@ -105,6 +200,21 @@ class CalculationTask(object):
|
||||
## @var modf_filename (string)
|
||||
# name of the ETPI or ETPAI file that contains the resulting modulation function.
|
||||
|
||||
## @var result_valid (bool)
|
||||
# validity status of the result file @ref result_filename.
|
||||
#
|
||||
# if True, the file must exist and contain valid data according to the task specification.
|
||||
# the value is set True when a calculation task completes successfully.
|
||||
# it may be reset later to invalidate the data if an error occurs during processing.
|
||||
#
|
||||
# validity of a parent task requires validity of all child tasks.
|
||||
|
||||
## @var rfac (float)
|
||||
# r-factor value of the task result.
|
||||
#
|
||||
# the rfac field is written by @ref pmsco.project.Project.evaluate_result.
|
||||
# the initial value is Not A Number.
|
||||
|
||||
## @var time (timedelta)
|
||||
# execution time of the task.
|
||||
#
|
||||
@ -125,7 +235,7 @@ class CalculationTask(object):
|
||||
# scan positions to substitute the ones from the original scan.
|
||||
#
|
||||
# this is used to distribute scans over multiple calculator processes,
|
||||
# cf. e.g. @ref EnergyRegionHandler.
|
||||
# cf. e.g. @ref pmsco.handlers.EnergyRegionHandler.
|
||||
#
|
||||
# dictionary key must be the scan dimension 'e', 't', 'p', 'a'.
|
||||
# the value is a numpy.ndarray containing the scan positions.
|
||||
@ -147,6 +257,7 @@ class CalculationTask(object):
|
||||
self.time = datetime.timedelta()
|
||||
self.files = {}
|
||||
self.region = {}
|
||||
self.rfac = float('nan')
|
||||
|
||||
def __eq__(self, other):
|
||||
"""
|
||||
@ -192,7 +303,7 @@ class CalculationTask(object):
|
||||
msg['id'] = CalcID(**msg['id'])
|
||||
if isinstance(msg['parent_id'], dict):
|
||||
msg['parent_id'] = CalcID(**msg['parent_id'])
|
||||
for k, v in msg.iteritems():
|
||||
for k, v in msg.items():
|
||||
self.__setattr__(k, v)
|
||||
|
||||
def format_filename(self, **overrides):
|
||||
@ -204,13 +315,8 @@ class CalculationTask(object):
|
||||
|
||||
@return a string consisting of the concatenation of the base name, the ID, and the extension.
|
||||
"""
|
||||
parts = {}
|
||||
parts = self.id._asdict()
|
||||
parts['root'] = self.file_root
|
||||
parts['model'] = self.id.model
|
||||
parts['scan'] = self.id.scan
|
||||
parts['sym'] = self.id.sym
|
||||
parts['emit'] = self.id.emit
|
||||
parts['region'] = self.id.region
|
||||
parts['ext'] = self.file_ext
|
||||
|
||||
for key in overrides.keys():
|
||||
@ -237,6 +343,30 @@ class CalculationTask(object):
|
||||
"""
|
||||
self.id = self.id._replace(**kwargs)
|
||||
|
||||
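The simplification in format_filename() above relies on namedtuple._asdict(); a short stand-alone illustration (the base name and extension are hypothetical):

# sketch: _asdict() yields the five id fields in one step (an OrderedDict).
from pmsco.dispatch import CalcID

parts = CalcID(1, 0, 2, -1, -1)._asdict()
# {'model': 1, 'scan': 0, 'sym': 2, 'emit': -1, 'region': -1}
parts['root'] = "myproject"   # placeholder base name
parts['ext'] = ".etpi"        # placeholder extension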
    @property
    def level(self):
        """
        specific level of a task, dictionary key form.

        this corresponds to the name of the last positive component of self.id.

        @return: attribute name corresponding to the task level.
            empty string for the root task.
        """
        return self.id.level

    @property
    def numeric_level(self):
        """
        specific level of a task, numeric form.

        this corresponds to the index of the last positive component of self.id.

        @return: index corresponding to the significant task level component of the id.
            -1 for the root task.
        """
        return self.id.numeric_level

    def add_task_file(self, name, category):
        """
        register a file that was generated by the calculation task.
@@ -284,6 +414,82 @@ class CalculationTask(object):
            logger.warning("CalculationTask.remove_task_file: could not remove file {0}".format(filename))


class CachedCalculationMethod(object):
    """
    decorator to cache results of expensive calculation functions.

    this decorator can be used to transparently cache any expensive calculation result
    that depends in a deterministic way on the calculation index.
    for example, each cluster gets a unique calculation index.
    if a cluster needs to be calculated repeatedly, it may be more efficient to cache it.

    the method to decorate must have the following signature:
    result = func(self, model, index).
    the index (neglecting emitter and region) identifies the result (completely and uniquely).

    the number of cached results is limited by the ttl (time to live) attribute.
    the items' ttl values are decreased each time a requested calculation is not found in the cache (miss).
    on a cache hit, the corresponding item's ttl is reset.

    the target ttl (time to live) can be specified as an optional parameter of the decorator.
    time increases with every cache miss.
    """

    ## @var _cache (dict)
    #
    # key = calculation index,
    # value = function result

    ## @var _ttl (dict)
    #
    # key = calculation index,
    # value (int) = remaining time to live
    # where time is the number of subsequent cache misses.

    ## @var ttl (int)
    #
    # target time to live of cache items.
    # time is given by the number of cache misses.

    def __init__(self, ttl=10):
        super(CachedCalculationMethod, self).__init__()
        self._cache = {}
        self._ttl = {}
        self.ttl = ttl

    def __call__(self, func):

        def wrapped_func(inst, model, index):
            # note: _replace returns a new instance of the namedtuple
            index = index._replace(emit=-1, region=-1)
            cache_index = (id(inst), index.model, index.scan, index.sym)
            try:
                result = self._cache[cache_index]
            except KeyError:
                result = func(inst, model, index)
                self._expire()
                self._cache[cache_index] = result

            self._ttl[cache_index] = self.ttl

            return result

        return wrapped_func

    def _expire(self):
        """
        decrease the remaining ttl of cache items and delete items whose ttl has fallen below 0.

        @return: None
        """
        for index in self._ttl:
            self._ttl[index] -= 1
        old_items = [index for index in self._ttl if self._ttl[index] < 0]
        for index in old_items:
            del self._ttl[index]
            del self._cache[index]
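Usage sketch for the caching decorator above; MyGenerator and its body are hypothetical:

# sketch: cache repeated cluster constructions for identical (model, scan, sym) indices.
from pmsco.dispatch import CachedCalculationMethod
from pmsco.cluster import ClusterGenerator

class MyGenerator(ClusterGenerator):
    @CachedCalculationMethod(ttl=5)
    def create_cluster(self, model, index):
        # expensive construction goes here; repeated calls with the same
        # index (emitter/region ignored) are served from the cache
        return super(MyGenerator, self).create_cluster(model, index)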
class MscoProcess(object):
    """
    code shared by MscoMaster and MscoSlave.
@@ -357,11 +563,9 @@ class MscoProcess(object):
        """
        clean up after all calculations.

        this method calls the clean up function of the project.

        @return: None
        """
        self._project.cleanup()
        pass

    def calc(self, task):
        """
@@ -387,14 +591,36 @@ class MscoProcess(object):
        logger.info("model %s", s_model)
        start_time = datetime.datetime.now()

        # create parameter and cluster structures
        clu = self._project.cluster_generator.create_cluster(task.model, task.id)
        par = self._project.create_params(task.model, task.id)

        # generate file names
        clu = self._create_cluster(task)
        par = self._create_params(task)
        scan = self._define_scan(task)
        output_file = task.format_filename(ext="")

        # determine scan range
        # check parameters and call the msc program
        if clu.get_atom_count() < 2:
            logger.error("empty cluster in calculation %s", s_id)
            task.result_valid = False
        elif clu.get_emitter_count() < 1:
            logger.error("no emitters in cluster of calculation %s.", s_id)
            task.result_valid = False
        else:
            task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
            (root, ext) = os.path.splitext(task.result_filename)
            task.file_ext = ext
            task.result_valid = True
            task.files.update(files)

        task.time = datetime.datetime.now() - start_time

        return task

    def _define_scan(self, task):
        """
        define the scan range.

        @param task: CalculationTask with all attributes set for the calculation.
        @return: pmsco.project.Scan object for the calculator.
        """
        scan = self._project.scans[task.id.scan]
        if task.region:
            scan = scan.copy()
@@ -419,26 +645,56 @@ class MscoProcess(object):
            except KeyError:
                pass

        # check parameters and call the msc program
        if clu.get_atom_count() < 2:
            logger.error("empty cluster in calculation %s", s_id)
            task.result_valid = False
        elif clu.get_emitter_count() < 1:
            logger.error("no emitters in cluster of calculation %s.", s_id)
            task.result_valid = False
        else:
            files = self._calculator.check_cluster(clu, output_file)
        return scan

    def _create_cluster(self, task):
        """
        generate the cluster for the given calculation task.

        cluster generation is delegated to the project's cluster_generator object.

        if the current task has region == 0,
        the method also exports diagnostic clusters via the project's export_cluster() method.
        the file name is formatted with the given task index except that region is -1.

        if (in addition to region == 0) the current task has emit == 0 and cluster includes multiple emitters,
        the method also exports the master cluster and full emitter list.
        the file name is formatted with the given task index except that emitter and region are -1.

        @param task: CalculationTask with all attributes set for the calculation.
        @return: pmsco.cluster.Cluster object for the calculator.
        """
        nem = self._project.cluster_generator.count_emitters(task.model, task.id)
        clu = self._project.cluster_generator.create_cluster(task.model, task.id)

        if task.id.region == 0:
            file_index = task.id._replace(region=-1)
            filename = task.format_filename(region=-1)
            files = self._project.export_cluster(file_index, filename, clu)
            task.files.update(files)

            task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
            (root, ext) = os.path.splitext(task.result_filename)
            task.file_ext = ext
            task.result_valid = True
            task.files.update(files)
            # master cluster
            if nem > 1 and task.id.emit == 0:
                master_index = task.id._replace(emit=-1, region=-1)
                filename = task.format_filename(emit=-1, region=-1)
                master_cluster = self._project.cluster_generator.create_cluster(task.model, master_index)
                files = self._project.export_cluster(master_index, filename, master_cluster)
                task.files.update(files)

        task.time = datetime.datetime.now() - start_time
        return clu

        return task

    def _create_params(self, task):
        """
        generate the parameters list.

        parameters generation is delegated to the project's create_params method.

        @param task: CalculationTask with all attributes set for the calculation.
        @return: pmsco.project.Params object for the calculator.
        """
        par = self._project.create_params(task.model, task.id)

        return par


class MscoMaster(MscoProcess):
@@ -505,20 +761,12 @@ class MscoMaster(MscoProcess):
    # it defines the initial model and the output file name.
    # it is passed to the model handler during the main loop.

    # @var _model_handler
    # (ModelHandler) model handler instance

    # @var _scan_handler
    # (ScanHandler) scan handler instance

    # @var _symmetry_handler
    # (SymmetryHandler) symmetry handler instance

    # @var _emitter_handler
    # (EmitterHandler) emitter handler instance

    # @var _region_handler
    # (RegionHandler) region handler instance
    ## @var task_handlers
    # (AttrDict) dictionary of task handler objects
    #
    # the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'.
    # the values are handlers.TaskHandler objects.
    # the objects can be accessed in attribute or dictionary notation.

    def __init__(self, comm):
        super(MscoMaster, self).__init__(comm)
@@ -534,11 +782,8 @@ class MscoMaster(MscoProcess):
        self._min_queue_len = self._slaves + 1

        self._root_task = None
        self._model_handler = None
        self._scan_handler = None
        self._symmetry_handler = None
        self._emitter_handler = None
        self._region_handler = None
        self.task_levels = list(CalcID._fields)
        self.task_handlers = AttrDict()

    def setup(self, project):
        """
@@ -564,30 +809,29 @@ class MscoMaster(MscoProcess):

        logger.debug("master entering setup")
        self._running_slaves = self._slaves
        self._idle_ranks = range(1, self._running_slaves + 1)
        self._idle_ranks = list(range(1, self._running_slaves + 1))

        self._root_task = CalculationTask()
        self._root_task.file_root = project.output_file
        self._root_task.model = project.create_domain().start

        self._model_handler = project.handler_classes['model']()
        self._scan_handler = project.handler_classes['scan']()
        self._symmetry_handler = project.handler_classes['symmetry']()
        self._emitter_handler = project.handler_classes['emitter']()
        self._region_handler = project.handler_classes['region']()
        for level in self.task_levels:
            self.task_handlers[level] = project.handler_classes[level]()

        self._model_handler.datetime_limit = self.datetime_limit
        self.task_handlers.model.datetime_limit = self.datetime_limit

        slaves_adj = max(self._slaves, 1)
        self._model_handler.setup(project, slaves_adj)
        self.task_handlers.model.setup(project, slaves_adj)
        if project.mode != "single":
            slaves_adj = max(slaves_adj / 2, 1)
        self._scan_handler.setup(project, slaves_adj)
        self._symmetry_handler.setup(project, slaves_adj)
        self._emitter_handler.setup(project, slaves_adj)
        self.task_handlers.scan.setup(project, slaves_adj)
        self.task_handlers.sym.setup(project, slaves_adj)
        self.task_handlers.emit.setup(project, slaves_adj)
        if project.mode != "single":
            slaves_adj = min(slaves_adj, 4)
        self._region_handler.setup(project, slaves_adj)
        self.task_handlers.region.setup(project, slaves_adj)

        project.setup(self.task_handlers)
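Behavior sketch of the new task_handlers container used above; the stand-in handler objects are placeholders:

# sketch: an AttrDict keyed by task level replaces five separate attributes.
from attrdict import AttrDict

task_handlers = AttrDict()
for level in ('model', 'scan', 'sym', 'emit', 'region'):
    task_handlers[level] = object()   # stand-in for project.handler_classes[level]()
assert task_handlers.model is task_handlers['model']   # attribute or key access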
    def run(self):
        """
@@ -615,14 +859,13 @@ class MscoMaster(MscoProcess):

        logger.debug("master exiting main loop")
        self._running = False
        self._save_report()

    def cleanup(self):
        logger.debug("master entering cleanup")
        self._region_handler.cleanup()
        self._emitter_handler.cleanup()
        self._symmetry_handler.cleanup()
        self._scan_handler.cleanup()
        self._model_handler.cleanup()
        for level in reversed(self.task_levels):
            self.task_handlers[level].cleanup()
        self._project.cleanup()
        super(MscoMaster, self).cleanup()

    def _dispatch_results(self):
@@ -632,29 +875,25 @@ class MscoMaster(MscoProcess):
        logger.debug("dispatching results of %u tasks", len(self._complete_tasks))
        while self._complete_tasks:
            __, task = self._complete_tasks.popitem(last=False)
            self._dispatch_result(task)

            logger.debug("passing task %s to region handler", str(task.id))
            task = self._region_handler.add_result(task)

    def _dispatch_result(self, task):
        """
        pass a result through the post-processing modules.

        @param task: a CalculationTask object.

        @return None
        """
        level = task.level
        if level:
            logger.debug(BMsg("passing task {task} to {level} handler", task=str(task.id), level=level))
            task = self.task_handlers[level].add_result(task)
        if task:
            logger.debug("passing task %s to emitter handler", str(task.id))
            task = self._emitter_handler.add_result(task)

        if task:
            logger.debug("passing task %s to symmetry handler", str(task.id))
            task = self._symmetry_handler.add_result(task)

        if task:
            logger.debug("passing task %s to scan handler", str(task.id))
            task = self._scan_handler.add_result(task)

        if task:
            logger.debug("passing task %s to model handler", str(task.id))
            task = self._model_handler.add_result(task)

        if task:
            logger.debug("root task %s complete", str(task.id))
            self._finishing = True
            self._dispatch_result(task)
        else:
            self._finishing = True
            logger.debug(BMsg("root task {task} complete", task=str(task.id)))

    def _create_tasks(self):
        """
@@ -668,7 +907,7 @@ class MscoMaster(MscoProcess):
        """
        logger.debug("creating new tasks from root")
        while len(self._pending_tasks) < self._min_queue_len:
            tasks = self._model_handler.create_tasks(self._root_task)
            tasks = self.task_handlers.model.create_tasks(self._root_task)
            logger.debug("model handler returned %u new tasks", len(tasks))
            if not tasks:
                self._model_done = True
@@ -807,6 +1046,17 @@ class MscoMaster(MscoProcess):

        return self._finishing

    def _save_report(self):
        """
        generate a final report.

        this method is called at the end of the master loop.
        it passes the call to @ref pmsco.handlers.ModelHandler.save_report.

        @return: None
        """
        self.task_handlers.model.save_report(self._root_task)

    def add_model_task(self, task):
        """
        add a new model task including all of its children to the task queue.

        @param task (CalculationTask) task identifier and model parameters.
        """

        scan_tasks = self._scan_handler.create_tasks(task)
        scan_tasks = self.task_handlers.scan.create_tasks(task)
        for scan_task in scan_tasks:
            sym_tasks = self._symmetry_handler.create_tasks(scan_task)
            sym_tasks = self.task_handlers.sym.create_tasks(scan_task)
|
||||
for sym_task in sym_tasks:
|
||||
emitter_tasks = self._emitter_handler.create_tasks(sym_task)
|
||||
emitter_tasks = self.task_handlers.emit.create_tasks(sym_task)
|
||||
for emitter_task in emitter_tasks:
|
||||
region_tasks = self._region_handler.create_tasks(emitter_task)
|
||||
region_tasks = self.task_handlers.region.create_tasks(emitter_task)
|
||||
for region_task in region_tasks:
|
||||
self._pending_tasks[region_task.id] = region_task
|
||||
|
||||
|
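the change above collapses the five dedicated handler attributes into a single task_handlers AttrDict keyed by task level. a minimal sketch of the access pattern; the AttrDict below is a reduced stand-in for pmsco's helper class, not the original implementation:

# minimal stand-in for pmsco's AttrDict: a dict whose keys double as attributes
class AttrDict(dict):
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value

task_levels = ['model', 'scan', 'sym', 'emit', 'region']
handlers = AttrDict()
for level in task_levels:
    handlers[level] = "{0} handler".format(level)  # real code stores TaskHandler instances

# attribute and dictionary notation address the same object ...
assert handlers.model is handlers['model']
# ... and one loop replaces five hand-written cleanup calls
for level in reversed(task_levels):
    print("cleanup", level)
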
3
pmsco/edac/.gitignore
vendored
@ -1,3 +1,2 @@
edac_all_wrap.*
edac.py
edac_wrap.cxx
revision.py

15224
pmsco/edac/edac_all.cpp
Normal file
File diff suppressed because it is too large
7
pmsco/edac/edac_all.i
Normal file
@ -0,0 +1,7 @@
/* EDAC interface for other programs */
%module edac
%{
extern int run_script(char *scriptfile);
%}

extern int run_script(char *scriptfile);
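once the extension is built, the SWIG-generated wrapper named by %module should expose run_script directly. a hypothetical session (the script file name is illustrative):

import edac  # the SWIG-generated wrapper built by setup.py

# run_script() feeds an EDAC input script to the compiled scattering code;
# the return value is the C function's int status
status = edac.run_script("calc.edac")
print("EDAC finished with status", status)
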
@ -1,8 +1,18 @@
*** /home/muntwiler_m/mnt/pearl_data/software/edac/edac_all.cpp 2011-04-14 23:38:44.000000000 +0200
--- edac_all.cpp 2016-02-11 12:15:45.322049772 +0100
--- edac_all.cpp 2018-02-05 17:30:17.347373088 +0100
***************
*** 3085,3090 ****
--- 3085,3091 ----
numero Vxc_Barth_Hedin(numero den, numero denup)
{
if(den<1e-10) return 0;
+ if(denup<0) denup=0;
numero rs=1/pow(4*pi/3*den, 1/3.0);
numero x=denup/den;
numero alpha0=pow(4/(9*pi), 1/3.0);
***************
*** 10117,10122 ****
--- 10117,10123 ----
--- 10118,10124 ----
void scan_imfp(char *name);
void scan_imfp(FILE *fout);
numero iimfp_TPP(numero kr);
@ -12,7 +22,7 @@
int scattering_so;
***************
*** 10230,10235 ****
--- 10231,10237 ----
--- 10232,10238 ----

int n_th;
int n_fi;
@ -22,7 +32,7 @@
numero *th_out,
***************
*** 10239,10244 ****
--- 10241,10247 ----
--- 10242,10248 ----
void free(void);
void init_th(numero thi, numero thf, int nth);
void init_phi(numero fii, numero fif, int nfi);
@ -31,8 +41,121 @@
numero refraction);
void init_transmission(
***************
*** 10905,10942 ****
}
numero calculation::IIIthfi(int no, numero th, numero fi)
{
! numero ii,ii0, xth,xfi;
! numero th0=final.th[0], th1=final.th[final.n_th-1];
numero fi0=final.fi[0], fi1=final.fi[final.n_fi-1];
int ith,ifi,ij;
while(fi<0) fi+=2*pi;
while(fi>2*pi) fi-=2*pi;
! xth=(final.n_th-1)*(th-th0)/(th1-th0); ith=int(floor(xth-0.001));
! xfi=(final.n_fi-1)*(fi-fi0)/(fi1-fi0); ifi=int(floor(xfi-0.001));
if(ifi==-1) ifi=0;
if(0<=ith && ith<final.n_th-1 && 0<=ifi && ifi<final.n_fi-1) {
ij=no*n_ang+ith*final.n_fi+ifi;
! ii0=III0[ij];
! ii=ii0 + (xth-ith)*(III0[ij+final.n_fi]-ii0) + (xfi-ifi)*(III0[ij+1]-ii0);
} else ii=0;
if(ii<0) ii=0;
return ii;
}
! numero calculation::IIIave(int no, numero th, numero fi)
{
! if(thave<=1e-6) return IIIthfi(no,th,fi);
int i,j, nn=10, mm=50;
! numero tth,ffi,val=0, r,f;
for(i=0; i<nn; i++)
for(j=0; j<mm; j++) {
! r=i*thave/nn;
! f=j*2*pi/mm;
! tth=th+r*cos(f);
! ffi=fi+r*sin(f)/cos(pi*th/180);
! val+=IIIthfi(no,tth,ffi);
}
! return val/(nn*mm);
}
void calculation::write_ang(FILE *fout_, int ik)
{
int no,nno,i,j;
--- 10909,10963 ----
}
numero calculation::IIIthfi(int no, numero th, numero fi)
{
! numero ii,xth,xfi;
! numero th0=final.th_out[0], th1=final.th_out[final.n_th-1];
numero fi0=final.fi[0], fi1=final.fi[final.n_fi-1];
int ith,ifi,ij;
while(fi<0) fi+=2*pi;
while(fi>2*pi) fi-=2*pi;
! xth=(final.n_th-1)*(th-th0)/(th1-th0); ith=int(floor(xth-0.0001));
! xfi=(final.n_fi-1)*(fi-fi0)/(fi1-fi0); ifi=int(floor(xfi-0.0001));
if(ifi==-1) ifi=0;
+ if(ith==-1) ith=0;
if(0<=ith && ith<final.n_th-1 && 0<=ifi && ifi<final.n_fi-1) {
ij=no*n_ang+ith*final.n_fi+ifi;
! xth = xth-ith;
! xfi = xfi-ifi;
! ii=III0[ij]*(1-xth)*(1-xfi) + III0[ij+final.n_fi]*xth*(1-xfi) + III0[ij+1]*(1-xth)*xfi + III0[ij+final.n_fi+1]*xth*xfi;
} else ii=0;
if(ii<0) ii=0;
return ii;
}
! numero calculation::IIIave(int no, numero th, numero ph)
{
! if(thave<=1e-6) return IIIthfi(no,th,ph);
int i,j, nn=10, mm=50;
! numero tth, ffi, val=0, th1, ph1, cf, nw=0, x0, y0, z0, x1, y1, th2, ph2;
for(i=0; i<nn; i++)
for(j=0; j<mm; j++) {
! th1=i*2*thave/(nn-1); //2*sigma range
! ph1=j*2*pi/mm;
! //rotation of (001) around Y by th1 and around Z by ph1
! x0 = cos(ph1)*sin(th1);
! y1 = sin(ph1)*sin(th1);
! z0 = cos(th1);
! //rotation around Y by th
! x1 = x0*cos(th) + z0*sin(th);
! z0 = -x0*sin(th) + z0*cos(th);
! //rotation around Z by ph
! x0 = x1*cos(ph) - y1*sin(ph);
! y0 = x1*sin(ph) + y1*cos(ph);
!
! th2 = acos(z0);
! ph2 = atan2(y0,x0);
!
! cf = exp(-(th1*th1/thave*thave))*(i>0.1?i:0.1); //gauss weight * radial weight
! nw += cf; //sum of weights
! val+=IIIthfi(no,th2,ph2)*cf;
}
! return val/nw;
}
+
void calculation::write_ang(FILE *fout_, int ik)
{
int no,nno,i,j;
***************
*** 10961,10967 ****
for(no=0; no<nno; no++)
for(i=0; i<final.n_th; i++)
for(j=0; j<final.n_fi; j++)
! III[no*n_ang+i*final.n_fi+j]=IIIave(no, final.th[i], final.fi[j]);
delete [] III0;
}
for(i=0; i<final.n_th; i++)
--- 10982,10988 ----
for(no=0; no<nno; no++)
for(i=0; i<final.n_th; i++)
for(j=0; j<final.n_fi; j++)
! III[no*n_ang+i*final.n_fi+j]=IIIave(no, final.th_out[i], final.fi[j]);
delete [] III0;
}
for(i=0; i<final.n_th; i++)
***************
*** 12485,12490 ****
--- 12488,12494 ----
--- 12506,12512 ----
else {
kr=sqrt(sqr(calc.k[ik])+2*V0);
if(iimfp_flag==0) ki=iimfp.val(kr)/2;
@ -42,7 +165,7 @@
} } else if(calc.k_flag==2) set_k(calc.kc[ik]);
***************
*** 12507,12512 ****
--- 12511,12522 ----
--- 12529,12540 ----
numero imfp=E/(TPP_Ep*TPP_Ep*(beta*log(gamma*E)-C/E+D/(E*E)))/a0_au;
return 1/imfp;
}
@ -64,7 +187,7 @@
n_1=n_2=0;
Ylm0_th_flag=Ylm0_fi_flag=0;
mesh_flag=0;
--- 13212,13218 ----
--- 13230,13236 ----
}
final_state::final_state(void)
{
@ -74,7 +197,7 @@
mesh_flag=0;
***************
*** 13233,13238 ****
--- 13243,13271 ----
--- 13261,13289 ----
if(n_fi==1) fi[0]=fii;
else for(j=0; j<n_fi; j++) fi[j]=fii+j*(fif-fii)/(n_fi-1);
} }
@ -106,7 +229,7 @@
int i;
***************
*** 14743,14748 ****
--- 14776,14783 ----
--- 14794,14801 ----
|| scat.TPP_Ep<=0 || scat.TPP_Eg<0)
on_error(foutput,"(input) imfp TPP-2M", "wrong parameters");
scat.iimfp_flag=1;
@ -117,7 +240,7 @@
scat.iimfp_flag=0;
***************
*** 15162,15164 ****
--- 15197,15206 ----
--- 15215,15224 ----
fprintf(foutput,"That's all, folks!\n");
return 0;
}

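the main change in the IIIthfi hunk replaces the old two-term linear estimate with a full four-point bilinear interpolation over the (theta, phi) grid cell. a small numpy sketch of the same formula (names are illustrative, not EDAC's):

import numpy as np

def bilinear(grid, xth, xfi):
    """interpolate grid at fractional indices (xth, xfi), as in the patched IIIthfi."""
    ith, ifi = int(np.floor(xth)), int(np.floor(xfi))
    t, f = xth - ith, xfi - ifi
    return (grid[ith, ifi] * (1 - t) * (1 - f)
            + grid[ith + 1, ifi] * t * (1 - f)
            + grid[ith, ifi + 1] * (1 - t) * f
            + grid[ith + 1, ifi + 1] * t * f)

grid = np.arange(12.0).reshape(3, 4)
print(bilinear(grid, 1.25, 2.5))  # 7.5, between the four surrounding grid points
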
@ -13,28 +13,23 @@ SHELL=/bin/sh
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac

FC=gfortran
FCCOPTS=
F2PY=f2py
F2PYOPTS=
CC=g++
CCOPTS=-Wno-write-strings
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=
FC?=gfortran
FCCOPTS?=
F2PY?=f2py
F2PYOPTS?=
CXX?=g++
CXXOPTS?=-Wno-write-strings
PYTHON?=python
PYTHONOPTS?=

all: edac

edac: edac.exe _edac.so edac.py

edac.exe: edac_all.cpp
	$(CC) $(CCOPTS) -o edac.exe edac_all.cpp
	$(CXX) $(CXXOPTS) -o edac.exe edac_all.cpp

edac_wrap.cxx: edac_all.cpp edac.i
	$(SWIG) $(SWIGOPTS) -c++ -python edac.i

edac.py _edac.so: edac_wrap.cxx setup.py
edac.py _edac.so: edac_all.cpp edac_all.i setup.py
	$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace

revision.py: _edac.so
@ -46,7 +41,7 @@ revision.txt: _edac.so edac.exe
	echo "" >> revision.txt

clean:
	rm -f *.so *.o *.exe
	rm -f *_wrap.cxx
	rm -f revision.py
	rm -f revision.txt
	rm -f *.so *.o *.exe *.pyc
	rm -f edac.py edac_all_wrap.*
	rm -f revision.*

@ -8,7 +8,8 @@ from distutils.core import setup, Extension


edac_module = Extension('_edac',
sources=['edac_wrap.cxx', 'edac_all.cpp'],
sources=['edac_all.cpp', 'edac_all.i'],
swig_opts=['-c++']
)

setup (name = 'edac',
@ -16,5 +17,7 @@ setup (name = 'edac',
author = "Matthias Muntwiler",
description = """EDAC module in Python""",
ext_modules = [edac_module],
py_modules = ["edac"], requires=['numpy']
py_modules = ["edac"],
requires=['numpy']
)

249
pmsco/files.py
@ -1,16 +1,19 @@
"""
@package pmsco.files
manage files produced by pmsco.
manage the lifetime of files produced by pmsco.

@author Matthias Muntwiler

@copyright (c) 2016 by Paul Scherrer Institut @n
@copyright (c) 2016-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import logging
import mpi4py
@ -53,7 +56,7 @@ FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
#
# this constant defines the default set of file categories that are kept after the calculation.
#
FILE_CATEGORIES_TO_KEEP = {'cluster', 'model', 'report', 'population'}
FILE_CATEGORIES_TO_KEEP = {'cluster', 'model', 'scan', 'report', 'population'}

## @var FILE_CATEGORIES_TO_DELETE
# categories of files to be deleted.
@ -67,13 +70,17 @@ FILE_CATEGORIES_TO_DELETE = FILE_CATEGORIES - FILE_CATEGORIES_TO_KEEP

class FileTracker(object):
"""
organize output files of calculations.
manage the lifetime of files produced by the calculations.

the file manager stores references to data files generated during calculations
and cleans up unused files according to a range of filter criteria.

this class identifies files by _file name_.
file names must therefore be unique over the whole calculation process.
it is possible to specify a full path that is used for communication with the operating system.
"""

## @var files_to_delete (set)
## @var categories_to_delete (set)
# categories of generated files that should be deleted after the calculation.
#
# each string of this set marks a category of files to be deleted.
@ -93,96 +100,119 @@ class FileTracker(object):
#
# the default is 10.

## @var _last_id (int)
# last used file identification number (incremental)

## @var _path_by_id (dict)
# key = file id, value = file path

## @var _model_by_id (dict)
# key = file id, value = model number

## @var _category_by_id (dict)
# key = file id, value = category (str)
## @var _file_model (dict)
# key = file name, value = model number

## @var _file_category (dict)
# key = file name, value = category (str)

## @var _file_path (dict)
# key = file name, value = absolute file path (str)

## @var _rfac_by_model (dict)
# key = model number, value = file id
# key = model number, value = R-factor

## @var _complete_by_model (dict)
# key = model number, value (boolean) = all calculations complete, files can be deleted
## @var _complete_models (set)
# this set contains the model numbers of the models that have finished all calculations.
# files of these models can be considered for clean up.

def __init__(self):
self._id_by_path = {}
self._path_by_id = {}
self._model_by_id = {}
self._category_by_id = {}
self._file_model = {}
self._file_category = {}
self._file_path = {}
self._rfac_by_model = {}
self._complete_by_model = {}
self._last_id = 0
self._complete_models = set([])
self.categories_to_delete = FILE_CATEGORIES_TO_DELETE
self.keep_rfac = 10

def add_file(self, path, model, category='default'):
def get_file_count(self):
"""
return the number of tracked files.

@return: (int) number of tracked files.
"""
return len(self._file_path)

def get_complete_models_count(self):
"""
return the number of complete models.

@return: (int) number of complete models.
"""
return len(self._complete_models)

def add_file(self, name, model, category='default', path=''):
"""
add a new data file to the list.

@param path: (str) system path of the file relative to the working directory.
@param name: (str) unique identification of the file.
this can be the file name in the file system if file names are unique without path specification.
the name must be spelled identically
whenever the same file is referenced in a call to another method of this class.
the empty string is ignored.

@param model: (int) model number

@param category: (str) file category, e.g. 'output', etc.

@param path: (str) file system path of the file.
the file system path is used for communication with the operating system when the file is deleted.

by default, the path is the name argument expanded to a full path relative to the current working directory.
the path is expanded during the call of this method and will not change when the working directory changes.

@return: None
"""
self._last_id += 1
_id = self._last_id
self._id_by_path[path] = _id
self._path_by_id[_id] = path
self._model_by_id[_id] = model
self._category_by_id[_id] = category
if name:
self._file_model[name] = model
self._file_category[name] = category
self._file_path[name] = path if path else os.path.abspath(name)

def rename_file(self, old_path, new_path):
def rename_file(self, old_name, new_name, new_path=''):
"""
rename a data file in the list.

the method does not rename the file in the file system.

@param old_path: must match an existing file path identically.
if old_path is not in the list, the method does nothing.
@param old_name: name used in the original add_file() call.
if it is not in the list, the method does nothing.

@param new_path: new path.
@param new_name: new name of the file, see add_file().
if the file is already in the list, its model and category is overwritten by the values of the old file.

@param new_path: new file system path of the file, see add_file().
by default, the path is the name argument expanded to a full path relative to the current working directory.

@return: None
"""
try:
_id = self._id_by_path[old_path]
model = self._file_model[old_name]
cat = self._file_category[old_name]
except KeyError:
pass
else:
del self._id_by_path[old_path]
self._id_by_path[new_path] = _id
self._path_by_id[_id] = new_path
del self._file_model[old_name]
del self._file_category[old_name]
del self._file_path[old_name]
self.add_file(new_name, model, cat, new_path)

def remove_file(self, path):
def remove_file(self, name):
"""
remove a file from the list.

the method does not delete the file from the file system.

@param path: must match an existing file path identically.
if path is not in the list, the method does nothing.
@param name: must match an existing file name identically.
if the name is not found in the list, the method does nothing.

@return: None
"""
try:
_id = self._id_by_path[path]
del self._file_model[name]
del self._file_category[name]
del self._file_path[name]
except KeyError:
pass
else:
del self._id_by_path[path]
del self._path_by_id[_id]
del self._model_by_id[_id]
del self._category_by_id[_id]

def update_model_rfac(self, model, rfac):
"""
@ -207,18 +237,19 @@ class FileTracker(object):
@param complete: (bool) True if all calculations of the model are complete (files can be deleted).
@return: None
"""
self._complete_by_model[model] = complete
if complete:
self._complete_models.add(model)
else:
self._complete_models.discard(model)

def delete_files(self, categories=None, keep_rfac=0):
def delete_files(self, categories=None):
"""
delete the files matching the list of categories.

@param categories: set of file categories to delete.
may include 'rfac' if bad r-factors should be deleted additionally (regardless of static category).
defaults to self.categories_to_delete.
@version this method does not act on the 'rfac' category.

@param keep_rfac: number of best models to keep if bad r-factors are to be deleted.
the effective keep number is the greater of self.keep_rfac and this argument.
@param categories: set of file categories to delete.
defaults to self.categories_to_delete.

@return: None
"""
@ -226,8 +257,6 @@ class FileTracker(object):
categories = self.categories_to_delete
for cat in categories:
self.delete_category(cat)
if 'rfac' in categories:
self.delete_bad_rfac(keep=keep_rfac)

def delete_bad_rfac(self, keep=0, force_delete=False):
"""
@ -252,8 +281,6 @@ class FileTracker(object):
@param force_delete: delete the bad files even if 'rfac' is not selected in categories_to_delete.

@return: None

@todo should clean up rfac and model dictionaries from time to time.
"""
if force_delete or 'rfac' in self.categories_to_delete:
keep = max(keep, self.keep_rfac)
@ -263,12 +290,45 @@ class FileTracker(object):
except IndexError:
return

complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
del_models = {_model for (_model, _rfac) in self._rfac_by_model.iteritems() if _rfac > rfac_split}
del_models &= complete_models
del_ids = {_id for (_id, _model) in self._model_by_id.iteritems() if _model in del_models}
for _id in del_ids:
self.delete_file(_id)
keep_models = {model for (model, rfac) in self._rfac_by_model.items() if 0.0 <= rfac <= rfac_split}
del_models = self._complete_models - keep_models
del_names = {name for (name, model) in self._file_model.items() if model in del_models}
for name in del_names:
self.delete_file(name)

def delete_models(self, keep=None, delete=None):
"""
delete all files by model.

this involves the following steps:
1. determine a list of complete models
(incomplete models are still being processed and must not be deleted).
2. intersect with the _delete_ list if specified.
3. subtract the _keep_ list if specified.

if neither the _keep_ nor the _delete_ list is specified,
or if the steps above resolve to the _complete_ list,
the method considers it an error and does nothing.

@param keep: (sequence) model numbers to keep, i.e., delete all others.

@param delete: (sequence) model numbers to delete.

@return (int) number of models deleted.
"""
del_models = self._complete_models.copy()
if delete:
del_models &= delete
if keep:
del_models -= keep
if not del_models or del_models == self._complete_models:
return 0

del_names = {name for (name, model) in self._file_model.items() if model in del_models}
for name in del_names:
self.delete_file(name)

return len(del_models)

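the set algebra of delete_models in a nutshell (illustrative model numbers):

# models whose calculations have all finished
complete = {1, 2, 3, 4, 5}
requested = {2, 3, 9}           # delete argument (9 is still incomplete)
protected = {3}                 # keep argument

del_models = complete.copy()
del_models &= requested         # {2, 3}: incomplete models are never touched
del_models -= protected         # {2}
# the guard refuses to delete nothing or to wipe every complete model
assert del_models and del_models != complete
print(sorted(del_models))       # [2]
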
def delete_category(self, category):
"""
@ -280,45 +340,38 @@ class FileTracker(object):

@return: None
"""
complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
del_ids = {_id for (_id, cat) in self._category_by_id.iteritems() if cat == category}
del_ids &= {_id for (_id, _model) in self._model_by_id.iteritems() if _model in complete_models}
for _id in del_ids:
self.delete_file(_id)
del_names = {name for (name, cat) in self._file_category.items() if cat == category}
del_names &= {name for (name, model) in self._file_model.items() if model in self._complete_models}
for name in del_names:
self.delete_file(name)

def delete_file(self, _id):
def delete_file(self, name):
"""
delete a specified file from the list and the file system.

the file is identified by ID number.
this method is unconditional. it does not consider category, completeness, nor R-factor.

@param _id: (int) ID number of the file to delete.
the method catches errors during file deletion and prints warnings to the logger.

@param name: must match an existing file name identically.
if it is not in the list, the method does nothing.
the method uses the associated path declared in add_file() to delete the file.

@return: None
"""
path = self._path_by_id[_id]
cat = self._category_by_id[_id]
model = self._model_by_id[_id]
del self._id_by_path[path]
del self._path_by_id[_id]
del self._model_by_id[_id]
del self._category_by_id[_id]
try:
self._os_delete_file(path)
except OSError:
logger.warning("error deleting file {0}".format(path))
cat = self._file_category[name]
model = self._file_model[name]
path = self._file_path[name]
except KeyError:
logger.warning("tried to delete untracked file {0}".format(name))
else:
logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))

@staticmethod
def _os_delete_file(path):
"""
have the operating system delete a file path.

this function is separate so that we can mock it in unit tests.

@param path: OS path
@return: None
"""
os.remove(path)
del self._file_model[name]
del self._file_category[name]
del self._file_path[name]
try:
os.remove(path)
except OSError:
logger.warning("file system error deleting file {0}".format(path))
else:
logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))

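taken together, a hypothetical end-to-end session with the new name-based interface (file names are illustrative; if the files do not exist in the file system, deletion merely logs a warning):

from pmsco.files import FileTracker

tracker = FileTracker()
# register the files of two models as they are produced
tracker.add_file("model0001.etpi", 1, category='output')
tracker.add_file("model0002.etpi", 2, category='output')
# report r-factors and mark the models complete
tracker.update_model_rfac(1, 0.35)
tracker.update_model_rfac(2, 0.72)
tracker.set_model_complete(1, True)
tracker.set_model_complete(2, True)
# delete files of the configured categories;
# r-factor based clean-up is triggered separately via delete_bad_rfac()
tracker.delete_files()
print(tracker.get_file_count(), tracker.get_complete_models_count())
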
0
pmsco/graphics/__init__.py
Normal file
198
pmsco/graphics/rfactor.py
Normal file
@ -0,0 +1,198 @@
"""
@package pmsco.graphics.rfactor
graphics rendering module for r-factor optimization results.

this module is under development.
interface and implementation are subject to change.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

try:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
except ImportError:
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. graphics rendering disabled.")


def render_param_rfac(filename, data, param_name, summary=None, canvas=None):
"""
render an r-factor versus one model parameter graph.

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function raises an error.

@param filename: path and name of the results file.
this is used to derive the output file path by adding the parameter name and
the extension of the graphics file format.

@param data: numpy-structured array of results (one-dimensional).

the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.

@param param_name: name of the model parameter to display.
this must correspond to a field name of the data array.

@param summary: (dict) the dictionary returned by @ref evaluate_results.
this is used to mark the optimum value and the error limits.
if None, these values are not marked in the plot.

@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.

@return (str) path and name of the generated graphics file.
empty string if an error occurred.

@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)

ax = fig.add_subplot(111)
ax.scatter(data[param_name], data['_rfac'], c='b', marker='o', s=4.0)

if summary is not None:
xval = summary['val'][param_name]
ymin = summary['vmin']['_rfac']
ymax = summary['vmax']['_rfac']
ax.plot((xval, xval), (ymin, ymax), ':k')
xmin = summary['vmin'][param_name]
xmax = summary['vmax'][param_name]
varr = summary['rmin'] + summary['rvar']
ax.plot((xmin, xmax), (varr, varr), ':k')

ax.grid(True)
ax.set_xlabel(param_name)
ax.set_ylabel('R-factor')

out_filename = "{0}.{1}.{2}".format(filename, param_name, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename


def evaluate_results(data, features=50.):
"""
@param data: numpy-structured array of results (one-dimensional).

the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.

@param features: number of independent features (pieces of information) in the data.
this quantity can be approximated as the scan range divided by the average width of a feature
which includes an intrinsic component and the instrumental resolution.
see Booth et al., Surf. Sci. 387 (1997), 152 for energy scans, and
Muntwiler et al., Surf. Sci. 472 (2001), 125 for angle scans.
the default value of 50 is a typical value.

@return dictionary of evaluation results.

the dictionary contains scalars and structured arrays as follows.
the structured arrays have the same data type as the input data and contain exactly one element.

@arg rmin: (scalar) minimum r-factor.
@arg rvar: (scalar) one-sigma variation of r-factor.
@arg imin: (scalar) array index where the minimum is located.
@arg val: (structured array) estimates of parameter values (parameter value at rmin).
@arg sig: (structured array) one-sigma error of estimated values.
@arg vmin: (structured array) minimum value of the parameter.
@arg vmax: (structured array) maximum value of the parameter.
"""
imin = data['_rfac'].argmin()
rmin = data['_rfac'][imin]
rvar = rmin * math.sqrt(2. / float(features))

val = np.zeros(1, dtype=data.dtype)
sig = np.zeros(1, dtype=data.dtype)
vmin = np.zeros(1, dtype=data.dtype)
vmax = np.zeros(1, dtype=data.dtype)
sel = data['_rfac'] <= rmin + rvar
for name in data.dtype.names:
val[name] = data[name][imin]
vmin[name] = data[name].min()
vmax[name] = data[name].max()
if name[0] != '_':
sig[name] = (data[name][sel].max() - data[name][sel].min()) / 2.

results = {'rmin': rmin, 'rvar': rvar, 'imin': imin, 'val': val, 'sig': sig, 'vmin': vmin, 'vmax': vmax}
return results


def render_results(results_file, data=None):
"""
produce a graphics file from optimization results.

the results can be passed in a file name or numpy array (see parameter section).

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function will log a warning message and return gracefully.

@param results_file: path and name of the result file.

result files are the ones written by swarm.SwarmPopulation.save_array, for instance.
the file contains columns of model parameters and optimization control values.
the first row must contain column names that identify the quantity.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.

if the optional data parameter is present,
this is used only to derive the output file path by adding the extension of the graphics file format.

@param data: numpy-structured array of results (one-dimensional).

the field names identify the model parameters and optimization control values.
model parameters can have any name not including a leading underscore and are evaluated as is.
the names of the special optimization control values begin with an underscore.
of these, at least _rfac must be provided.

if this argument is omitted, the data is loaded from the file referenced by the filename argument.

@return (list of str) path names of the generated graphics files.
empty if an error occurred.
the most common exceptions are caught and add a warning in the log file.
"""

if data is None:
data = np.genfromtxt(results_file, names=True)

summary = evaluate_results(data)

out_files = []
try:
for name in data.dtype.names:
if name[0] != '_' and summary['sig'][name] > 0.:
graph_file = render_param_rfac(results_file, data, name, summary)
out_files.append(graph_file)
except (TypeError, AttributeError, IOError) as e:
logger.warning(BMsg("error rendering scan file {file}: {msg}", file=results_file, msg=str(e)))

return out_files
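the error estimate follows rvar = rmin * sqrt(2 / features), and sig is half the spread of each parameter over the models with _rfac <= rmin + rvar. a quick synthetic check (toy data, assuming the pmsco package is importable):

import numpy as np
from pmsco.graphics.rfactor import evaluate_results

# toy result set: a parabolic r-factor over one model parameter 'dz'
dz = np.linspace(-1.0, 1.0, 41)
data = np.zeros(dz.size, dtype=[('dz', 'f8'), ('_rfac', 'f8')])
data['dz'] = dz
data['_rfac'] = 0.2 + 0.5 * dz**2

summary = evaluate_results(data, features=50.)
print(summary['rmin'], summary['rvar'])       # 0.2 and 0.2 * sqrt(2/50) = 0.04
print(summary['val']['dz'], summary['sig']['dz'])
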
273
pmsco/graphics/scan.py
Normal file
@ -0,0 +1,273 @@
"""
@package pmsco.graphics.scan
graphics rendering module for energy and angle scans.

this module is experimental.
interface and implementation are subject to change.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
import pmsco.data as md
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

try:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# from matplotlib.backends.backend_pdf import FigureCanvasPdf
# from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. graphics rendering disabled.")


def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
"""
produce a graphics file from a one-dimensional scan file.

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function raises an error.

@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of EI, ETPI or ETPAI data.
@param scan_mode: list containing the field name of the scanning axis of the data array.
it must contain one element exactly.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to set axis labels.

@return (str) path and name of the generated graphics file.
empty string if an error occurred.

@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)

ax = fig.add_subplot(111)
ax.plot(data[scan_mode[0]], data['i'])

ax.set_xlabel(scan_mode[0])
if is_modf:
ax.set_ylabel('chi')
else:
ax.set_ylabel('int')

out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename


def render_ea_scan(filename, data, scan_mode, canvas=None, is_modf=False):
"""
produce a graphics file from an energy-angle scan file.

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function raises an error.

@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of ETPI or ETPAI data.
@param scan_mode: list containing the field names of the scanning axes of the data array,
i.e. 'e' and one of the angle axes.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to select a suitable color scale.

@return (str) path and name of the generated graphics file.
empty string if an error occurred.

@raise TypeError if matplotlib is not available.
"""
(data2d, axis0, axis1) = md.reshape_2d(data, scan_mode, 'i')

if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)

ax = fig.add_subplot(111)
im = ax.imshow(data2d, origin='lower', aspect='auto', interpolation='none')
im.set_extent((axis1[0], axis1[-1], axis0[0], axis0[-1]))

ax.set_xlabel(scan_mode[1])
ax.set_ylabel(scan_mode[0])

cb = fig.colorbar(im, shrink=0.4, pad=0.1)

dlo = np.nanpercentile(data['i'], 1)
dhi = np.nanpercentile(data['i'], 99)
if is_modf:
im.set_cmap("RdBu_r")
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
im.set_clim((dlo, dhi))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
except AttributeError:
pass
else:
im.set_cmap("magma")
im.set_clim((dlo, dhi))

out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename


def render_tp_scan(filename, data, canvas=None, is_modf=False):
"""
produce a graphics file from a theta-phi (hemisphere) scan file.

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function raises an error.

@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of TPI data.
the T and P columns describe a full or partial hemispherical scan.
the I column contains the intensity or modulation values.
other columns are ignored.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to select a suitable color scale.

@return (str) path and name of the generated graphics file.
empty string if an error occurred.

@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)

ax = fig.add_subplot(111, projection='polar')

data = data[data['t'] <= 89.0]
# stereographic projection
rd = 2 * np.tan(np.radians(data['t']) / 2)
drdt = 1 + np.tan(np.radians(data['t']) / 2)**2

# http://matplotlib.org/api/collections_api.html#matplotlib.collections.PathCollection
pc = ax.scatter(data['p'] * math.pi / 180., rd, c=data['i'], lw=0, alpha=1.)

# interpolate marker size between 4 and 9 (for theta step = 1)
unique_theta = np.unique(data['t'])
theta_step = (np.max(unique_theta) - np.min(unique_theta)) / unique_theta.shape[0]
sz = np.ones_like(pc.get_sizes()) * drdt * 4.5 * theta_step**2
pc.set_sizes(sz)

# xticks = angles where grid lines are displayed (in radians)
ax.set_xticks([])
# rticks = radii where grid lines (circles) are displayed
ax.set_rticks([])
ax.set_rmax(2.0)

cb = fig.colorbar(pc, shrink=0.4, pad=0.1)

dlo = np.nanpercentile(data['i'], 2)
dhi = np.nanpercentile(data['i'], 98)
if is_modf:
pc.set_cmap("RdBu_r")
# im.set_cmap("coolwarm")
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
pc.set_clim((dlo, dhi))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
except AttributeError:
pass
else:
pc.set_cmap("magma")
# im.set_cmap("inferno")
# im.set_cmap("viridis")
pc.set_clim((dlo, dhi))
ti = cb.get_ticks()
ti = [min(ti), max(ti)]
cb.set_ticks(ti)

out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename


def render_scan(filename, data=None):
"""
produce a graphics file from a scan file.

the default file format is PNG.

this function requires the matplotlib module.
if it is not available, the function will log a warning message and return gracefully.

@param filename: path and name of the scan file.
the file must have one of the formats supported by pmsco.data.load_data().
it must contain a single scan (not the combined scan from the model level of PMSCO).
supported are all one-dimensional linear scans,
and two-dimensional energy-angle scans (each axis must be linear).
hemispherical scans are currently not supported.
the filename should include ".modf" if the data contains a modulation function rather than intensity.

if the optional data parameter is present,
this is used only to derive the output file path by adding the extension of the graphics file format.

@param data: numpy-structured array of ETPI or ETPAI data.
if this argument is omitted, the data is loaded from the file referenced by the filename argument.

@return (str) path and name of the generated graphics file.
empty string if an error occurred.
"""
if data is None:
data = md.load_data(filename)
scan_mode, scan_positions = md.detect_scan_mode(data)
is_modf = filename.find(".modf") >= 0

try:
if len(scan_mode) == 1:
out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf)
elif len(scan_mode) == 2 and 'e' in scan_mode:
out_filename = render_ea_scan(filename, data, scan_mode, is_modf=is_modf)
elif len(scan_mode) == 2 and 't' in scan_mode and 'p' in scan_mode:
out_filename = render_tp_scan(filename, data, is_modf=is_modf)
else:
out_filename = ""
logger.warning(BMsg("no render function for scan file {file}", file=filename))
except (TypeError, AttributeError, IOError) as e:
out_filename = ""
logger.warning(BMsg("error rendering scan file {file}: {msg}", file=filename, msg=str(e)))

return out_filename
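projects normally call only the render_scan dispatcher; a usage sketch (file name illustrative, matplotlib required):

import pmsco.graphics.scan as mgs

# render a modulation-function scan; the ".modf" in the name selects the
# symmetric blue-red color scale, and the axes are detected from the data
graph_file = mgs.render_scan("twoatom0001.modf.etpai")
if graph_file:
    print("wrote", graph_file)  # e.g. twoatom0001.modf.etpai.png
else:
    print("rendering failed, see log")
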
@ -40,21 +40,28 @@ the scan and symmetry handlers call methods of the project class to invoke proje

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-17 by Paul Scherrer Institut @n
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datetime
import os
from functools import reduce
import logging
import math
import numpy as np
import data as md
from helpers import BraceMessage as BMsg
import os

from pmsco.compat import open
import pmsco.data as md
import pmsco.graphics.scan as mgs
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

@ -66,10 +73,10 @@ class TaskHandler(object):
this class defines the common interface of task handlers.
"""

## @var project
## @var _project
# (Project) project instance.

## @var slots
## @var _slots
# (int) number of calculation slots (processes).
#
# for best efficiency, the number of tasks generated should be greater than or equal to the number of slots.
@ -93,7 +100,7 @@ class TaskHandler(object):
# the dictionary keys are the task identifiers CalculationTask.id,
# the values are the corresponding CalculationTask objects.

## @var invalid_count (int)
## @var _invalid_count (int)
# accumulated total number of invalid results received.
#
# the number is incremented by add_result if an invalid task is reported.
@ -188,21 +195,22 @@ class TaskHandler(object):
the id, model, and files attributes are required.
if model contains a '_rfac' value, the r-factor is

@return: None
@return None
"""
model_id = task.id.model
for path, cat in task.files.iteritems():
for path, cat in task.files.items():
self._project.files.add_file(path, model_id, category=cat)

def cleanup_files(self, keep=10):
def cleanup_files(self, keep=0):
"""
delete uninteresting files.

@param: number of best ranking models to keep.
@param keep: minimum number of models to keep.
0 (default): leave the decision to the project.

@return: None
@return None
"""
self._project.files.delete_files(keep_rfac=keep)
self._project.cleanup_files(keep=keep)


class ModelHandler(TaskHandler):
@ -255,6 +263,22 @@ class ModelHandler(TaskHandler):

return None

def save_report(self, root_task):
"""
generate a final report of the optimization procedure.

detailed calculation results are usually saved as soon as they become available.
this method may be implemented in sub-classes to aggregate and summarize the results, generate plots, etc.
in this class, the method does nothing.

@note: implementations must add the path names of generated files to self._project.files.

@param root_task: (CalculationTask) task with initial model parameters.

@return: None
"""
pass


class SingleModelHandler(ModelHandler):
"""
@ -263,6 +287,10 @@ class SingleModelHandler(ModelHandler):
this class runs a single calculation on the start parameters defined in the domain of the project.
"""

def __init__(self):
super(SingleModelHandler, self).__init__()
self.result = {}

def create_tasks(self, parent_task):
"""
start one task with the start parameters.
@ -316,25 +344,18 @@ class SingleModelHandler(ModelHandler):
modf_ext = ".modf" + parent_task.file_ext
parent_task.modf_filename = parent_task.file_root + modf_ext

rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
assert not math.isnan(task.rfac)
self.result = task.model.copy()
self.result['_rfac'] = task.rfac

task.model['_rfac'] = rfac
self.save_report_file(task.model)

self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.update_model_rfac(task.id.model, task.rfac)
self._project.files.set_model_complete(task.id.model, True)

parent_task.time = task.time

return parent_task

def save_report_file(self, result):
def save_report(self, root_task):
"""
save model parameters and r-factor to a file.

@ -343,20 +364,25 @@ class SingleModelHandler(ModelHandler):
the first line contains the parameter names.
this is the same format as used by the swarm and grid handlers.

@param result: dictionary of results and parameters. the values should be scalars and strings.
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.

@return: None
"""
keys = [key for key in result]
super(SingleModelHandler, self).save_report(root_task)

keys = [key for key in self.result]
keys.sort(key=lambda t: t[0].lower())
vals = (str(result[key]) for key in keys)
with open(self._project.output_file + ".dat", "w") as outfile:
vals = (str(self.result[key]) for key in keys)
filename = self._project.output_file + ".dat"
with open(filename, "w") as outfile:
outfile.write("# ")
outfile.write(" ".join(keys))
outfile.write("\n")
outfile.write(" ".join(vals))
outfile.write("\n")

self._project.files.add_file(filename, root_task.id.model, "report")


class ScanHandler(TaskHandler):
"""
@ -388,6 +414,30 @@ class ScanHandler(TaskHandler):
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}

def setup(self, project, slots):
"""
initialize the scan task handler and save processed experimental scans.
"""
super(ScanHandler, self).setup(project, slots)

for (i_scan, scan) in enumerate(self._project.scans):
if scan.modulation is not None:
__, filename = os.path.split(scan.filename)
pre, ext = os.path.splitext(filename)
filename = "{pre}_{scan}.modf{ext}".format(pre=pre, ext=ext, scan=i_scan)
filepath = os.path.join(self._project.output_dir, filename)
md.save_data(filepath, scan.modulation)
mgs.render_scan(filepath, data=scan.modulation)

if project.combined_scan is not None:
ext = md.format_extension(project.combined_scan)
filename = project.output_file + ext
md.save_data(filename, project.combined_scan)
if project.combined_modf is not None:
ext = md.format_extension(project.combined_modf)
filename = project.output_file + ".modf" + ext
md.save_data(filename, project.combined_modf)

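for reference, the file name pattern produced by the loop above, with illustrative values:

import os.path

# how the name of the preprocessed modulation-function file is derived
filename = "twoatom_hemi_250e.etpi"
i_scan = 0
pre, ext = os.path.splitext(filename)
print("{pre}_{scan}.modf{ext}".format(pre=pre, ext=ext, scan=i_scan))
# -> twoatom_hemi_250e_0.modf.etpi
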
def create_tasks(self, parent_task):
|
||||
"""
|
||||
generate a calculation task for each scan of the given parent task.
|
||||
@ -464,6 +514,7 @@ class ScanHandler(TaskHandler):
|
||||
|
||||
if parent_task.result_valid:
|
||||
self._project.combine_scans(parent_task, child_tasks)
|
||||
self._project.evaluate_result(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')
|
||||
|
||||
@ -575,8 +626,11 @@ class SymmetryHandler(TaskHandler):
|
||||
|
||||
if parent_task.result_valid:
|
||||
self._project.combine_symmetries(parent_task, child_tasks)
|
||||
self._project.evaluate_result(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
|
||||
graph_file = mgs.render_scan(parent_task.modf_filename)
|
||||
self._project.files.add_file(graph_file, parent_task.id.model, 'scan')
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
@ -621,7 +675,7 @@ class EmitterHandler(TaskHandler):
|
||||
|
||||
all emitters share the same model parameters.
|
||||
|
||||
@return list of @ref CalculationTask objects with one element per emitter configuration
|
||||
@return list of @ref pmsco.dispatch.CalculationTask objects with one element per emitter configuration
|
||||
if parallel processing is enabled.
|
||||
otherwise the list contains a single CalculationTask object with emitter index 0.
|
||||
the emitter index is used by the project's create_cluster method.
|
||||
@ -634,10 +688,7 @@ class EmitterHandler(TaskHandler):
|
||||
self._complete_ids_per_parent[parent_id] = set()
|
||||
|
||||
n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
|
||||
if n_emitters > 1 and self._slots > 1:
|
||||
emitters = range(1, n_emitters + 1)
|
||||
else:
|
||||
emitters = [0]
|
||||
emitters = range(n_emitters)
|
||||
|
||||
out_tasks = []
|
||||
for em in emitters:
|
||||
@ -698,8 +749,11 @@ class EmitterHandler(TaskHandler):
|
||||
|
||||
if parent_task.result_valid:
|
||||
self._project.combine_emitters(parent_task, child_tasks)
|
||||
self._project.evaluate_result(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
|
||||
graph_file = mgs.render_scan(parent_task.modf_filename)
|
||||
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
@ -776,15 +830,10 @@ class RegionHandler(TaskHandler):
|
||||
parent_task.time = reduce(lambda a, b: a + b, child_times)
|
||||
|
||||
if parent_task.result_valid:
|
||||
stack1 = [md.load_data(t.result_filename) for t in child_tasks]
|
||||
dtype = md.common_dtype(stack1)
|
||||
stack2 = [md.restructure_data(d, dtype) for d in stack1]
|
||||
result_data = np.hstack(tuple(stack2))
|
||||
md.sort_data(result_data)
|
||||
md.save_data(parent_task.result_filename, result_data)
|
||||
self._project.combine_regions(parent_task, child_tasks)
|
||||
self._project.evaluate_result(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
|
||||
for t in child_tasks:
|
||||
self._project.files.remove_file(t.result_filename)
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, "emitter")
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
@ -840,7 +889,7 @@ class EnergyRegionHandler(RegionHandler):
|
||||
so that all child tasks of the same parent finish approximately in the same time.
|
||||
pure angle scans are not split.
|
||||
|
||||
to use this feature, the project assigns this class to its @ref handler_classes['region'].
|
||||
to use this feature, the project assigns this class to its @ref pmsco.project.Project.handler_classes['region'].
|
||||
it is safe to use this handler for calculations that do not involve energy scans.
|
||||
the handler is best used for single calculations.
|
||||
in optimizations that calculate many models there is no advantage in using it
|
||||
|
@ -1,4 +1,20 @@
|
||||
class BraceMessage:
|
||||
"""
|
||||
@package pmsco.helpers
|
||||
helper classes
|
||||
|
||||
a collection of small, generic code snippets, mostly collected from the web.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class BraceMessage(object):
|
||||
"""
|
||||
a string formatting proxy class useful for logging and exceptions.
|
||||
|
||||
use BraceMessage("{0} {1}", "formatted", "message")
|
||||
in place of "{0} {1}".format("formatted", "message").
|
||||
the advantage is that the format method is called only if the string is actually used.
|
||||
"""
|
||||
def __init__(self, fmt, *args, **kwargs):
|
||||
self.fmt = fmt
|
||||
self.args = args
|
||||
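# usage sketch: defer string formatting until the log record is actually emitted.
# the call mirrors how the handlers use BMsg elsewhere in this commit.
import logging
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)
# format() runs only if the WARNING level is enabled
logger.warning(BMsg("calculation of model {0} gave R-factor {1}", 17, 0.4))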
|
@ -5,45 +5,42 @@ SHELL=/bin/sh
|
||||
# required libraries: libblas, liblapack, libf2c
|
||||
# (you may have to set soft links so that the linker finds them)
|
||||
#
|
||||
# the makefile calls python-config to get the compilation flags and include path.
|
||||
# you may override the corresponding variables on the command line or by environment variables:
|
||||
#
|
||||
# PYTHON_INC: specify additional include directories. each dir must start with the -I prefix.
|
||||
# PYTHON_CFLAGS: specify the C compiler flags.
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .x
|
||||
.PHONY: all loess test gas madeup ethanol air galaxy
|
||||
|
||||
HOST=$(shell hostname)
|
||||
CFLAGS=-O
|
||||
FFLAGS=-O
|
||||
OBJ=loessc.o loess.o predict.o misc.o loessf.o dqrsl.o dsvdc.o fix_main.o
|
||||
|
||||
FFLAGS?=-O
|
||||
LIB=-lblas -lm -lf2c
|
||||
LIBPATH=
|
||||
CC=gcc
|
||||
CCOPTS=
|
||||
SWIG=swig
|
||||
SWIGOPTS=
|
||||
PYTHON=python
|
||||
PYTHONOPTS=
|
||||
ifneq (,$(filter merlin%,$(HOST)))
|
||||
PYTHONINC=-I/usr/include/python2.7 -I/opt/python/python-2.7.5/include/python2.7/
|
||||
else ifneq (,$(filter ra%,$(HOST)))
|
||||
PYTHONINC=-I${PSI_PYTHON27_INCLUDE_DIR}/python2.7 -I${PSI_PYTHON27_LIBRARY_DIR}/python2.7/site-packages/numpy/core/include
|
||||
else
|
||||
PYTHONINC=-I/usr/include/python2.7
|
||||
endif
|
||||
LIBPATH?=
|
||||
CC?=gcc
|
||||
CCOPTS?=
|
||||
SWIG?=swig
|
||||
SWIGOPTS?=
|
||||
PYTHON?=python
|
||||
PYTHONOPTS?=
|
||||
PYTHON_CONFIG = ${PYTHON}-config
|
||||
#PYTHON_LIB ?= $(shell ${PYTHON_CONFIG} --libs)
|
||||
#PYTHON_INC ?= $(shell ${PYTHON_CONFIG} --includes)
|
||||
PYTHON_INC ?=
|
||||
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
|
||||
#PYTHON_LDFLAGS ?= $(shell ${PYTHON_CONFIG} --ldflags)
|
||||
|
||||
all: loess
|
||||
|
||||
loess: _loess.so
|
||||
|
||||
loess_wrap.c: loess.c loess.i
|
||||
$(SWIG) $(SWIGOPTS) -python loess.i
|
||||
|
||||
loess.py _loess.so: loess_wrap.c
|
||||
# setuptools doesn't handle the fortran files correctly
|
||||
# $(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
|
||||
$(CC) $(CFLAGS) -fpic -c loessc.c loess.c predict.c misc.c loessf.f dqrsl.f dsvdc.f fix_main.c
|
||||
$(CC) $(CFLAGS) -fpic -c loess_wrap.c $(PYTHONINC)
|
||||
$(CC) -shared $(OBJ) $(LIB) $(LIBPATH) loess_wrap.o -o _loess.so
|
||||
loess.py _loess.so: loess.c loess.i
|
||||
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
|
||||
|
||||
examples: gas madeup ethanol air galaxy
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__author__ = 'Matthias Muntwiler'
|
||||
|
||||
"""
|
||||
@package loess.setup
|
||||
setup.py file for LOESS
|
||||
@ -17,39 +15,49 @@ the Python wrapper was set up by M. Muntwiler
|
||||
with the help of the SWIG toolkit
|
||||
and other incredible goodies available in the Linux world.
|
||||
|
||||
@bug this file is currently not used because
|
||||
distutils does not compile the included Fortran files.
|
||||
@bug numpy.distutils.build_src in python 2.7 treats all Fortran files with f2py
|
||||
so that they are compiled via both f2py and swig.
|
||||
this produces extra object files which cause the linker to fail.
|
||||
to fix this issue, this module hacks the build_src class.
|
||||
this hack does not work with python 3; it may not even be necessary there.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from distutils.core import setup, Extension
|
||||
from distutils import sysconfig
|
||||
|
||||
import numpy
|
||||
try:
|
||||
numpy_include = numpy.get_include()
|
||||
except AttributeError:
|
||||
numpy_include = numpy.get_numpy_include()
|
||||
|
||||
loess_module = Extension('_loess',
|
||||
sources=['loess.i', 'loess_wrap.c', 'loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f',
|
||||
'dqrsl.f', 'dsvdc.f'],
|
||||
include_dirs = [numpy_include],
|
||||
libraries=['blas', 'm', 'f2c'],
|
||||
)
|
||||
|
||||
setup(name='loess',
|
||||
version='0.1',
|
||||
author=__author__,
|
||||
author_email='matthias.muntwiler@psi.ch',
|
||||
description="""LOESS module in Python""",
|
||||
ext_modules=[loess_module],
|
||||
py_modules=["loess"], requires=['numpy']
|
||||
)
|
||||
def configuration(parent_package='', top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('loess', parent_package, top_path)
|
||||
lib = ['blas', 'm', 'f2c']
|
||||
src = ['loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f', 'dqrsl.f', 'dsvdc.f', 'fix_main.c', 'loess.i']
|
||||
inc_dir = [numpy_include]
|
||||
config.add_extension('_loess',
|
||||
sources=src,
|
||||
libraries=lib,
|
||||
include_dirs=inc_dir
|
||||
)
|
||||
return config
|
||||
|
||||
def ignore_sources(self, sources, extension):
|
||||
return sources
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
from numpy.distutils.core import numpy_cmdclass
|
||||
numpy_cmdclass['build_src'].f2py_sources = ignore_sources
|
||||
except ImportError:
|
||||
pass
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
||||
|
||||
|
@ -12,16 +12,17 @@ SHELL=/bin/sh
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
|
||||
.PHONY: all clean edac msc mufpot
|
||||
|
||||
FC=gfortran
|
||||
FCCOPTS=
|
||||
F2PY=f2py
|
||||
F2PYOPTS=
|
||||
CC=gcc
|
||||
CCOPTS=
|
||||
SWIG=swig
|
||||
SWIGOPTS=
|
||||
PYTHON=python
|
||||
PYTHONOPTS=
|
||||
FC?=gfortran
|
||||
FCCOPTS?=
|
||||
F2PY?=f2py
|
||||
F2PYOPTS?=
|
||||
CC?=gcc
|
||||
CCOPTS?=
|
||||
SWIG?=swig
|
||||
SWIGOPTS?=
|
||||
PYTHON?=python
|
||||
PYTHONOPTS?=
|
||||
PYTHONINC?=
|
||||
|
||||
all: msc
|
||||
|
||||
|
@ -20,7 +20,7 @@ CC=gcc
|
||||
CCOPTS=
|
||||
SWIG=swig
|
||||
SWIGOPTS=
|
||||
PYTHON=python
|
||||
PYTHON=python2
|
||||
PYTHONOPTS=
|
||||
|
||||
all: mufpot
|
||||
|
0
pmsco/optimizers/__init__.py
Normal file
308
pmsco/optimizers/genetic.py
Normal file
@ -0,0 +1,308 @@
|
||||
"""
|
||||
@package pmsco.optimizers.genetic
|
||||
genetic optimization algorithm.
|
||||
|
||||
this module implements a genetic algorithm for structural optimization.
|
||||
|
||||
the genetic algorithm is adapted from
|
||||
D. A. Duncan et al., Surface Science 606, 278 (2012)
|
||||
|
||||
the genetic algorithm evolves a population of individuals
|
||||
by a combination of inheritance, crossover and mutation
|
||||
and R-factor based selection.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2018 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import logging
|
||||
import numpy as np
|
||||
import random
|
||||
import pmsco.optimizers.population as population
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GeneticPopulation(population.Population):
|
||||
"""
|
||||
population implementing a genetic optimization algorithm.
|
||||
|
||||
the genetic algorithm implements the following principles:
|
||||
|
||||
1. inheritance: two children of a new generation are generated from the genes (i.e. model parameters)
|
||||
of two parents of the old generation.
|
||||
2. elitism: individuals with similar R-factors are more likely to mate.
|
||||
3. crossover: the genes of the parents are randomly distributed to their children.
|
||||
4. mutation: a gene may mutate at random.
|
||||
5. selection: the globally best individual is added to a parent population (and replaces the worst).
|
||||
|
||||
the main tuning parameter of the algorithm is the mutation_step which is copied from the domain.step.
|
||||
it defines the width of a gaussian distribution of change under a weak mutation.
|
||||
it should be large enough so that the whole parameter space can be probed,
|
||||
but small enough that a frequent mutation does not throw the individual out of the convergence region.
|
||||
typically, the step should be of the order of the parameter range divided by the population size.
|
||||
|
||||
other tunable parameters are the mating_factor, the weak_mutation_probability and the strong_mutation_probability.
|
||||
the defaults should normally be fine.
|
||||
"""
|
||||
|
||||
## @var weak_mutation_probability
|
||||
#
|
||||
# probability (between 0 and 1) that a parameter changes in the mutate_weak() method.
|
||||
#
|
||||
# the default is 1.0, i.e., each parameter mutates in each generation.
|
||||
#
|
||||
# 1.0 has shown better coverage of the continuous parameter space and faster finding of the optimum.
|
||||
|
||||
## @var strong_mutation_probability
|
||||
#
|
||||
# probability (between 0 and 1) that a parameter changes in the mutate_strong() method.
|
||||
#
|
||||
# the default is 0.01, i.e., on average, every hundredth probed parameter is affected by a strong mutation.
|
||||
# if the model contains 10 parameters, for example,
|
||||
# every tenth particle would see a mutation of at least one of its parameters.
|
||||
#
|
||||
# a value that is too high may disturb convergence,
|
||||
# a value that is too low may trap the algorithm in a local optimum.
|
||||
|
||||
## @var mating_factor
|
||||
#
|
||||
# inverse width of the mating preference distribution.
|
||||
#
|
||||
# the greater this value, the more similar partners are mated by the mate_parents() method.
|
||||
#
|
||||
# the default value 4.0 results in a probability of about 0.0025
|
||||
# that the best particle mates the worst.
|
||||
|
||||
## @var position_constrain_mode
|
||||
#
|
||||
# the position constrain mode selects what to do if a particle violates the parameter limits.
|
||||
#
|
||||
# the default is "random" which resets the parameter to a random value.
|
||||
|
||||
## @var mutation_step
|
||||
#
|
||||
# standard deviations of the gaussian distribution used in the mutate_weak() method.
|
||||
# the variable is a dictionary with the same keys as model_step (the parameter domain).
|
||||
#
|
||||
# it is initialized from the domain.step
|
||||
# or set to a default value based on the parameter range and population size.
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
initialize the population object.
|
||||
|
||||
"""
|
||||
super(GeneticPopulation, self).__init__()
|
||||
|
||||
self.weak_mutation_probability = 1.0
|
||||
self.strong_mutation_probability = 0.01
|
||||
self.mating_factor = 4.
|
||||
self.position_constrain_mode = 'random'
|
||||
self.mutation_step = {}
|
||||
|
||||
def setup(self, size, domain, **kwargs):
|
||||
"""
|
||||
@copydoc Population.setup()
|
||||
|
||||
in addition to the inherited behaviour, this method initializes self.mutation_step.
|
||||
mutation_step of a parameter is set to its domain.step if non-zero.
|
||||
otherwise it is set to the parameter range divided by the population size.
|
||||
"""
|
||||
super(GeneticPopulation, self).setup(size, domain, **kwargs)
|
||||
|
||||
for key in self.model_step:
|
||||
val = self.model_step[key]
|
||||
self.mutation_step[key] = val if val != 0 else (self.model_max[key] - self.model_min[key]) / size
|
||||
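# numeric sketch of the default rule above (hypothetical domain):
# a parameter spanning [-1, 1] with step 0 in a population of 20
size, pmin, pmax, step = 20, -1.0, 1.0, 0.0
mutation_step = step if step != 0 else (pmax - pmin) / size
assert abs(mutation_step - 0.1) < 1e-12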
|
||||
def randomize(self, pos=True, vel=True):
|
||||
"""
|
||||
initializes a "random" population.
|
||||
|
||||
this implementation is a new proposal.
|
||||
the distribution is not completely random.
|
||||
rather, a position vector (by parameter) is initialized with a linear function
|
||||
that covers the parameter domain.
|
||||
the linear function is then permuted randomly.
|
||||
|
||||
the method does not update the particle info fields.
|
||||
|
||||
@param pos: randomize positions. if False, the positions are not changed.
|
||||
@param vel: randomize velocities. if False, the velocities are not changed.
|
||||
"""
|
||||
if pos:
|
||||
for key in self.model_start:
|
||||
self.pos[key] = np.random.permutation(np.linspace(self.model_min[key], self.model_max[key],
|
||||
self.pos.shape[0]))
|
||||
if vel:
|
||||
for key in self.model_start:
|
||||
d = (self.model_max[key] - self.model_min[key]) / 8
|
||||
self.vel[key] = np.random.permutation(np.linspace(-d, d, self.vel.shape[0]))
|
||||
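# toy illustration of the permuted linear initialization (hypothetical domain):
import numpy as np
# five particles, one parameter spanning [0, 1]: each particle receives a
# distinct grid value, assigned in random order
pos = np.random.permutation(np.linspace(0.0, 1.0, 5))
# -> e.g. array([0.5, 0.0, 1.0, 0.25, 0.75])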
|
||||
def advance_population(self):
|
||||
"""
|
||||
advance the population by one generation.
|
||||
|
||||
the population is advanced in several steps:
|
||||
1. replace the worst individual by the best found so far.
|
||||
2. mate the parents in pairs of two.
|
||||
3. produce children by crossover from the parents.
|
||||
4. apply weak mutations.
|
||||
5. apply strong mutations.
|
||||
|
||||
if generation is lower than zero, the method increases the generation number but does not advance the particles.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
if not self._hold_once:
|
||||
self.generation += 1
|
||||
|
||||
pop = self.pos.copy()
|
||||
pop.sort(order='_rfac')
|
||||
elite = self.best.copy()
|
||||
elite.sort(order='_rfac')
|
||||
if elite[0]['_model'] not in pop['_model']:
|
||||
elite[0]['_particle'] = pop[-1]['_particle']
|
||||
pop[-1] = elite[0]
|
||||
pop.sort(order='_rfac')
|
||||
|
||||
parents = self.mate_parents(pop)
|
||||
|
||||
children = []
|
||||
for x, y in parents:
|
||||
a, b = self.crossover(x, y)
|
||||
children.append(a)
|
||||
children.append(b)
|
||||
|
||||
for child in children:
|
||||
index = child['_particle']
|
||||
self.mutate_weak(child, self.weak_mutation_probability)
|
||||
self.mutate_strong(child, self.strong_mutation_probability)
|
||||
self.mutate_duplicate(child)
|
||||
for key in self.model_start:
|
||||
vel = child[key] - self.pos[index][key]
|
||||
child[key], vel, self.model_min[key], self.model_max[key] = \
|
||||
self.constrain_position(child[key], vel, self.model_min[key], self.model_max[key],
|
||||
self.position_constrain_mode)
|
||||
|
||||
self.pos[index] = child
|
||||
self.update_particle_info(index)
|
||||
|
||||
super(GeneticPopulation, self).advance_population()
|
||||
|
||||
def mate_parents(self, positions):
|
||||
"""
|
||||
group the population into pairs.
|
||||
|
||||
to mate two individuals, the first individual of the (remaining) population selects one of the following individuals,
|
||||
with an exponential preference for earlier (better-ranked) ones.
|
||||
the process is repeated until all individuals are mated.
|
||||
|
||||
@param positions: original population (numpy structured array)
|
||||
the population should be ordered with best model first.
|
||||
@return: sequence of pairs (tuples) of structured arrays holding one model each.
|
||||
"""
|
||||
seq = [model for model in positions]
|
||||
parents = []
|
||||
while len(seq) >= 2:
|
||||
p1 = seq.pop(0)
|
||||
ln = len(seq)
|
||||
i = min(int(random.expovariate(self.mating_factor / ln) * ln), ln - 1)
|
||||
p2 = seq.pop(i)
|
||||
parents.append((p1, p2))
|
||||
return parents
|
||||
|
||||
def crossover(self, parent1, parent2):
|
||||
"""
|
||||
crossover two parents to create two children.
|
||||
|
||||
for each model parameter, the parent's value is randomly assigned to either one of the children.
|
||||
|
||||
@param parent1: numpy structured array holding the model of the first parent.
|
||||
@param parent2: numpy structured array holding the model of the second parent.
|
||||
@return: tuple of the two crossed children.
|
||||
these are two new ndarray instances that are independent of their parents.
|
||||
"""
|
||||
child1 = parent1.copy()
|
||||
child2 = parent2.copy()
|
||||
for key in self.model_start:
|
||||
if random.random() >= 0.5:
|
||||
child1[key], child2[key] = parent2[key], parent1[key]
|
||||
return child1, child2
|
||||
|
||||
def mutate_weak(self, model, probability):
|
||||
"""
|
||||
apply a weak mutation to a model.
|
||||
|
||||
each parameter is changed with the given probability.
|
||||
the amount of change has a gaussian distribution with a standard deviation of mutation_step.
|
||||
|
||||
@param[in,out] model: structured numpy.ndarray holding the model parameters.
|
||||
model is modified in place.
|
||||
|
||||
@param probability: probability between 0 and 1 at which to change a parameter.
|
||||
0 = no change, 1 = force change.
|
||||
|
||||
@return: model (same instance as the @c model input argument).
|
||||
"""
|
||||
for key in self.model_start:
|
||||
if random.random() < probability:
|
||||
model[key] += random.gauss(0, self.mutation_step[key])
|
||||
return model
|
||||
|
||||
def mutate_strong(self, model, probability):
|
||||
"""
|
||||
apply a strong mutation to a model.
|
||||
|
||||
each parameter is changed to a random value within the parameter domain, with the given probability.
|
||||
|
||||
@param[in,out] model: structured numpy.ndarray holding the model parameters.
|
||||
model is modified in place.
|
||||
|
||||
@param probability: probability between 0 and 1 at which to change a parameter.
|
||||
0 = no change, 1 = force change.
|
||||
|
||||
@return: model (same instance as the @c model input argument).
|
||||
"""
|
||||
for key in self.model_start:
|
||||
if random.random() < probability:
|
||||
model[key] = (self.model_max[key] - self.model_min[key]) * random.random() + self.model_min[key]
|
||||
return model
|
||||
|
||||
def mutate_duplicate(self, model):
|
||||
"""
|
||||
mutate a model if it is identical to a previously calculated one.
|
||||
|
||||
if the model was calculated before, the mutate_weak mutation is applied with probability 1.
|
||||
|
||||
@param[in,out] model: structured numpy.ndarray holding the model parameters.
|
||||
model is modified in place.
|
||||
|
||||
@return: model (same instance as the @c model input argument).
|
||||
"""
|
||||
try:
|
||||
self.find_model(model)
|
||||
self.mutate_weak(model, 1.0)
|
||||
except ValueError:
|
||||
pass
|
||||
return model
|
||||
|
||||
|
||||
class GeneticOptimizationHandler(population.PopulationHandler):
|
||||
"""
|
||||
model handler which implements a genetic algorithm.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(GeneticOptimizationHandler, self).__init__()
|
||||
self._pop = GeneticPopulation()
|
@ -13,14 +13,19 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
import copy
|
||||
import os
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime
|
||||
import math
|
||||
import numpy as np
|
||||
import logging
|
||||
import handlers
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
from pmsco.compat import open
|
||||
import pmsco.handlers as handlers
|
||||
import pmsco.graphics as graphics
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -114,16 +119,14 @@ class GridPopulation(object):
|
||||
|
||||
@param domain: definition of initial and limiting model parameters
|
||||
expected by the cluster and parameters functions.
|
||||
the attributes have the following meanings:
|
||||
@arg start: values of the fixed parameters.
|
||||
@arg min: minimum values allowed.
|
||||
@arg max: maximum values allowed.
|
||||
if abs(max - min) < step/2, the parameter is kept constant.
|
||||
@arg step: step size (distance between two grid points).
|
||||
if step <= 0, the parameter is kept constant.
|
||||
|
||||
@param domain.start: values of the fixed parameters.
|
||||
|
||||
@param domain.min: minimum values allowed.
|
||||
|
||||
@param domain.max: maximum values allowed.
|
||||
if abs(max - min) < step/2, the parameter is kept constant.
|
||||
|
||||
@param domain.step: step size (distance between two grid points).
|
||||
if step <= 0, the parameter is kept constant.
|
||||
"""
|
||||
self.model_start = domain.start
|
||||
self.model_min = domain.min
|
||||
@ -347,7 +350,7 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
pos = self._pop.positions[model]
|
||||
new_task.model = {k:pos[k] for k in pos.dtype.names}
|
||||
new_task.model = {k: pos[k] for k in pos.dtype.names}
|
||||
new_task.change_id(model=model)
|
||||
|
||||
child_id = new_task.id
|
||||
@ -374,17 +377,10 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
del self._pending_tasks[task.id]
|
||||
parent_task = self._parent_tasks[task.parent_id]
|
||||
|
||||
rfac = 1.0
|
||||
if task.result_valid:
|
||||
try:
|
||||
rfac = self._project.calc_rfactor(task)
|
||||
except ValueError:
|
||||
task.result_valid = False
|
||||
self._invalid_count += 1
|
||||
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
|
||||
|
||||
task.model['_rfac'] = rfac
|
||||
self._pop.add_result(task.model, rfac)
|
||||
assert not math.isnan(task.rfac)
|
||||
task.model['_rfac'] = task.rfac
|
||||
self._pop.add_result(task.model, task.rfac)
|
||||
|
||||
if self._outfile:
|
||||
s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
|
||||
@ -392,12 +388,14 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
self._outfile.write("\n")
|
||||
self._outfile.flush()
|
||||
|
||||
self._project.files.update_model_rfac(task.id.model, rfac)
|
||||
self._project.files.update_model_rfac(task.id.model, task.rfac)
|
||||
self._project.files.set_model_complete(task.id.model, True)
|
||||
|
||||
if task.result_valid:
|
||||
if task.time > self._model_time:
|
||||
self._model_time = task.time
|
||||
else:
|
||||
self._invalid_count += 1
|
||||
|
||||
# grid search complete?
|
||||
if len(self._pending_tasks) == 0:
|
||||
@ -407,3 +405,17 @@ class GridSearchHandler(handlers.ModelHandler):
|
||||
|
||||
self.cleanup_files()
|
||||
return parent_task
|
||||
|
||||
def save_report(self, root_task):
|
||||
"""
|
||||
generate a graphical summary of the optimization.
|
||||
|
||||
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
super(GridSearchHandler, self).save_report(root_task)
|
||||
|
||||
files = graphics.rfactor.render_results(self._project.output_file + ".dat", self._pop.positions)
|
||||
for f in files:
|
||||
self._project.files.add_file(f, root_task.id.model, "report")
|
1358
pmsco/optimizers/population.py
Normal file
File diff suppressed because it is too large
139
pmsco/optimizers/swarm.py
Normal file
@ -0,0 +1,139 @@
|
||||
"""
|
||||
@package pmsco.optimizers.swarm
|
||||
particle swarm optimization handler.
|
||||
|
||||
the module starts multiple MSC calculations and optimizes the model parameters
|
||||
according to the particle swarm optimization algorithm.
|
||||
|
||||
Particle swarm optimization adapted from
|
||||
D. A. Duncan et al., Surface Science 606, 278 (2012)
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import logging
|
||||
import numpy as np
|
||||
import pmsco.optimizers.population as population
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SwarmPopulation(population.Population):
|
||||
"""
|
||||
particle swarm population.
|
||||
"""
|
||||
|
||||
## @var friends
|
||||
# number of other particles that each particle consults for the global best fit.
|
||||
# default = 3.
|
||||
|
||||
## @var momentum
|
||||
# momentum of the particle.
|
||||
# default = 0.689343.
|
||||
|
||||
## @var attract_local
|
||||
# preference for returning to the local best fit.
|
||||
# default = 1.92694.
|
||||
|
||||
## @var attract_global
|
||||
# preference for heading towards the global best fit.
|
||||
# default = 1.92694
|
||||
|
||||
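# schematic per-parameter update performed by advance_particle() below,
# using scalar stand-ins for one model parameter (values are the defaults):
import numpy as np
momentum, attract_local, attract_global = 0.689343, 1.92694, 1.92694
pos, vel = 0.2, 0.05                 # current position and velocity
best_own, best_friend = 0.3, 0.8     # own best and friends' best positions
pv, pl, pg = np.random.random(3)
vel = (momentum * pv * vel +
       attract_local * pl * (best_own - pos) +
       attract_global * pg * (best_friend - pos))
pos += vel  # the real code additionally applies the constrain modes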
def __init__(self):
|
||||
"""
|
||||
initialize the population object.
|
||||
|
||||
"""
|
||||
super(SwarmPopulation, self).__init__()
|
||||
|
||||
self.friends = 3
|
||||
self.momentum = 0.689343
|
||||
self.attract_local = 1.92694
|
||||
self.attract_global = 1.92694
|
||||
self.position_constrain_mode = 'default'
|
||||
self.velocity_constrain_mode = 'default'
|
||||
|
||||
def advance_population(self):
|
||||
"""
|
||||
advance the population by one step.
|
||||
|
||||
this method just calls advance_particle() for each particle of the population.
|
||||
if generation is lower than zero, the method increases the generation number but does not advance the particles.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
if not self._hold_once:
|
||||
self.generation += 1
|
||||
for index, __ in enumerate(self.pos):
|
||||
self.advance_particle(index)
|
||||
|
||||
super(SwarmPopulation, self).advance_population()
|
||||
|
||||
def advance_particle(self, index):
|
||||
"""
|
||||
advance a particle by one step.
|
||||
|
||||
@param index: index of the particle in the population.
|
||||
"""
|
||||
|
||||
# note: the following two identifiers are views,
|
||||
# assignment will modify the original array
|
||||
pos = self.pos[index]
|
||||
vel = self.vel[index]
|
||||
# best fit that this individual has seen
|
||||
xl = self.best[index]
|
||||
# best fit that a group of others have seen
|
||||
xg = self.best_friend(index)
|
||||
|
||||
for key in self.model_start:
|
||||
# update velocity
|
||||
dxl = xl[key] - pos[key]
|
||||
dxg = xg[key] - pos[key]
|
||||
pv = np.random.random()
|
||||
pl = np.random.random()
|
||||
pg = np.random.random()
|
||||
vel[key] = (self.momentum * pv * vel[key] +
|
||||
self.attract_local * pl * dxl +
|
||||
self.attract_global * pg * dxg)
|
||||
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
|
||||
self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
|
||||
self.velocity_constrain_mode)
|
||||
# update position
|
||||
pos[key] += vel[key]
|
||||
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
|
||||
self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
|
||||
self.position_constrain_mode)
|
||||
|
||||
self.update_particle_info(index)
|
||||
|
||||
# noinspection PyUnusedLocal
|
||||
def best_friend(self, index):
|
||||
"""
|
||||
select the best fit out of a random set of particles.
|
||||
|
||||
returns the "best friend".
|
||||
"""
|
||||
friends = np.random.choice(self.best, self.friends, replace=False)
|
||||
index = np.argmin(friends['_rfac'])
|
||||
return friends[index]
|
||||
|
||||
|
||||
class ParticleSwarmHandler(population.PopulationHandler):
|
||||
"""
|
||||
model handler which implements the particle swarm optimization algorithm.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(ParticleSwarmHandler, self).__init__()
|
||||
self._pop = SwarmPopulation()
|
155
pmsco/optimizers/table.py
Normal file
@ -0,0 +1,155 @@
|
||||
"""
|
||||
@package pmsco.optimizers.table
|
||||
table scan optimization handler
|
||||
|
||||
the table scan scans through an explicit table of model parameters.
|
||||
it can be used to recalculate models from a previous optimization run on different scans,
|
||||
or as an interface to external optimizers.
|
||||
new elements can be added to the table while the calculation loop is in progress.
|
||||
|
||||
though the concepts _population_ and _optimization_ are not intrinsic to a table scan,
|
||||
the classes defined here inherit from the generic population class and optimization handler.
|
||||
this is done to share as much code as possible between the different optimizers.
|
||||
the only difference is that the table optimizer does not generate models internally.
|
||||
instead, it loads them (possibly repeatedly) from a file or asks the project code to provide the data.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import logging
|
||||
import numpy as np
|
||||
import pmsco.optimizers.population as population
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TablePopulation(population.Population):
|
||||
"""
|
||||
population generated from explicit values.
|
||||
|
||||
this class maintains a population that is updated from a table of explicit values.
|
||||
the table can be static (defined at the start of the optimization process)
|
||||
or dynamic (new models appended during the optimization process).
|
||||
|
||||
for each generation, the table is read and the next models are imported into the population.
|
||||
the class de-duplicates the table, i.e. models with the same parameters as a previously seen one are not calculated again.
|
||||
it is thus perfectly fine to append new models to the table rather than overwrite previous entries.
|
||||
|
||||
the table can be built from the following data sources:
|
||||
|
||||
@arg (numpy.ndarray): structured array that can be added to self.positions,
|
||||
having at least the columns defining the model parameters.
|
||||
@arg (sequence of dict, numpy.ndarray, numpy.void, named tuple):
|
||||
each element must be syntactically compatible with a dict
|
||||
that holds the model parameters.
|
||||
@arg (str): file name that contains a table in the same format as
|
||||
@ref pmsco.optimizers.population.Population.save_array produces.
|
||||
@arg (callable): a function that returns one of the above objects
|
||||
(or None to mark the end of the table).
|
||||
|
||||
the data source is passed as an argument to the self.setup() method.
|
||||
structured arrays and sequences cannot be modified after they are passed to `setup`.
|
||||
this means that the complete table must be known at the start of the process.
|
||||
|
||||
the most flexible way is to pass a function that generates a structured array in each call.
|
||||
this would even make it possible to plug in a non-standard optimization algorithm (see the sketch after this docstring).
|
||||
the function is best defined in the custom project class.
|
||||
the population calls it every time before a new generation starts.
|
||||
to end the optimization process, it simply returns None.
|
||||
|
||||
the table can also be defined in an external file, e.g. as calculated by other programs or edited manually.
|
||||
the table file can either remain unchanged during the optimization process,
|
||||
or new models can be added while the optimization is in progress.
|
||||
in the latter case, note that there is no reliable synchronization of file access.
|
||||
|
||||
first, write accesses to the file must be kept as short as possible.
|
||||
the population class has a read timeout of ten seconds.
|
||||
|
||||
second, because it is impossible to know whether the file has been read or not,
|
||||
new models should be _appended_ to the file; previous entries should not be _overwritten_.
|
||||
the population class automatically skips models that have already been read.
|
||||
|
||||
this class does not support seeding.
|
||||
although a seed file is accepted, it is not used.
|
||||
patching is allowed, but there is normally no advantage over modifying the table.
|
||||
|
||||
the domain is used to define the model parameters and the parameter range.
|
||||
models violating the parameter domain are ignored.
|
||||
"""
|
||||
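# sketch of a project-defined table source as described in the docstring
# (the parameter names dA and dz are hypothetical):
import numpy as np

_served = []

def my_table_source():
    if _served:
        return None  # end of table: terminates the optimization process
    _served.append(True)
    return np.array([(1.0, 2.0), (1.5, 2.5)],
                    dtype=[('dA', 'f8'), ('dz', 'f8')])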
|
||||
## @var table_source
|
||||
# data source of the model table
|
||||
#
|
||||
# this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
|
||||
# e.g. a file name, a numpy structured array, or a function returning a structured array.
|
||||
# see the class description for details.
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
initialize the population object.
|
||||
|
||||
"""
|
||||
super(TablePopulation, self).__init__()
|
||||
self.table_source = None
|
||||
self.position_constrain_mode = 'error'
|
||||
|
||||
def setup(self, size, domain, **kwargs):
|
||||
"""
|
||||
set up the population arrays, parameter domain and data source.
|
||||
|
||||
@param size: requested number of particles.
|
||||
this does not need to correspond to the number of table entries.
|
||||
on each generation the population loads up to this number of new entries from the table source.
|
||||
|
||||
@param domain: definition of initial and limiting model parameters
|
||||
expected by the cluster and parameters functions.
|
||||
@arg domain.start: not used.
|
||||
@arg domain.min: minimum values allowed.
|
||||
@arg domain.max: maximum values allowed.
|
||||
@arg domain.step: not used.
|
||||
|
||||
the following arguments are keyword arguments.
|
||||
the method also accepts the inherited arguments for seeding. they do not have an effect, however.
|
||||
|
||||
@param table_source: data source of the model table.
|
||||
this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
|
||||
e.g. a file name, a numpy structured array, or a function returning a structured array.
|
||||
see the class description for details.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
super(TablePopulation, self).setup(size, domain, **kwargs)
|
||||
self.table_source = kwargs['table_source']
|
||||
|
||||
def advance_population(self):
|
||||
"""
|
||||
advance the population by one step.
|
||||
|
||||
this method re-imports the table file
|
||||
and copies the table to the current population.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
self.import_positions(self.table_source)
|
||||
self.advance_from_import()
|
||||
super(TablePopulation, self).advance_population()
|
||||
|
||||
|
||||
class TableModelHandler(population.PopulationHandler):
|
||||
"""
|
||||
model handler which implements the table algorithm.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
super(TableModelHandler, self).__init__()
|
||||
self._pop = TablePopulation()
|
193
pmsco/pmsco.py
@ -4,13 +4,9 @@
|
||||
@package pmsco.pmsco
|
||||
PEARL Multiple-Scattering Calculation and Structural Optimization
|
||||
|
||||
this is the main entry point and top-level interface of the PMSCO package.
|
||||
all calculations (any mode, any project) start by calling the main_pmsco() function of this module.
|
||||
the module also provides a command line parser.
|
||||
|
||||
command line usage: call with -h option to see the list of arguments.
|
||||
|
||||
python usage: call main_pmsco() with suitable arguments.
|
||||
this is the top-level interface of the PMSCO package.
|
||||
all calculations (any mode, any project) start by calling the run_project() function of this module.
|
||||
the module also provides a command line parser for common options.
|
||||
|
||||
for parallel execution, prefix the command line with mpiexec -np NN, where NN is the number of processes to use.
|
||||
note that in parallel mode, one process takes the role of the coordinator (master).
|
||||
@ -24,48 +20,39 @@ PMSCO serializes the calculations automatically.
|
||||
the code of the main module is independent of a particular calculation project.
|
||||
all project-specific code must be in a separate python module.
|
||||
the project module must implement a class derived from pmsco.project.Project,
|
||||
and a global function create_project which returns a new instance of the derived project class.
|
||||
and call run_project() with an instance of the project class.
|
||||
refer to the projects folder for examples.
|
||||
|
||||
@pre
|
||||
* python 2.7, including python-pip
|
||||
* numpy
|
||||
* nose from Debian python-nose
|
||||
* statsmodels from Debian python-statsmodels, or PyPI (https://pypi.python.org/pypi/statsmodels)
|
||||
* periodictable from PyPI (https://pypi.python.org/pypi/periodictable)
|
||||
* mpi4py from PyPI (the Debian package may have a bug causing the program to crash)
|
||||
* OpenMPI, including libopenmpi-dev
|
||||
* SWIG from Debian swig
|
||||
|
||||
to install a PyPI package, e.g. periodictable, do
|
||||
@code{.sh}
|
||||
pip install --user periodictable
|
||||
@endcode
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
from builtins import range
|
||||
import datetime
|
||||
import logging
|
||||
import importlib
|
||||
import os.path
|
||||
import sys
|
||||
import datetime
|
||||
import argparse
|
||||
import logging
|
||||
import cluster
|
||||
import dispatch
|
||||
import handlers
|
||||
import files
|
||||
import calculator
|
||||
import swarm
|
||||
import grid
|
||||
# import gradient
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
import pmsco.calculators.calculator as calculator
|
||||
import pmsco.cluster as cluster
|
||||
import pmsco.dispatch as dispatch
|
||||
import pmsco.files as files
|
||||
import pmsco.handlers as handlers
|
||||
from pmsco.optimizers import genetic, swarm, grid, table
|
||||
|
||||
# the module-level logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -152,13 +139,20 @@ def set_common_args(project, args):
|
||||
|
||||
logger.debug("creating project")
|
||||
mode = args.mode.lower()
|
||||
if mode in {'single', 'grid', 'swarm'}:
|
||||
if mode in {'single', 'grid', 'swarm', 'genetic', 'table'}:
|
||||
project.mode = mode
|
||||
else:
|
||||
logger.error("invalid optimization mode '%s'.", mode)
|
||||
|
||||
if args.pop_size:
|
||||
project.pop_size = args.pop_size
|
||||
project.optimizer_params['pop_size'] = args.pop_size
|
||||
|
||||
if args.seed_file:
|
||||
project.optimizer_params['seed_file'] = args.seed_file
|
||||
if args.seed_limit:
|
||||
project.optimizer_params['seed_limit'] = args.seed_limit
|
||||
if args.table_file:
|
||||
project.optimizer_params['table_file'] = args.table_file
|
||||
|
||||
code = args.code.lower()
|
||||
if code in {'edac', 'msc', 'test'}:
|
||||
@ -178,6 +172,10 @@ def set_common_args(project, args):
|
||||
if mode == 'single':
|
||||
cats -= {'model'}
|
||||
project.files.categories_to_delete = cats
|
||||
if args.keep_levels > project.keep_levels:
|
||||
project.keep_levels = args.keep_levels
|
||||
if args.keep_best > project.keep_best:
|
||||
project.keep_best = args.keep_best
|
||||
|
||||
|
||||
def log_project_args(project):
|
||||
@ -190,8 +188,19 @@ def log_project_args(project):
|
||||
try:
|
||||
logger.info("scattering code: {0}".format(project.code))
|
||||
logger.info("optimization mode: {0}".format(project.mode))
|
||||
logger.info("minimum swarm size: {0}".format(project.pop_size))
|
||||
|
||||
try:
|
||||
logger.info("minimum population size: {0}".format(project.optimizer_params['pop_size']))
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
logger.info("seed file: {0}".format(project.optimizer_params['seed_file']))
|
||||
logger.info("seed limit: {0}".format(project.optimizer_params['seed_limit']))
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
logger.info("table file: {0}".format(project.optimizer_params['table_file']))
|
||||
except KeyError:
|
||||
pass
|
||||
logger.info("data directory: {0}".format(project.data_dir))
|
||||
logger.info("output file: {0}".format(project.output_file))
|
||||
|
||||
@ -217,10 +226,14 @@ def run_project(project):
|
||||
optimizer_class = grid.GridSearchHandler
|
||||
elif project.mode == 'swarm':
|
||||
optimizer_class = swarm.ParticleSwarmHandler
|
||||
elif project.mode == 'genetic':
|
||||
optimizer_class = genetic.GeneticOptimizationHandler
|
||||
elif project.mode == 'gradient':
|
||||
logger.error("gradient search not implemented")
|
||||
# TODO: implement gradient search
|
||||
# optimizer_class = gradient.GradientSearchHandler
|
||||
elif project.mode == 'table':
|
||||
optimizer_class = table.TableModelHandler
|
||||
else:
|
||||
logger.error("invalid optimization mode '%s'.", project.mode)
|
||||
project.handler_classes['model'] = optimizer_class
|
||||
@ -230,14 +243,14 @@ def run_project(project):
|
||||
calculator_class = None
|
||||
if project.code == 'edac':
|
||||
logger.debug("importing EDAC interface")
|
||||
import edac_calculator
|
||||
from pmsco.calculators import edac
|
||||
project.cluster_format = cluster.FMT_EDAC
|
||||
calculator_class = edac_calculator.EdacCalculator
|
||||
calculator_class = edac.EdacCalculator
|
||||
elif project.code == 'msc':
|
||||
logger.debug("importing MSC interface")
|
||||
import msc_calculator
|
||||
from pmsco.calculators import msc
|
||||
project.cluster_format = cluster.FMT_MSC
|
||||
calculator_class = msc_calculator.MscCalculator
|
||||
calculator_class = msc.MscCalculator
|
||||
elif project.code == 'test':
|
||||
logger.debug("importing TEST interface")
|
||||
project.cluster_format = cluster.FMT_EDAC
|
||||
@ -273,7 +286,7 @@ class Args(object):
|
||||
values as the command line parser.
|
||||
"""
|
||||
|
||||
def __init__(self, mode="single", code="edac", output_file=""):
|
||||
def __init__(self, mode="single", code="edac", output_file="pmsco_data"):
|
||||
"""
|
||||
constructor.
|
||||
|
||||
@ -284,14 +297,19 @@ class Args(object):
|
||||
"""
|
||||
self.mode = mode
|
||||
self.pop_size = 0
|
||||
self.seed_file = ""
|
||||
self.seed_limit = 0
|
||||
self.code = code
|
||||
self.data_dir = os.getcwd()
|
||||
self.output_file = output_file
|
||||
self.time_limit = 24.0
|
||||
self.keep_files = []
|
||||
self.keep_files = files.FILE_CATEGORIES_TO_KEEP
|
||||
self.keep_best = 10
|
||||
self.keep_levels = 1
|
||||
self.log_level = "WARNING"
|
||||
self.log_file = ""
|
||||
self.log_enable = True
|
||||
self.table_file = ""
|
||||
|
||||
|
||||
def get_cli_parser(default_args=None):
|
||||
@ -324,26 +342,47 @@ def get_cli_parser(default_args=None):
|
||||
# for simplicity, the parser does not check these requirements.
|
||||
# all parameters are optional and accepted regardless of mode.
|
||||
# errors may occur if implicit requirements are not met.
|
||||
parser.add_argument('-m', '--mode', default='single',
|
||||
choices=['single', 'grid', 'swarm', 'gradient'],
|
||||
parser.add_argument('project_module',
|
||||
help="path to custom module that defines the calculation project")
|
||||
parser.add_argument('-m', '--mode', default=default_args.mode,
|
||||
choices=['single', 'grid', 'swarm', 'genetic', 'table'],
|
||||
help='calculation mode')
|
||||
parser.add_argument('--pop-size', type=int, default=0,
|
||||
help='population size (number of particles) in swarm optimization mode. ' +
|
||||
parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
|
||||
help='population size (number of particles) in swarm or genetic optimization mode. ' +
|
||||
'default is the greater of 4 or two times the number of calculation processes.')
|
||||
parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default="edac",
|
||||
parser.add_argument('--seed-file',
|
||||
help='path and name of population seed file. ' +
|
||||
'population data of previous optimizations can be used to seed a new optimization. ' +
|
||||
'the file must have the same structure as the .pop or .dat files.')
|
||||
parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
|
||||
help='maximum number of models to use from the seed file. ' +
|
||||
'the models with the best R-factors are selected.')
|
||||
parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default=default_args.code,
|
||||
help='scattering code (default: edac)')
|
||||
parser.add_argument('-d', '--data-dir', default=os.getcwd(),
|
||||
parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
|
||||
help='directory path for experimental data files (if required by project). ' +
|
||||
'default: working directory')
|
||||
parser.add_argument('-o', '--output-file',
|
||||
parser.add_argument('-o', '--output-file', default=default_args.output_file,
|
||||
help='base path for intermediate and output files. ' +
|
||||
'default: pmsco_data')
|
||||
parser.add_argument('-k', '--keep-files', nargs='*', default=files.FILE_CATEGORIES_TO_KEEP,
|
||||
'default: pmsco_data')
|
||||
parser.add_argument('--table-file',
|
||||
help='path and name of population table file for table optimization mode. ' +
|
||||
'the file must have the same structure as the .pop or .dat files.')
|
||||
parser.add_argument('-k', '--keep-files', nargs='*', default=default_args.keep_files,
|
||||
choices=KEEP_FILES_CHOICES,
|
||||
help='output file categories to keep after the calculation. '
|
||||
'by default, cluster and model (simulated data) '
|
||||
'of a limited number of best models are kept.')
|
||||
parser.add_argument('-t', '--time-limit', type=float, default=24.0,
|
||||
parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
|
||||
help='number of best models for which to keep result files '
|
||||
'(at each node from root down to keep-levels). '
|
||||
'default 10 (project can define higher default).')
|
||||
parser.add_argument('--keep-levels', type=int, choices=range(5),
|
||||
default=default_args.keep_levels,
|
||||
help='task level down to which result files of best models are kept. '
|
||||
'0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region. '
|
||||
'default 1 (project can define higher default).')
|
||||
parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
|
||||
help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
|
||||
parser.add_argument('--log-file', default=default_args.log_file,
|
||||
help='name of the main log file. ' +
|
||||
@ -375,7 +414,47 @@ def parse_cli():
|
||||
return args, unknown_args
|
||||
|
||||
|
||||
def import_project_module(path):
|
||||
"""
|
||||
import the custom project module.
|
||||
|
||||
imports the project module given its file path.
|
||||
the path is expanded to its absolute form and appended to the python path.
|
||||
|
||||
@param path: path and name of the module to be loaded.
|
||||
path is optional and defaults to the python path.
|
||||
if the name includes an extension, it is stripped off.
|
||||
|
||||
@return: the loaded module as a python object
|
||||
"""
|
||||
path, name = os.path.split(path)
|
||||
name, __ = os.path.splitext(name)
|
||||
path = os.path.abspath(path)
|
||||
sys.path.append(path)
|
||||
project_module = importlib.import_module(name)
|
||||
return project_module
|
||||
|
||||
|
||||
def main():
|
||||
args, unknown_args = parse_cli()
|
||||
|
||||
if args:
|
||||
module = import_project_module(args.project_module)
|
||||
try:
|
||||
project_args = module.parse_project_args(unknown_args)
|
||||
except NameError:
|
||||
project_args = None
|
||||
|
||||
project = module.create_project()
|
||||
set_common_args(project, args)
|
||||
try:
|
||||
module.set_project_args(project, project_args)
|
||||
except NameError:
|
||||
pass
|
||||
|
||||
run_project(project)
|
||||
|
||||
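# minimal sketch of a custom project module consumed by main() above
# (the file name my_project.py is hypothetical; run as: python pmsco my_project.py):
import pmsco.project


class MyProject(pmsco.project.Project):
    pass  # override create_cluster, create_params, etc. as needed


def create_project():
    return MyProject()

# parse_project_args(unknown_args) and set_project_args(project, project_args)
# are optional hooks; main() skips them if the module does not define them.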
|
||||
if __name__ == '__main__':
|
||||
main_parser = get_cli_parser()
|
||||
main_parser.print_help()
|
||||
main()
|
||||
sys.exit(0)
|
||||
|
771
pmsco/project.py
@ -26,16 +26,27 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import datetime
|
||||
import logging
|
||||
import numpy as np
|
||||
import collections
|
||||
import data as md
|
||||
import cluster as mc
|
||||
import files
|
||||
import handlers
|
||||
import os.path
|
||||
import socket
|
||||
import sys
|
||||
|
||||
import pmsco.cluster as mc
|
||||
from pmsco.compat import open
|
||||
import pmsco.data as md
|
||||
import pmsco.database as database
|
||||
import pmsco.dispatch as dispatch
|
||||
import pmsco.files as files
|
||||
import pmsco.handlers as handlers
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -95,7 +106,7 @@ class Domain(object):
|
||||
self.max = {}
|
||||
self.step = {}
|
||||
|
||||
def add_param(self, name, start, min, max, step):
|
||||
def add_param(self, name, start, min=None, max=None, step=None, width=None):
|
||||
"""
|
||||
set the domain of one parameter with all necessary values at once.
|
||||
|
||||
@ -107,15 +118,29 @@ class Domain(object):
|
||||
@param start (float) start value.
|
||||
|
||||
@param min (float) lower bound of the parameter interval.
|
||||
must be lower than or equal to start.
|
||||
if None, the field is set to start.
|
||||
|
||||
@param max (float) upper bound of the parameter interval.
|
||||
must be greater than or equal to start.
|
||||
if None, the field is set to start.
|
||||
|
||||
@param width (float) width of the parameter interval.
|
||||
instead of min and max, the interval can be set centered around the start value.
|
||||
this is equivalent to min = start - width/2, max = start + width/2.
|
||||
this argument overrides min and max; do not specify both width and min/max.
|
||||
|
||||
@param step (float) step size.
|
||||
must be greater than or equal to zero.
|
||||
if None, the field is set to zero.
|
||||
"""
|
||||
self.start[name] = start
|
||||
self.min[name] = min
|
||||
self.max[name] = max
|
||||
self.step[name] = step
|
||||
self.min[name] = min if min is not None else start
|
||||
self.max[name] = max if max is not None else start
|
||||
if width is not None:
|
||||
self.min[name] = start - width / 2.
|
||||
self.max[name] = start + width / 2.
|
||||
self.step[name] = step if step is not None else 0.0
|
||||
|
||||
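# usage sketch of the extended signature (parameter names are hypothetical):
dom = Domain()
dom.add_param('dz', start=1.0, min=0.5, max=1.5, step=0.05)  # explicit bounds
dom.add_param('dA', start=0.0, width=2.0, step=0.1)          # min=-1.0, max=+1.0
dom.add_param('phi', start=90.0)                             # kept constant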
def get_param(self, name):
|
||||
"""
|
||||
@ -144,9 +169,27 @@ class Params(object):
|
||||
objects of this class are created by the implementation of the create_params() method
|
||||
of the actual project class.
|
||||
"""
|
||||
|
||||
## @var angular_resolution (float)
|
||||
# FWHM angular resolution of the detector.
|
||||
#
|
||||
# maps to:
|
||||
# @arg emission angle window (EDAC)
|
||||
# @arg angular_broadening (MSC)
|
||||
|
||||
## @var phase_files (dict)
|
||||
# dictionary of phase files.
|
||||
#
|
||||
# the keys are atomic numbers, the values are file names.
|
||||
# if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
|
||||
#
|
||||
# maps to:
|
||||
# @arg scatterer (EDAC)
|
||||
# @arg atomic_number, phase_file (MSC)
|
||||
|
||||
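# illustration (hypothetical file names):
# params = Params()
# params.phase_files = {29: "phases/cu.pha", 8: "phases/o.pha"}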
    def __init__(self):
        self.title = "MSC default parameters"
        self.comment = "from msc_project.Params()"
        self.title = "default parameters"
        self.comment = "set by project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
@ -154,7 +197,7 @@ class Params(object):
        self.initial_state = "1s"
        # MSC convention: H, V, L, R, U
        self.polarization = "H"
        self.angular_broadening = 0.0
        self.angular_resolution = 1.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
        # the energy scale of EDAC is referenced to the vacuum level
@ -167,16 +210,14 @@ class Params(object):
        self.experiment_temperature = 300.0
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        self.phase_files = {}
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
        self.fcut = 15.0
        self.cut = 15.0
        self.lattice_constant = 1.0
        self.atom_types = 0
        self.atomic_number = [1, 2, 3, 4]
        self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
        self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
        self.msq_displacement = {}
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
@ -216,9 +257,12 @@ class Scan(object):
    # example: ['t','p']

    ## @var emitter (string)
    # chemical symbol of emitter atom
    # chemical symbol and, optionally following, further specification (chemical state, environment, ...)
    # of photo-emitting atoms.
    # the interpretation of this string is up to the project and its cluster generator.
    # it should, however, always start with a chemical element symbol.
    #
    # example: 'Cu'
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of initial state
@ -342,142 +386,6 @@ class Scan(object):
        self.alphas = np.zeros((1))


class ClusterGenerator(object):
    """
    cluster generator class.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
        cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model.

        the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
        by default, the method returns 1, which means that there is only one emitter configuration.

        emitter configurations are mainly a way to distribute the calculations to multiple processes
        based on emitters since the resulting diffraction patterns add up incoherently.
        for this to work, the create_cluster() method must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at run-time
        based on the available resources.

        note that this function returns the number of _configurations_ not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must override this method
        and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index is -1 if called by the emitter handler.

        @return number of emitter configurations.
        this implementation returns the default value of 1.
        """
        return 1

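as suggested in the note above, a minimal sketch of such an override delegates the count to the generated cluster (this assumes that the project's create_cluster marks all emitters when index.emit is -1):

@code{.py}
def count_emitters(self, model, index):
    # index.emit is -1 here, so create_cluster returns the full cluster
    cluster = self.create_cluster(model, index)
    return cluster.get_emitter_count()
@endcode
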
    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index, symmetry index and emitter index.

        the scan index can be used to generate a different cluster for different scan geometry,
        e.g., if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic arrangement.
        in this case, depending on the value of index.sym, the function must generate a cluster corresponding
        to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on the value of the emitter index, the method must react differently:

        1. if the value is lower or equal to zero, return the full cluster and mark all inequivalent emitter atoms.
        emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
        the full diffraction scan will be calculated in one calculation.

        2. if the value is greater than zero, generate the cluster with the emitter configuration
        selected by the emitter index.
        the index is in the range between 1 and the return value of count_emitters().
        the results of the individual emitter calculations are summed up in combine_emitters().

        the code should ideally be written such that either case yields the same diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have to be implemented,
        and the method can ignore the emitter index.

        the method must ignore the model and energy index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index.
        if lower or equal to zero, generate the full cluster and mark all emitters.
        if greater than zero, the value is a 1-based index of the emitter configuration.
        """
        return None


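a schematic override might dispatch on the emitter index like this; build_full_cluster() and build_emitter_config() stand for hypothetical project-specific helpers:

@code{.py}
def create_cluster(self, model, index):
    if index.emit <= 0:
        # full cluster with all inequivalent emitter atoms marked
        return self.build_full_cluster(model, index, mark_emitters=True)
    else:
        # partial cluster for the 1-based emitter configuration index.emit
        return self.build_emitter_config(model, index, index.emit)
@endcode
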
class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
    this class redirects generator calls to the project methods
    providing compatibility to older project code.
    """

    def __init__(self, project):
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.
        """
        try:
            return self.project.count_emitters(model, index)
        except AttributeError:
            return 1

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)


# noinspection PyMethodMayBeStatic
class Project(object):
    """
@ -549,39 +457,27 @@ class Project(object):
    # the initial value is a LegacyClusterGenerator object
    # which routes cluster calls back to the project for compatibility with older project code.

    ## @var pop_size (int)
    # population size (number of particles) in the particle swarm optimization.
    ## @var optimizer_params (dict)
    # optional parameters of the model optimizer.
    #
    # by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
    # you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
    # - the calculation of a model takes a long time compared to the available computing time.
    # - the calculation of a model spawns many sub-tasks due to complex symmetry.
    # - you want to increase the number of generations compared to the number of particles.
    # this is a dictionary that can have (among others) the following values.
    # for a detailed list, see the documentation of the respective model handler.
    #
    # the default value is 0.
    #
    # the value can be set by the command line.

    ## @var history_file (string)
    # name of a file containing the results from previous optimization runs.
    # this can be used to resume a swarm optimization where it was interrupted before.
    #
    # the history file is a space-delimited, multi-column, text file.
    # output files of a previous optimization run can be used as is.
    # there must be one column for each model parameter, and one column of R factors.
    # the first row must contain the names of the model parameters.
    # the name of the R factor column must be '_rfac'.
    # additional columns may be included and are ignored.
    #
    # by default, no history is loaded.

    ## @var recalc_history (bool)
    # select whether the R-factors of the historic models are calculated again.
    #
    # this is useful if the historic data was calculated for a different cluster, different set of parameters,
    # or different experimental data, and if the R-factors of the new optimization may be systematically greater.
    # set this argument to False only if the calculation is a continuation of a previous one
    # without any changes to the code.
    # @arg @c 'pop_size' (int)
    # population size (number of particles) in the swarm or genetic optimization mode.
    # by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
    # you may want to override the default value in cases where the automatic choice is not appropriate.
    # the value can be set by the command line.
    # @arg @c 'seed_file' (string)
    # name of a file containing the results from previous optimization runs.
    # this can be used to resume a swarm or genetic optimization where it was interrupted before.
    # the seed file is a space-delimited, multi-column, text file,
    # e.g., the output file of a previous optimization.
    # by default, no seed is loaded.
    # @arg @c 'recalc_seed' (bool)
    # select whether the R-factors of the seed models are calculated again.
    # set this argument to False only if the calculation is a continuation of a previous one
    # without any changes to the code.

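for example, a project could override these defaults in its __init__ method (the seed file name is hypothetical):

@code{.py}
self.optimizer_params['pop_size'] = 20
self.optimizer_params['seed_file'] = 'previous_run.dat'
self.optimizer_params['recalc_seed'] = True
@endcode
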
    ## @var data_dir
    # directory path to experimental data.
@ -594,9 +490,17 @@ class Project(object):
    # if the location of the files may depend on the machine or user account,
    # the user may want to specify the data path on the command line.

    ## @var output_dir (string)
    # directory path for data files produced during the calculation, including intermediate files.
    #
    # output_dir and output_file are set at once by @ref set_output.

    ## @var output_file (string)
    # file name root for data files produced during the calculation, including intermediate files.
    #
    # the file name should include the path. the path must also be set in @ref output_dir.
    #
    # output_dir and output_file are set at once by @ref set_output.

    ## @var timedelta_limit (datetime.timedelta)
    # wall time after which no new calculations should be started.
@ -604,11 +508,11 @@ class Project(object):
    # the actual wall time may be longer by the remaining time of running calculations.
    # running calculations will not be aborted.

    ## @var _combined_scan
    ## @var combined_scan
    # combined raw data from scans.
    # updated by add_scan().

    ## @var _combined_modf
    ## @var combined_modf
    # combined modulation function from scans.
    # updated by add_scan().

@ -618,30 +522,55 @@ class Project(object):
    #
    # files.categories_to_delete determines which files can be deleted.

    ## @var keep_best
    # number of best models for which result files should be kept.
    #
    # this attribute determines how many models are kept based on R-factor ranking at each node of the task tree
    # (up to keep_levels).

    ## @var keep_levels
    # numeric task level down to which R-factors are considered when model files are cleaned up.
    #
    # @arg 0 = model level: combined results only.
    # @arg 1 = scan level: scan nodes in addition to combined results (level 0).
    # @arg 2 = symmetry level: symmetry nodes in addition to level 1.
    # @arg 3 = emitter level: emitter nodes in addition to level 2.
    # @arg 4 = region level: region nodes in addition to level 3.

    def __init__(self):
        self.mode = "single"
        self.code = "edac"
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = LegacyClusterGenerator(self)
        self.cluster_generator = mc.LegacyClusterGenerator(self)
        self.scans = []
        self.symmetries = []
        self.pop_size = 0
        self.history_file = ""
        self.recalc_history = True
        self.optimizer_params = {
            'pop_size': 0,
            'seed_file': "",
            'seed_limit': 0,
            'recalc_seed': True,
            'table_file': ""
        }
        self.data_dir = ""
        self.output_dir = ""
        self.output_file = "pmsco_data"
        self.timedelta_limit = datetime.timedelta(days=1)
        self._combined_scan = None
        self._combined_modf = None
        self.combined_scan = None
        self.combined_modf = None
        self.files = files.FileTracker()
        self.handler_classes = {}
        self.handler_classes['model'] = handlers.SingleModelHandler
        self.handler_classes['scan'] = handlers.ScanHandler
        self.handler_classes['symmetry'] = handlers.SymmetryHandler
        self.handler_classes['emitter'] = handlers.EmitterHandler
        self.handler_classes['region'] = handlers.SingleRegionHandler
        self.keep_levels = 1
        self.keep_best = 10
        self.handler_classes = {
            'model': handlers.SingleModelHandler,
            'scan': handlers.ScanHandler,
            'sym': handlers.SymmetryHandler,
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
        }
        self.calculator_class = None
        self._tasks_fields = []
        self._db = database.ResultsDatabase()

    def create_domain(self):
        """
@ -676,8 +605,8 @@ class Project(object):
        @return: None
        """
        self.scans = []
        self._combined_scan = None
        self._combined_modf = None
        self.combined_scan = None
        self.combined_modf = None

    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
        """
@ -695,7 +624,7 @@ class Project(object):
        * intensity vs theta and phi (hemisphere or hologram scan)

        the method calculates the modulation function if @c is_modf is @c False.
        it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.

@ -732,22 +661,26 @@ class Project(object):
            scan.modulation = None

        if scan.raw_data is not None:
            if self._combined_scan is not None:
                dtype = md.common_dtype((self._combined_scan, scan.raw_data))
                self._combined_scan = np.hstack((self._combined_scan, md.restructure_data(scan.raw_data, dtype)))
            if self.combined_scan is not None:
                dt = md.common_dtype((self.combined_scan, scan.raw_data))
                d1 = md.restructure_data(self.combined_scan, dt)
                d2 = md.restructure_data(scan.raw_data, dt)
                self.combined_scan = np.hstack((d1, d2))
            else:
                self._combined_scan = scan.raw_data.copy()
                self.combined_scan = scan.raw_data.copy()
        else:
            self._combined_scan = None
            self.combined_scan = None

        if scan.modulation is not None:
            if self._combined_modf is not None:
                dtype = md.common_dtype((self._combined_modf, scan.modulation))
                self._combined_modf = np.hstack((self._combined_modf, md.restructure_data(scan.modulation, dtype)))
            if self.combined_modf is not None:
                dt = md.common_dtype((self.combined_modf, scan.modulation))
                d1 = md.restructure_data(self.combined_modf, dt)
                d2 = md.restructure_data(scan.modulation, dt)
                self.combined_modf = np.hstack((d1, d2))
            else:
                self._combined_modf = scan.modulation.copy()
                self.combined_modf = scan.modulation.copy()
        else:
            self._combined_modf = None
            self.combined_modf = None

        return scan

@ -783,9 +716,16 @@ class Project(object):

    def set_output(self, filename):
        """
        set base name of output file
        set path and base name of output file.

        path and name are copied to the output_file attribute.
        path is copied to the output_dir attribute.

        if the path is missing, the destination is the current working directory.
        """
        self.output_file = filename
        path, name = os.path.split(filename)
        self.output_dir = path

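a short usage sketch (the path is hypothetical):

@code{.py}
project.set_output('work/run01/nickel')
# afterwards: project.output_file == 'work/run01/nickel'
#             project.output_dir == 'work/run01'
@endcode
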
    def set_timedelta_limit(self, timedelta):
        """
@ -797,12 +737,15 @@ class Project(object):

    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine results of different symmetry into one result. calculate the modulation function.
        combine results of different symmetry into one result and calculate the modulation function.

        the symmetry results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        by default, this method adds all symmetries with equal weight.
        weights can be defined in the model dictionary with keys 'wsym0', 'wsym1', etc.
        missing weights default to 1.
        note: to avoid correlated parameters, one symmetry must always have a fixed weight.

        @param parent_task: (CalculationTask) parent task of the symmetry tasks.
        the method must write the results to the files indicated
@ -817,19 +760,26 @@ class Project(object):

        @raise IndexError if child_tasks is empty

        @raise KeyError if a filename is missing
        @raise IOError if a filename is missing

        @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
        """

        result_data = None
        sum_weights = 0.
        for task in child_tasks:
            data = md.load_data(task.result_filename)
            if result_data is not None:
                result_data['i'] += data['i']
            else:
                result_data = data
            if result_data is None:
                result_data = data.copy()
                result_data['i'] = 0.
            try:
                weight = task.model['wsym{}'.format(task.id.sym)]
            except KeyError:
                weight = 1.
            result_data['i'] += weight * data['i']
            sum_weights += weight
        result_data['i'] /= sum_weights

        md.save_data(parent_task.result_filename, result_data)

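to make a symmetry weight part of the optimization, a project could declare it in its model domain, e.g. in create_domain() (the numbers are hypothetical); leaving 'wsym0' undeclared keeps symmetry 0 at the fixed default weight of 1:

@code{.py}
dom.add_param('wsym1', start=0.5, min=0.0, max=1.0, step=0.1)
# 'wsym0' is not declared: symmetry 0 keeps the fixed weight 1
@endcode
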
@ -865,7 +815,7 @@ class Project(object):

        @raise IndexError if child_tasks is empty

        @raise KeyError if a filename is missing
        @raise IOError if a filename is missing

        @note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
@ -898,7 +848,7 @@ class Project(object):
        the datasets of the scans are appended.
        this is done for intensity and modulation data independently.

        @param parent_task: (CalculationTask) parent task of the symmetry tasks.
        @param parent_task: (CalculationTask) parent task of the scan tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

@ -910,14 +860,12 @@ class Project(object):
        @return: None

        @raise IndexError if child_tasks is empty.

        @raise KeyError if a filename is missing.
        """

        # intensity
        try:
            stack1 = [md.load_data(task.result_filename) for task in child_tasks]
        except (KeyError, IOError):
        except IOError:
            parent_task.result_filename = ""
        else:
            dtype = md.common_dtype(stack1)
@ -928,7 +876,7 @@ class Project(object):
        # modulation
        try:
            stack1 = [md.load_data(task.modf_filename) for task in child_tasks]
        except (KeyError, IOError):
        except IOError:
            parent_task.modf_filename = ""
        else:
            dtype = md.common_dtype(stack1)
@ -936,6 +884,142 @@ class Project(object):
            result_modf = np.hstack(tuple(stack2))
            md.save_data(parent_task.modf_filename, result_modf)

    def combine_regions(self, parent_task, child_tasks):
        """
        combine results from different regions into one result, for intensity and modulation.

        the scan results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        the datasets of the regions are appended and sorted in the standard order of the data module.
        if the resulting length differs from the corresponding experimental scan,
        an error is printed to the logger, but the calculation continues.

        the modulation function is calculated by calling @ref calc_modulation.

        @param parent_task: (CalculationTask) parent task of the region tasks.
        the method writes the results to the file names
        given by the @c result_filename and @c modf_filename attributes.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each region.
        the method reads the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by region index.

        @return: None

        @raise IndexError if child_tasks is empty.
        """
        # intensity
        try:
            stack1 = [md.load_data(task.result_filename) for task in child_tasks]
        except IOError:
            parent_task.result_valid = False
            parent_task.result_filename = ""
        else:
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            result_data = np.hstack(tuple(stack2))
            md.sort_data(result_data)
            md.save_data(parent_task.result_filename, result_data)

            scan = self.scans[parent_task.id.scan]
            if result_data.shape[0] != scan.raw_data.shape[0]:
                logger.error(BMsg("scan length mismatch: combined result: {result}, experimental data: {expected}",
                                  result=result_data.shape[0], expected=scan.raw_data.shape[0]))

        # modulation
        try:
            data = md.load_data(parent_task.result_filename)
            modf = self.calc_modulation(data, parent_task.model)
        except IOError:
            parent_task.modf_filename = ""
        else:
            md.save_data(parent_task.modf_filename, modf)

    def setup(self, handlers):
        """
        prepare for calculations.

        this method is called in the master process before starting the task loop.
        at this point the task handlers have been created and set up.
        if the project needs to change settings of task handlers it can do so in this method.

        this instance writes the header of the tasks.dat file
        that will receive sub-task evaluation results from the evaluate_result() method.

        @param handlers: dictionary listing the initialized task handler instances.
        the dictionary keys are the attribute names of pmsco.dispatch.CalcID:
        'model', 'scan', 'sym', 'emit' and 'region'.

        @return: None
        """
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
        fields = ["_" + f for f in fields]
        dom = self.create_domain()
        model_fields = dom.start.keys()
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields

        with open(self.output_file + ".tasks.dat", "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(fields))
            outfile.write("\n")

        # todo : fill in the descriptive fields, change to file-database
        self._db.connect(":memory:")
        project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
        job_id = self._db.register_job(project_id,
                                       "job-name",
                                       self.mode,
                                       socket.gethostname(),
                                       "git-hash",
                                       datetime.datetime.now(),
                                       "description")
        self._db.register_params(model_fields)
        self._db.create_models_view()

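for illustration, with two model parameters dNN and theta the header written above would read as follows, assuming the field order of pmsco.dispatch.CalcID is model, scan, sym, emit, region:

@verbatim
# _rfac _model _scan _sym _emit _region dNN theta
@endverbatim
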
    def evaluate_result(self, parent_task, child_tasks):
        """
        evaluate the result of a calculation task.

        this method is called from the add_result of the task handlers at each level.
        it gives the project a hook to check the progress of a model at any level of the task tree.

        the method calculates the r-factor by calling the Project.calc_rfactor method.
        the result is written to the task.rfac field and to the .tasks.dat file.
        invalid and region-level results are skipped.

        this method is called in the master process only.

        @param parent_task: (CalculationTask) a calculation task.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
        the sequence must be sorted by task ID.

        @return: None
        """
        if parent_task.result_valid and parent_task.id.region == -1:
            try:
                parent_task.rfac = self.calc_rfactor(parent_task, child_tasks)
            except ValueError:
                parent_task.result_valid = False
                logger.warning(BMsg("calculation {0} resulted in an undefined R-factor.", parent_task.id))
            else:
                values_dict = parent_task.id._asdict()
                values_dict = {"_" + k: v for k, v in values_dict.items()}
                values_dict.update(parent_task.model)
                values_dict['_rfac'] = parent_task.rfac
                values_list = [values_dict[field] for field in self._tasks_fields]
                with open(self.output_file + ".tasks.dat", "a") as outfile:
                    outfile.write(" ".join(format(value) for value in values_list) + "\n")

                self._db.insert_result(parent_task.id, values_dict)

        return None

    # noinspection PyUnusedLocal
    def calc_modulation(self, data, model):
        """
@ -965,31 +1049,246 @@ class Project(object):

        return md.calc_modfunc_loess(data)

    def calc_rfactor(self, task):
    def calc_rfactor(self, parent_task, child_tasks):
        """
        calculate the R-factor of a task.
        calculate the r-factor of a task.

        the method calculates the R-factor over the combined scans.
        the corresponding experimental data is taken from self._combined_modf.
        the r-factor is calculated on the experimental and simulated modulation functions.
        the algorithm differs for the model level and the lower task levels.
        at the model level, the calculation is delegated to Project.combine_rfactors.
        at all other levels, the r-factor is calculated by Project.rfactor,
        where the simulated data is loaded from the file specified by parent_task.modf_filename
        and the experimental data from Project.scan.

        this method is called by the model handler.
        this method is called by the task handlers.
        all child tasks belonging to the parent task must be complete.

        by default, the R-factor is calculated by data.rfactor() over the combined scans.
        override this method in your project to use a different R-factor algorithm.
        to select or implement a specific R-factor algorithm,
        the project sub-class should override Project.rfactor.
        to combine scan r-factors, it should override or patch Project.combine_rfactors.

        @param task: (CalculationTask) a model task.
        @version in earlier versions,
        projects had to override this method to implement their algorithm.
        this has led to duplication of common code.
        the r-factor algorithm is now distributed over several methods,
        and the method signature has changed.
        new projects should override Project.rfactor and/or Project.combine_rfactors.

        @return (int) calculated R-factor.
        @param parent_task: (CalculationTask) a calculation task.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
        the sequence must be sorted by task ID.

        @return (float) calculated R-factor.

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        task_data = md.load_data(task.modf_filename)
        result_r = md.rfactor(self._combined_modf, task_data)
        if parent_task.id.scan >= 0:
            task_data = md.load_data(parent_task.modf_filename)
            exp_data = self.scans[parent_task.id.scan].modulation
            result_r = self.rfactor(exp_data, task_data)
        else:
            result_r = self.combine_rfactors(parent_task, child_tasks)

        return result_r

    def rfactor(self, exp_data, theo_data):
        """
        calculate the r-factor of simulated diffraction data.

        in this class, the method calls the data.rfactor function to calculate the r-factor.
        override this method in your project to use a different R-factor algorithm.

        the input arrays must have the same shape,
        and the coordinate columns must be identical (they are ignored, however).
        the array elements are compared element-by-element.
        terms having NaN intensity are ignored.

        if the sigma column is present in experiment and non-zero,
        the R-factor terms are weighted.

        @param exp_data: (numpy structured array)
        ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
        if an @c s field is present and non-zero,
        the R-factor terms are weighted by 1/sigma**2.

        @param theo_data: (numpy structured array)
        ETPI or ETPAI array containing the calculated modulation functions.

        @return: (float) scalar R-factor

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        return md.rfactor(exp_data, theo_data)

    def opt_rfactor(self, exp_data, theo_data):
        """
        calculate the r-factor of simulated diffraction data, adjusting their amplitude.

        this is an alternative r-factor calculation algorithm
        using the pmsco.data.optimize_rfactor() function.

        to activate this method (replacing the default one), assign it to Project.rfactor
        in the overriding __init__ or setup method:
        @code{.py}
        self.rfactor = self.opt_rfactor
        @endcode

        @param exp_data: (numpy structured array)
        ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
        if an @c s field is present and non-zero,
        the R-factor terms are weighted by 1/sigma**2.

        @param theo_data: (numpy structured array)
        ETPI or ETPAI array containing the calculated modulation functions.

        @return: (float) scalar R-factor

        @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
        """
        return md.optimize_rfactor(exp_data, theo_data)

    def combine_rfactors(self, parent_task, child_tasks):
        """
        combine r-factors of child tasks.

        the r-factors are taken from the rfac attribute of the child_tasks.
        the result is an average of the child r-factors.

        to produce a balanced result, every child dataset must contain a similar amount of information.
        if this is not the case, the child r-factors must be weighted.
        weighting is currently not implemented but may be introduced in a future version.

        the method is intended to be used at the model level (children are scans),
        though it can technically be used at any level where child r-factors are available.

        @param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
        i.e. a model task.

        @param child_tasks: (sequence of CalculationTask) child tasks of parent_tasks
        that may be consulted for calculating the r-factor.

        @return: (float) r-factor, NaN if parent task is invalid

        @raise ValueError or IndexError if child_tasks is empty.
        """
        if parent_task.result_valid:
            rsum = 0.
            for task in child_tasks:
                rsum += task.rfac
            return rsum / len(child_tasks)
        else:
            return float('nan')

    def alt_combine_rfactors(self, parent_task, child_tasks):
        """
        combine r-factors of child tasks by explicit calculation on the combined result.

        this is an alternative implementation of combine_rfactors.
        instead of using the r-factors from child tasks,
        it re-calculates the r-factor for the combined dataset.
        this method avoids the issue of weighting
        but can introduce bias if the amplitudes of the child datasets differ substantially.

        the simulated dataset is loaded from the file specified by the parent task;
        the corresponding experimental data is taken from self.combined_modf.

        to activate this method, assign it to combine_rfactors
        in the overriding __init__ or setup method:
        @code{.py}
        self.combine_rfactors = self.alt_combine_rfactors
        @endcode

        @param parent_task: (CalculationTask) parent task for which the r-factor is calculated,
        i.e. a model task.

        @param child_tasks: (sequence of CalculationTask) child tasks of parent_tasks
        that may be consulted for calculating the r-factor.

        @return: (float) r-factor, NaN if parent task is invalid
        """
        if parent_task.result_valid:
            task_data = md.load_data(parent_task.modf_filename)
            exp_data = self.combined_modf
            return self.rfactor(exp_data, task_data)
        else:
            return float('nan')

    def export_cluster(self, index, filename, cluster):
        """
        export the cluster of a calculation task in XYZ format for diagnostics and reporting.

        this method is called with the final cluster just before it is handed over to the calculator.
        it saves the atom coordinates in XYZ format for future reference (e.g. graphics).

        the method creates two files:
        @arg a file with extension '.xyz' contains the whole cluster in XYZ format.
        @arg a file with extension '.emit.xyz' contains only emitter atoms in XYZ format.

        the first part of the file name is formatted with the output name and the complete task identification.
        the file is registered with the file tracker in the 'cluster' category
        so that it will be deleted unless the cluster category is selected for keeping.

        derived project classes may override or extend this method
        to carry out further diagnostics or reporting on the cluster.

        @param index: (CalcID) calculation index to which the cluster belongs.
        region may be -1 if only one cluster is exported for all regions
        (clusters do not depend on the scan region).
        emit may be -1 if the cluster is a master from which emitter-related child clusters are derived.

        @param filename: (str) base file name for the output files.
        the filename should be formatted using pmsco.dispatch.CalculationTask.format_filename().
        extensions are appended by this method.

        @param cluster: a pmsco.cluster.Cluster() object with all atom positions and emitters.

        @return: dictionary listing the names of the created files with their category.
        the dictionary key is the file name,
        the value is the file category (cluster).
        """
        _files = {}
        xyz_filename = filename + ".xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        _files[xyz_filename] = 'cluster'

        xyz_filename = filename + ".emit.xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ, emitters_only=True)
        _files[xyz_filename] = 'cluster'

        return _files

    def cleanup(self):
        """
        delete unwanted files at the end of a project.

        @return: None
        """
        self.cleanup_files()
        self._db.disconnect()

    def cleanup_files(self, keep=0):
        """
        delete uninteresting files.

        these are all files that
        belong to one of the self.files.categories_to_delete categories or
        do not belong to one of the "best" models.

        "best" models are a number (self.keep_best) of models that gave the lowest R-factors
        at each task level from root to self.keep_levels.
        for example, if `keep_best = 10` and `keep_levels = 1`,
        the 10 best models at the top level, and the 10 best at the scan level are kept.
        this means that in total up to `n = 10 + 10 * n_scans` models may be kept,
        where n_scans is the number of scan files in the job.

        @param keep: minimum number of best models to keep.
        0 (default): use the project parameter self.keep_best.

        @return None
        """
        self.files.delete_files()
        if 'rfac' in self.files.categories_to_delete:
            keep = max(keep, self.keep_best)
            keepers = self._db.query_best_task_models(self.keep_levels, keep)
            self.files.delete_models(keep=keepers)

909 pmsco/swarm.py
@ -1,909 +0,0 @@
"""
|
||||
@package pmsco.swarm
|
||||
particle swarm optimization handler.
|
||||
|
||||
the module starts multiple MSC calculations and optimizes the model parameters
|
||||
according to the particle swarm optimization algorithm.
|
||||
|
||||
Particle swarm optimization adapted from
|
||||
D. A. Duncan et al., Surface Science 606, 278 (2012)
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import copy
|
||||
import os
|
||||
import datetime
|
||||
import logging
|
||||
import numpy as np
|
||||
import handlers
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
CONSTRAIN_MODES = {'re-enter', 'bounce', 'scatter', 'stick', 'expand'}


class Population(object):
    """
    particle swarm population.
    """

    ## @var size_req
    # requested number of particles.
    # read-only. call setup() to change this attribute.

    ## @var model_start
    # (dict) initial model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_min
    # (dict) low limits of the model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_max
    # (dict) high limits of the model parameters.
    # if min == max, the parameter is kept constant.
    # read-only. call setup() to change this attribute.

    ## @var model_step
    # (dict) initial velocity (difference between two steps) of the particle.
    # read-only. call setup() to change this attribute.

    ## @var friends
    # number of other particles that each particle consults for the global best fit.
    # default = 3.

    ## @var momentum
    # momentum of the particle.
    # default = 0.689343.

    ## @var attract_local
    # preference for returning to the local best fit.
    # default = 1.92694.

    ## @var attract_global
    # preference for heading towards the global best fit.
    # default = 1.92694.

    ## @var generation
    # generation number. the counter is incremented by advance_population().
    # initial value = 0.

    ## @var model_count
    # model number.
    # the counter is incremented by advance_particle() each time a particle position is changed.
    # initial value = 0.

    ## @var pos
    # (numpy.ndarray) current positions of each particle.
    #
    # the column names include the names of the model parameters, taken from domain.start,
    # and the special names @c '_particle', @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
    # * @c '_particle': index of the particle in the array.
    # the particle index is used to match a calculation result and its original particle.
    # it must be preserved during the calculation process.
    #
    # * @c '_gen': generation number.
    # the generation number counts the number of calls to advance_population().
    # this field is not used internally.
    # the first population is generation 0.
    #
    # * @c '_model': model number.
    # the model number counts the number of calls to advance_particle().
    # the field is filled with the current value of model_count whenever the position is changed.
    # this field is not used internally.
    # the model handlers use it to derive their model ID.
    #
    # * @c '_rfac': calculated R-factor for this position.
    # this field is meaningful in the best and results arrays only
    # where it is set by the add_result() method.
    # in the pos and vel arrays, the field value is arbitrary.
    #
    # @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
    # this object is a <em>view</em> of the original array item.

    ## @var vel
    # (numpy.ndarray) current velocities of each particle.
    # the structure is the same as for the pos array.

    ## @var best
    # (numpy.ndarray) best positions found by each particle so far.
    # the structure is the same as for the pos array.

    ## @var results
    # (numpy.ndarray) all positions and resulting R-factors calculated.
    # the structure is the same as for the pos array.

    ## @var _hold_once
    # (bool) hold the population once during the next update.
    # if _hold_once is True, advance_population() will skip the update process once.
    # this flag is set by setup() because it sets up a valid initial population.
    # the caller then doesn't have to care whether to skip advance_population() after setup.

    def __init__(self):
        """
        initialize the population object.

        """
        self.size_req = 0
        self.model_start = {}
        self.model_min = {}
        self.model_max = {}
        self.model_step = {}

        self.friends = 3
        self.momentum = 0.689343
        self.attract_local = 1.92694
        self.attract_global = 1.92694
        self.position_constrain_mode = 'default'
        self.velocity_constrain_mode = 'default'

        self.generation = 0
        self.model_count = 0
        self._hold_once = False

        self.pos = None
        self.vel = None
        self.best = None
        self.results = None

    def pos_gen(self):
        """
        generator for dictionaries of the pos array.

        the generator can be used to loop over the array.
        on each iteration, it yields a dictionary of the position at the current index.
        for example,
        @code{.py}
        for pos in pop.pos_gen():
            print(pos['_particle'], pos['_rfac'])
        @endcode
        """
        return ({name: pos[name] for name in pos.dtype.names} for pos in self.pos)

    def vel_gen(self):
        """
        generator for dictionaries of the vel array.

        @see pos_gen() for details.
        """
        return ({name: vel[name] for name in vel.dtype.names} for vel in self.vel)

    def best_gen(self):
        """
        generator for dictionaries of the best array.

        @see pos_gen() for details.
        """
        return ({name: best[name] for name in best.dtype.names} for best in self.best)

    def results_gen(self):
        """
        generator for dictionaries of the results array.

        @see pos_gen() for details.
        """
        return ({name: results[name] for name in results.dtype.names} for results in self.results)

    @staticmethod
    def get_model_dtype(model_params):
        """
        get numpy array data type for model parameters and swarm control variables.

        @param model_params: dictionary of model parameters or list of parameter names.

        @return: dtype for use with numpy array constructors.
        this is a sorted list of (name, type) tuples.
        """
        dt = []
        for key in model_params:
            dt.append((key, 'f4'))
        dt.append(('_particle', 'i4'))
        dt.append(('_gen', 'i4'))
        dt.append(('_model', 'i4'))
        dt.append(('_rfac', 'f4'))
        dt.sort(key=lambda t: t[0].lower())
        return dt

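a quick sketch of the result (the parameter names are hypothetical; the underscore fields sort before the letters):

@code{.py}
dt = Population.get_model_dtype({'dNN': 2.1, 'theta': 0.0})
# dt == [('_gen', 'i4'), ('_model', 'i4'), ('_particle', 'i4'),
#        ('_rfac', 'f4'), ('dNN', 'f4'), ('theta', 'f4')]
@endcode
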
    def setup(self, size, domain, history_file="", recalc_history=True):
        """
        set up the population arrays seeded with previous results and the start model.

        * set the population parameters and allocate the data arrays.
        * set one particle to the initial guess, and the others to positions from a previous results file.
        if the file contains fewer particles than allocated, the remaining particles are initialized randomly.

        seeding from a history file can be used to continue an interrupted optimization process.
        the method loads the results into the best and position arrays,
        and updates the other arrays and variables
        so that the population can be advanced and calculated.

        by default, the calculations of the previous parameters are repeated.
        this is recommended whenever the code, the experimental input, or the project arguments change
        because all of them may have an influence on the R-factor.

        re-calculation can be turned off by setting recalc_history to false.
        this is recommended only if the calculation is a direct continuation of a previous one
        without any changes to the code or input.
        in that case, the previous results are marked as generation -1 with a negative model number.
        upon the first iteration before running the scattering calculations,
        new parameters will be derived by the swarm algorithm.

        @param size: requested number of particles.

        @param domain: definition of initial and limiting model parameters
        expected by the cluster and parameters functions.

        @arg domain.start: initial guess.
        @arg domain.min: minimum values allowed.
        @arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
        @arg domain.step: initial velocity (difference between two steps) for particle swarm.

        @param history_file: name of the results history file.
        this can be a file created by the @ref save_array or @ref save_results methods.
        the columns of the plain-text file contain model parameters and
        the _rfac values of a previous calculation.
        additional columns are ignored.
        the first row must contain the column names.
        if a parameter column is missing,
        the corresponding parameter is seeded with a random value within the domain.
        in this case, a warning is added to the log file.

        the number of rows does not need to be equal to the population size.
        if it is lower, the remaining particles are initialized randomly.
        if it is higher, only the ones with the lowest R factors are used.
        results with R >= 1.0 are ignored in any case.

        @param recalc_history: select whether the R-factors of the historic models are calculated again.
        this is useful if the historic data was calculated for a different cluster, different set of parameters,
        or different experimental data, and if the R-factors of the new optimization may be systematically greater.
        set this argument to False only if the calculation is a continuation of a previous one
        without any changes to the code.

        @return: None
        """
        self.size_req = size
        self.model_start = domain.start
        self.model_min = domain.min
        self.model_max = domain.max
        self.model_step = domain.step

        # allocate arrays
        dt = self.get_model_dtype(self.model_start)
        self.pos = np.zeros(self.size_req, dtype=dt)
        self.vel = np.zeros(self.size_req, dtype=dt)
        self.results = np.empty((0), dtype=dt)

        # randomize population
        self.generation = 0
        self.randomize()
        self.pos['_particle'] = np.arange(self.size_req)
        self.pos['_gen'] = self.generation
        self.pos['_model'] = np.arange(self.size_req)
        self.pos['_rfac'] = 2.1
        self.model_count = self.size_req

        # add previous results
        if history_file:
            hist = np.genfromtxt(history_file, names=True)
            hist = hist[hist['_rfac'] < 1.0]
            hist.sort(order='_rfac')
            hist_size = min(hist.shape[0], self.size_req - 1)

            discarded_fields = {'_particle', '_gen', '_model'}
            source_fields = set(hist.dtype.names) - discarded_fields
            dest_fields = set(self.pos.dtype.names) - discarded_fields
            common_fields = source_fields & dest_fields
            if len(common_fields) < len(dest_fields):
                logger.warning(BMsg("missing columns in history file {hf} default to random seed value.",
                                    hf=history_file))
            for name in common_fields:
                self.pos[name][0:hist_size] = hist[name][0:hist_size]

            self.pos['_particle'] = np.arange(self.size_req)
            logger.info(BMsg("seeding swarm population with {hs} models from history file {hf}.",
                             hs=hist_size, hf=history_file))
            if recalc_history:
                self.pos['_gen'] = self.generation
                self.pos['_model'] = np.arange(self.size_req)
                self.pos['_rfac'] = 2.1
                logger.info("historic models will be re-calculated.")
            else:
                self.pos['_gen'][0:hist_size] = -1
                self.pos['_model'][0:hist_size] = -np.arange(hist_size) - 1
                self.model_count = self.size_req - hist_size
                self.pos['_model'][hist_size:] = np.arange(self.model_count)
                logger.info("historic models will not be re-calculated.")

        # seed last particle with start parameters
        self.seed(self.model_start, index=-1)

        # initialize best array
        self.best = self.pos.copy()

        self._hold_once = True

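a minimal usage sketch, assuming a domain object as described above and a hypothetical history file name (the evaluation step is only indicated; results are reported back through the add_result() method mentioned above):

@code{.py}
pop = Population()
pop.setup(size=20, domain=domain, history_file='run1.dat', recalc_history=True)
for generation in range(100):
    pop.advance_population()
    # calculate the models in pop.pos_gen() and report each result via pop.add_result(...)
@endcode
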
    def randomize(self, pos=True, vel=True):
        """
        initializes a random population.

        the position array is filled with random values (uniform distribution) from the parameter domain.
        velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain.

        the method does not update the particle info fields.

        @param pos: randomize positions. if False, the positions are not changed.
        @param vel: randomize velocities. if False, the velocities are not changed.
        """
        if pos:
            for key in self.model_start:
                self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
                                 np.random.random_sample(self.pos.shape) + self.model_min[key])
        if vel:
            for key in self.model_start:
                self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
                                 (np.random.random_sample(self.pos.shape) - 0.5) / 4.0)

    def seed(self, params, index=0):
        """
        set one of the particles to the specified seed values.

        the method does not update the particle info fields.

        @param params: dictionary of model parameters.
        the keys must match the ones of domain.start.

        @param index: index of the particle that is seeded.
        the index must be in the allowed range of the self.pos array.
        0 is the first, -1 the last particle.
        """
        for key in params:
            self.pos[key][index] = params[key]

    def update_particle_info(self, index, inc_model=True):
        """
        set the internal particle info fields.

        the fields @c _particle, @c _gen, and @c _model are updated with the current values.
        @c _rfac is set to the default value 2.1.

        this method must be called after each change of particle position.

        @param index: (int) particle index.

        @param inc_model: (bool) if True, increment the model count afterwards.

        @return: None
        """
        self.pos['_particle'][index] = index
        self.pos['_gen'][index] = self.generation
        self.pos['_model'][index] = self.model_count
        self.pos['_rfac'][index] = 2.1

        if inc_model:
            self.model_count += 1

    def advance_population(self):
        """
        advance the population by one step.

        this method just calls advance_particle() for each particle of the population.
        if the _hold_once flag is set (as it is right after setup()), the method clears the flag
        and skips the update once, so that the initial population is calculated first.

        @return: None
        """
        if not self._hold_once:
            self.generation += 1
            for index, __ in enumerate(self.pos):
                self.advance_particle(index)
        self._hold_once = False

    def advance_particle(self, index):
        """
        advance a particle by one step.

        @param index: index of the particle in the population.
        """

        # note: the following two identifiers are views,
        # assignment will modify the original array
        pos = self.pos[index]
        vel = self.vel[index]
        # best fit that this individual has seen
        xl = self.best[index]
        # best fit that a group of others have seen
        xg = self.best_friend(index)

        for key in self.model_start:
            # update velocity
            dxl = xl[key] - pos[key]
            dxg = xg[key] - pos[key]
            pv = np.random.random()
            pl = np.random.random()
            pg = np.random.random()
            vel[key] = (self.momentum * pv * vel[key] +
                        self.attract_local * pl * dxl +
                        self.attract_global * pg * dxg)
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.velocity_constrain_mode)
            # update position
            pos[key] += vel[key]
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.position_constrain_mode)

        self.update_particle_info(index)

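    # a sketch of the update rule implemented above, in the usual particle swarm
    # notation (added for clarity, not original source comments):
    #
    #   v_k <- w * r_v * v_k  +  c_l * r_l * (x_l,k - x_k)  +  c_g * r_g * (x_g,k - x_k)
    #   x_k <- x_k + v_k
    #
    # where w = self.momentum, c_l = self.attract_local, c_g = self.attract_global,
    # r_v, r_l, r_g are fresh uniform random numbers in [0, 1),
    # x_l is the particle's own best position so far, and x_g is the best position
    # of a random group of peers (see best_friend()).
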
    @staticmethod
    def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
        """
        constrain a velocity to the given bounds.

        @param _pos: current position of the particle.

        @param _vel: new velocity of the particle, i.e. distance to move.

        @param _min: lower position boundary.

        @param _max: upper position boundary.
        _max must be greater or equal to _min.

        @param _mode: what to do if a boundary constraint is violated.
        reserved for future use. should be set to 'default'.

        @return: tuple (new position, new velocity, new lower boundary, new upper boundary).
        in the current implementation only the velocity may change.
        however, in future versions any of these values may change.
        """
        # halve the velocity until its magnitude is below half the domain width
        d = abs(_max - _min) / 2.0
        if d > 0.0:
            while abs(_vel) >= d:
                _vel /= 2.0
        else:
            _vel = 0.0
        return _pos, _vel, _min, _max

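    # worked example (not part of the original source): with _min = 0.0 and
    # _max = 1.0 the threshold is d = 0.5; a velocity of 1.3 is halved twice,
    # 1.3 -> 0.65 -> 0.325, so
    #
    #   Population.constrain_velocity(0.4, 1.3, 0.0, 1.0)
    #
    # returns (0.4, 0.325, 0.0, 1.0).
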
    @staticmethod
    def constrain_position(_pos, _vel, _min, _max, _mode='default'):
        """
        constrain a position to the given bounds.

        @param _pos: new position of the particle, possibly out of bounds.

        @param _vel: velocity of the particle, i.e. distance from the previous position.
        _vel must be lower than _max - _min.

        @param _min: lower boundary.

        @param _max: upper boundary.
        _max must be greater or equal to _min.

        @param _mode: what to do if a boundary constraint is violated:
        @arg 're-enter': re-enter from the opposite side of the parameter interval.
        @arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
        @arg 'scatter': place the particle at a random place between its old position and the violated boundary.
        @arg 'stick': place the particle at the violated boundary.
        @arg 'expand': move the boundary so that the particle fits.
        @arg 'random': place the particle at a random position between the lower and upper boundaries.
        @arg 'default': the default mode is 'bounce'. this may change in future versions.

        @return: tuple (new position, new velocity, new lower boundary, new upper boundary).
        depending on the mode, any of these values may change.
        the velocity is adjusted to be consistent with the change of position.
        """
        _rng = max(_max - _min, 0.0)
        _old = _pos - _vel

        # prevent undershoot
        if _vel > 0.0 and _pos < _min:
            _pos = _min
            _vel = _pos - _old
        if _vel < 0.0 and _pos > _max:
            _pos = _max
            _vel = _pos - _old

        assert abs(_vel) <= _rng, \
            "velocity: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
        assert (_vel >= 0 and _pos >= _min) or (_vel <= 0 and _pos <= _max), \
            "undershoot: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)

        if _rng > 0.0:
            while _pos > _max:
                if _mode == 're-enter':
                    _pos -= _rng
                elif _mode == 'bounce' or _mode == 'default':
                    _pos = _max - (_pos - _max)
                    _vel = -_vel
                elif _mode == 'scatter':
                    _pos = _old + (_max - _old) * np.random.random()
                    _vel = _pos - _old
                elif _mode == 'stick':
                    _pos = _max
                    _vel = _pos - _old
                elif _mode == 'expand':
                    _max = _pos
                elif _mode == 'random':
                    _pos = _min + _rng * np.random.random()
                    _vel = _pos - _old
                else:
                    raise ValueError('invalid constrain mode')

            while _pos < _min:
                if _mode == 're-enter':
                    _pos += _rng
                elif _mode == 'bounce' or _mode == 'default':
                    _pos = _min - (_pos - _min)
                    _vel = -_vel
                elif _mode == 'scatter':
                    _pos = _old + (_min - _old) * np.random.random()
                    _vel = _pos - _old
                elif _mode == 'stick':
                    _pos = _min
                    _vel = _pos - _old
                elif _mode == 'expand':
                    _min = _pos
                elif _mode == 'random':
                    _pos = _min + _rng * np.random.random()
                    _vel = _pos - _old
                else:
                    raise ValueError('invalid constrain mode')
        else:
            _pos = _max
            _vel = 0.0

        return _pos, _vel, _min, _max

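    # worked example (not part of the original source): in the default 'bounce'
    # mode a particle that overshoots the upper boundary is reflected back into
    # the domain and its velocity is inverted:
    #
    #   Population.constrain_position(1.2, 0.5, 0.0, 1.0, 'bounce')
    #
    # returns (0.8, -0.5, 0.0, 1.0), since 1.2 folds at 1.0 to 1.0 - 0.2 = 0.8.
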
    # noinspection PyUnusedLocal
    def best_friend(self, index):
        """
        select the best fit out of a random set of particles.

        @param index: index of the calling particle.
        currently unused; the friends are drawn from the whole population.

        @return: the "best friend", i.e. the row of self.best with the lowest R-factor
        among self.friends randomly chosen particles.
        """
        friends = np.random.choice(self.best, self.friends, replace=False)
        best_index = np.argmin(friends['_rfac'])
        return friends[best_index]

    def add_result(self, particle, rfac):
        """
        add a calculated particle to the results array, and update the best fit array.

        @param particle: dictionary of model parameters and particle values.
        the keys must correspond to the columns of the pos array,
        i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.

        @param rfac: calculated R-factor.
        the R-factor is written to the '_rfac' field.

        @return better (bool): True if the new R-factor is better than the particle's previous best mark.
        """
        particle['_rfac'] = rfac
        values = tuple(particle[n] for n in self.results.dtype.names)
        result = np.asarray(values, dtype=self.results.dtype)
        self.results = np.append(self.results, result)
        index = particle['_particle']
        better = particle['_rfac'] < self.best['_rfac'][index]
        if better:
            self.best[index] = result

        return better

    def is_converged(self, tol=0.01):
        """
        check whether the population has converged.

        convergence is reached when the R-factors of the N latest results
        do not vary by more than tol, where N is the size of the population.

        @param tol: max. difference allowed between the greatest and the lowest R-factor in the population.
        """
        nres = self.results.shape[0]
        npop = self.pos.shape[0]
        if nres >= npop:
            rfac1 = np.min(self.results['_rfac'][-npop:])
            rfac2 = np.max(self.results['_rfac'][-npop:])
            converg = rfac2 - rfac1 < tol
            return converg
        else:
            return False

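    # worked example (not part of the original source), assuming a population of
    # 4 particles: if the last 4 entries of self.results['_rfac'] are
    # [0.712, 0.715, 0.709, 0.711], then max - min = 0.006 < 0.01,
    # so is_converged() returns True with the default tolerance.
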
    def save_array(self, filename, array):
        """
        save a population array to a text file.

        the columns are space-delimited.
        the first line contains the column names.

        @param filename: name of destination file, optionally including a path.

        @param array: population array to save.
        must be one of self.pos, self.vel, self.best, self.results.
        """
        # all population arrays share the column layout of self.results
        header = " ".join(self.results.dtype.names)
        np.savetxt(filename, array, fmt='%g', header=header)

    def load_array(self, filename, array):
        """
        load a population array from a text file.

        the array to load must be compatible with the current population
        (same number of rows, same columns).
        the first row must contain the column names.
        the ordering of columns may be different.
        the returned array is ordered according to the array argument.

        @param filename: name of source file, optionally including a path.

        @param array: population array to load.
        must be one of self.pos, self.vel, self.results.

        @return array with loaded data.
        this may be the same instance as on input.

        @raise AssertionError if the shapes of the file data and the array differ.
        """
        data = np.genfromtxt(filename, names=True)
        assert data.shape == array.shape
        for name in data.dtype.names:
            array[name] = data[name]
        return array

    def save_population(self, base_filename):
        """
        save the population array to a set of text files.

        the file name extensions are .pos, .vel, and .best.
        """
        self.save_array(base_filename + ".pos", self.pos)
        self.save_array(base_filename + ".vel", self.vel)
        self.save_array(base_filename + ".best", self.best)

    def load_population(self, base_filename):
        """
        load the population array from a set of previously saved text files.
        this can be used to continue an optimization job.

        the file name extensions are .pos, .vel, and .best.
        the files must have the same format as produced by save_population.
        the files must have the same number of rows.
        """
        self.pos = self.load_array(base_filename + ".pos", self.pos)
        self.vel = self.load_array(base_filename + ".vel", self.vel)
        self.best = self.load_array(base_filename + ".best", self.best)

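    # illustrative sketch (not from the original module): saving a population and
    # restoring it to continue an interrupted job. `pop` and the base file name
    # "run1" are hypothetical.
    #
    #   pop.save_population("run1")     # writes run1.pos, run1.vel, run1.best
    #   ...
    #   pop.load_population("run1")     # the population must have the same size
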
    def save_results(self, filename):
        """
        save the complete list of calculation results.
        """
        self.save_array(filename, self.results)


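# illustrative sketch (not from the original module): the Population class can
# also be driven directly in a serial loop, outside of the ParticleSwarmHandler.
# `dom` (a domain with start/min/max dictionaries) and `calc_rfactor` are
# hypothetical stand-ins for project-supplied objects.
#
#   pop = Population()
#   pop.setup(20, dom, "", False)
#   for generation in range(50):
#       pop.advance_population()
#       for particle in pop.pos_gen():
#           rfac = calc_rfactor(particle)
#           pop.add_result(particle, rfac)
#       if pop.is_converged():
#           break

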
class ParticleSwarmHandler(handlers.ModelHandler):
    """
    model handler which implements the particle swarm optimization algorithm.
    """

    ## @var _pop (Population)
    # holds the population object.

    ## @var _pop_size (int)
    # number of particles in the swarm.

    ## @var _outfile (file)
    # output file for model parameters and R factor.
    # the file is open during calculations.
    # each calculation result adds one line.

    ## @var _model_time (timedelta)
    # estimated CPU time to calculate one model.
    # this value is the maximum time measured of the completed calculations.
    # it is used to determine when the optimization should be finished so that the time limit is not exceeded.

    ## @var _converged (bool)
    # indicates that the population has converged.
    # convergence is detected by calling Population.is_converged().
    # once convergence has been reached, this flag is set, and further convergence tests are skipped.

    ## @var _timeout (bool)
    # indicates that the handler has run out of time,
    # i.e. time is up before convergence has been reached.
    # if _timeout is True, create_tasks() will not create further tasks,
    # and add_result() will signal completion when the _pending_tasks queue becomes empty.

    ## @var _invalid_limit (int)
    # maximum tolerated number of invalid calculations.
    #
    # if the number of invalid calculations (self._invalid_count) exceeds this limit,
    # the optimization is aborted.
    # the variable is initialized by self.setup() to 10 times the population size.

    def __init__(self):
        super(ParticleSwarmHandler, self).__init__()
        self._pop = None
        self._pop_size = 0
        self._outfile = None
        self._model_time = datetime.timedelta()
        self._converged = False
        self._timeout = False
        self._invalid_limit = 10

    def setup(self, project, slots):
        """
        initialize the particle swarm and open an output file.

        the population size is set to project.pop_size (raised to a minimum of 4) if it is defined.
        otherwise, it defaults to <code>max(2 * slots, 4)</code>.

        for good efficiency the population size (number of particles) should be
        greater or equal to the number of available processing slots,
        otherwise the next generation is created before all particles have been calculated,
        which may slow down convergence.

        if calculations take a long time compared to the available computation time
        or spawn a lot of sub-tasks due to complex symmetry,
        and you prefer to allow for a good number of generations,
        you should override the population size.

        @param project: project instance.

        @param slots: number of calculation processes available through MPI.

        @return: None
        """
        super(ParticleSwarmHandler, self).setup(project, slots)

        _min_size = 4
        if project.pop_size:
            self._pop_size = max(project.pop_size, _min_size)
        else:
            self._pop_size = max(self._slots * 2, _min_size)
        self._pop = Population()
        self._pop.setup(self._pop_size, self._project.create_domain(), self._project.history_file,
                        self._project.recalc_history)
        self._invalid_limit = self._pop_size * 10

        self._outfile = open(self._project.output_file + ".dat", "w")
        self._outfile.write("# ")
        self._outfile.write(" ".join(self._pop.results.dtype.names))
        self._outfile.write("\n")

        return None

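    # example (not from the original module): a project can pin the population
    # size before the handler is set up, e.g. in its run script.
    # `MyProject` is a hypothetical project subclass.
    #
    #   project = MyProject()
    #   project.pop_size = 40    # at least 4; overrides the max(2 * slots, 4) default
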
    def cleanup(self):
        self._outfile.close()
        super(ParticleSwarmHandler, self).cleanup()

    def create_tasks(self, parent_task):
        """
        develop the particle population and create a calculation task per particle.

        this method advances the population by one step.
        it generates one task for each particle whose model number is non-negative.
        negative model numbers indicate that the particle is used for seeding
        and does not need to be calculated in the first generation.

        if the time limit is approaching, no new tasks are created.

        the process loop calls this method every time the length of the task queue drops
        below the number of calculation processes (slots).
        this means in particular that a population will not be completely calculated
        before the next generation starts.
        for efficiency reasons, we do not wait until a population is complete.
        this will cause a certain mixing of generations and slow down convergence
        because the best peer position in the generation may not be known yet.
        the effect can be reduced by making the population larger than the number of processes.

        @return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
        """

        super(ParticleSwarmHandler, self).create_tasks(parent_task)

        # this is the top-level handler, we expect just one parent: root.
        parent_id = parent_task.id
        assert parent_id == (-1, -1, -1, -1, -1)
        self._parent_tasks[parent_id] = parent_task

        time_pending = self._model_time * len(self._pending_tasks)
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)

        out_tasks = []
        if not self._timeout and not self._converged:
            self._pop.advance_population()

            for pos in self._pop.pos_gen():
                time_pending += self._model_time
                if time_pending > time_avail:
                    self._timeout = True
                    logger.info("time limit reached")
                    break

                if pos['_model'] >= 0:
                    new_task = parent_task.copy()
                    new_task.parent_id = parent_id
                    new_task.model = pos
                    new_task.change_id(model=pos['_model'])

                    child_id = new_task.id
                    self._pending_tasks[child_id] = new_task
                    out_tasks.append(new_task)

        return out_tasks

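    # worked example of the time budget above (not from the original source):
    # with 8 slots, 3 pending tasks, a longest model time of 10 minutes and
    # 2 hours left until self.datetime_limit,
    #
    #   time_pending = 10 min * 3 = 30 min of CPU work already queued
    #   time_avail   = 2 h * 8   = 16 h of aggregate CPU time remaining
    #
    # each new task adds another 10 min to time_pending; task creation stops as
    # soon as time_pending exceeds time_avail, setting self._timeout.
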
    def add_result(self, task):
        """
        calculate the R factor of the result and add it to the results list of the population.

        * save the current population.
        * append the result to the result output file.
        * update the execution time statistics.
        * remove temporary files if requested.
        * check whether the population has converged.

        @return parent task (CalculationTask) if the optimization has converged, @c None otherwise.
        """
        super(ParticleSwarmHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]
        parent_task = self._parent_tasks[task.parent_id]

        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                self._invalid_count += 1
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self._pop.add_result(task.model, rfac)
        self._pop.save_population(self._project.output_file + ".pop")

        if self._outfile:
            s = (str(task.model[name]) for name in self._pop.results.dtype.names)
            self._outfile.write(" ".join(s))
            self._outfile.write("\n")
            self._outfile.flush()

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        if task.result_valid:
            if self._pop.is_converged() and not self._converged:
                logger.info("population converged")
                self._converged = True

            if task.time > self._model_time:
                self._model_time = task.time
        else:
            if self._invalid_count >= self._invalid_limit:
                logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
                self._converged = True

        # optimization complete?
        if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
            del self._parent_tasks[parent_task.id]
        else:
            parent_task = None

        self.cleanup_files(keep=self._pop_size)
        return parent_task